WUT_Computer_Science/Programming/ERSMS-project/movie_recommendations/movie_recommender.py
2026-02-06 22:14:41 +01:00

171 lines
5.3 KiB
Python

import pandas as pd
import numpy as np
from ast import literal_eval
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import hashlib
import json
from configparser import ConfigParser
import psycopg2
from flask import Flask, request, jsonify
from flask_caching import Cache
# Flask application that serves the recommendation endpoint.
app = Flask(__name__)
# Cache configured with the 'SimpleCache' backend; it is attached to `app`
# later via cache.init_app(app) in the __main__ block.
cache = Cache(config={'CACHE_TYPE': 'SimpleCache'})
# Placeholder; nothing in the visible part of this module assigns or reads it.
db_connector = None
# Module-level handle for the PostgreSQL connection opened in the __main__ block.
conn = None
def get_director(x):
    """Return the name of the first crew entry whose job is 'Director'.

    :param x: iterable of crew dicts, each with 'job' and 'name' keys
    :return: the director's name, or ``np.nan`` when no entry matches
    """
    director_names = (member['name'] for member in x if member['job'] == 'Director')
    return next(director_names, np.nan)
def get_list(x):
    """Return the 'name' values of at most the first three entries of ``x``.

    :param x: list of dicts that each carry a 'name' key; anything that is
        not a list is treated as missing data
    :return: list with up to three names, or an empty list for non-list input
    """
    if not isinstance(x, list):
        return []
    all_names = [entry['name'] for entry in x]
    # Slicing is a no-op for three or fewer names, so no length check is needed.
    return all_names[:3]
def clean_data(x):
    """Strip spaces from and lower-case a string or every string in a list.

    :param x: a string, a list of strings, or any other value
    :return: the normalised string, a list of normalised strings, or ``''``
        when ``x`` is neither a string nor a list
    """
    def _normalise(text):
        # Remove the spaces first, then lower-case the result.
        return str.lower(text.replace(" ", ""))

    if isinstance(x, str):
        return _normalise(x)
    if isinstance(x, list):
        return [_normalise(entry) for entry in x]
    return ''
def create_soup(x):
    """Build one space-separated string from a row's text features.

    :param x: mapping (e.g. a DataFrame row) with list-valued 'keywords',
        'cast' and 'genres' entries and a string-valued 'director' entry
    :return: keywords, cast, director and genres joined by single spaces,
        in that order
    """
    keywords_part = ' '.join(x['keywords'])
    cast_part = ' '.join(x['cast'])
    director_part = x['director']
    genres_part = ' '.join(x['genres'])
    return ' '.join([keywords_part, cast_part, director_part, genres_part])
class MovieRecommender:
    """Content-based movie recommender built on cosine similarity.

    ``fit`` loads a credits CSV and a movies CSV, builds one "soup" string per
    movie from its keywords, cast, director and genres, and stores the pairwise
    cosine similarity of the word-count vectors of those strings.
    """

    def __init__(self):
        # Both attributes are populated by fit().
        self.df = None
        self.cosine_sim = None

    def fit(self, credits_file, movies_file):
        """
        Fit the recommender to the given data.

        :param credits_file: CSV with the credits; its four columns are renamed
            to id, tittle, cast, crew
        :param movies_file: CSV with the movies; merged with the credits on 'id'
        :return: None
        """
        credits_df = pd.read_csv(credits_file)
        movies_df = pd.read_csv(movies_file)
        # NOTE(review): 'tittle' (sic) presumably avoids a clash with a 'title'
        # column in the movies file during the merge - confirm before renaming.
        credits_df.columns = ['id', 'tittle', 'cast', 'crew']
        merged = movies_df.merge(credits_df, on='id')
        merged['overview'] = merged['overview'].fillna('')

        # These columns hold Python-literal strings; parse them into objects.
        for feature in ('cast', 'crew', 'keywords', 'genres'):
            merged[feature] = merged[feature].apply(literal_eval)
        merged['director'] = merged['crew'].apply(get_director)
        for feature in ('cast', 'keywords', 'genres'):
            merged[feature] = merged[feature].apply(get_list)
        for feature in ('cast', 'keywords', 'director', 'genres'):
            merged[feature] = merged[feature].apply(clean_data)
        merged['soup'] = merged.apply(create_soup, axis=1)

        vectorizer = CountVectorizer(stop_words='english')
        count_matrix = vectorizer.fit_transform(merged['soup'])
        # Publish state only after every step succeeded, so a failed fit never
        # leaves a half-processed frame behind.
        self.cosine_sim = cosine_similarity(count_matrix, count_matrix)
        self.df = merged.reset_index()

    def _get_recommendations_one_input(self, movie_id, top_n=100):
        """
        Build recommendations based on a single movie.

        :param movie_id: id of the movie to recommend for
        :param top_n: maximum number of recommendations to return
        :return: list [movie_ids, similarity_scores], both np.array, ordered by
            descending similarity (ties keep row order); the queried movie
            itself is never included
        :raises KeyError: if movie_id is not present in the fitted data
        """
        positions = np.flatnonzero(self.df['id'].to_numpy() == movie_id)
        if positions.size == 0:
            raise KeyError(movie_id)
        # If an id occurs more than once, use its first row.
        idx = int(positions[0])

        scores = np.asarray(self.cosine_sim[idx], dtype=float)
        # A stable sort on the negated scores gives descending order with ties
        # resolved by row position.
        order = np.argsort(-scores, kind='stable')
        # Exclude the query row explicitly: it is not guaranteed to rank first
        # when another movie has an equal score.
        order = order[order != idx][:top_n]
        return [self.df['id'].iloc[order].values, scores[order]]

    def get_recommendations(self, movie_ids: list) -> dict:
        """
        Build recommendations based on the ids of the given movies.

        :param movie_ids: ids of the movies the recommendations are based on
        :return: dict {movie_id: score}; the score is the sum of the movie's
            similarities to the inputs that recommended it, divided by the
            number of inputs and rounded to 4 decimal places. Input movies are
            never part of the result.
        :raises KeyError: if any of movie_ids is not present in the fitted data
        """
        requested = set(movie_ids)
        n_inputs = len(movie_ids)
        totals = {}
        for movie_id in movie_ids:
            recommended_ids, sim_scores = self._get_recommendations_one_input(movie_id)
            for recommended_id, sim_score in zip(recommended_ids, sim_scores):
                key = int(recommended_id)
                if key in requested:
                    continue
                totals[key] = totals.get(key, 0.0) + float(sim_score)
        # Round once at the end so every value really has at most 4 decimals.
        return {key: round(total / n_inputs, 4) for key, total in totals.items()}
# Build the shared recommender once, when this module is first executed.
# The CSV paths are relative, so they are resolved against the process's
# current working directory.
recommender = MovieRecommender()
recommender.fit('datasets/tmdb_5000_credits.csv',
'datasets/tmdb_5000_movies.csv')
def make_cache_key():
    """Derive a cache key from the JSON body of the current request.

    A list body is sorted first so that the same ids in a different order
    share one cache entry. If the elements cannot be ordered (for example a
    mix of numbers and strings) the list is hashed in its original order
    instead of raising.

    :return: hex MD5 digest of the JSON-serialised body
    """
    data = request.get_json()
    if isinstance(data, list):
        try:
            data = sorted(data)
        except TypeError:
            # Unorderable elements: keep the payload as sent. The key is then
            # order-sensitive, which only costs a possible cache miss.
            pass
    # MD5 is used purely as a compact fingerprint, not for security.
    return hashlib.md5(json.dumps(data).encode('utf-8')).hexdigest()
@app.route("/api/v3/AI_recommendations", methods=["POST"])
@cache.cached(timeout=300, key_prefix=make_cache_key)
def AI_recommendations():
    """Return recommendations for the movie ids posted as a JSON list.

    Responses are cached for 300 seconds under the key produced by
    ``make_cache_key``.

    :return: JSON object {movie_id: score} on success; a JSON error object
        with status 400 when the body is not a list of integers, or 404 when
        an id is not known to the recommender
    """
    ids = request.get_json()
    # The body is untrusted input: accept only a list of real integers
    # (bool is a subclass of int, so it is rejected explicitly).
    is_id_list = isinstance(ids, list) and all(
        isinstance(item, int) and not isinstance(item, bool) for item in ids
    )
    if not is_id_list:
        return jsonify({"error": "Request body must be a JSON list of integer movie ids"}), 400
    try:
        recommendations = recommender.get_recommendations(ids)
    except KeyError as exc:
        # The recommender raises KeyError for ids missing from its data set.
        return jsonify({"error": f"Unknown movie id: {exc}"}), 404
    return jsonify(recommendations)
if __name__ == "__main__":
    # Local import: only the startup retry loop below needs it.
    import time

    config = ConfigParser()
    config.read("init_scripts/constants.ini")
    # Resolve the settings once, outside the retry loop, so that a missing
    # file/section/key or a non-numeric port fails immediately instead of
    # being retried forever.
    postgres = config["postgres"]
    connection_args = {
        "host": postgres["host"],
        "database": postgres["database"],
        "user": postgres["user"],
        "password": postgres["password"],
        "port": int(postgres["port"]),
    }
    # Keep retrying until the database accepts the connection; only connection
    # failures are retried, and each attempt is followed by a short pause.
    while True:
        try:
            conn = psycopg2.connect(**connection_args)
        except psycopg2.OperationalError:
            print("Trying to connect with database")
            time.sleep(1)
        else:
            break
    try:
        cache.init_app(app)
        # NOTE(review): debug=True enables the Werkzeug debugger and reloader;
        # confirm this entry point is used for local development only.
        app.run(host="localhost", port=8081, debug=True)
    finally:
        # Release the connection even if the server exits with an error.
        conn.close()