feat: merged with main

This commit is contained in:
Krzysztof Rudnicki 2024-05-28 05:05:00 +02:00
commit 62a72995e6
14 changed files with 9791 additions and 0 deletions

15
Dockerfile Normal file
View File

@ -0,0 +1,15 @@
FROM node:latest
# Create app directory
WORKDIR /usr/src/app
# Install app dependencies
# http-server is installed globally so it is available on PATH for CMD
RUN npm install -g http-server
# Bundle app source
COPY . .
EXPOSE 8080
# Exec-form CMD hands every array element to the process as ONE argv entry,
# so the flag and its value must be separate elements ("-p 8080" as a single
# element would reach http-server as one argument containing a space).
CMD ["http-server", "-p", "8080"]

Binary file not shown.

View File

View File

@ -0,0 +1,12 @@
1. Receive data from database
2. Calculate data:
a) User Count
b) Movies Count
c) Rating Count
d) Total data size
e) "Hot" movies -> movies that received most ratings during last
week
f) System logs (keeps track of all messages exchanged by all services)
1. timestamp (when the message was sent)
2. Message raw data
3. Send requests to frontend upon request

View File

View File

View File

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,25 @@
One script, when run, should deploy the entire solution to some
cloud service
It should:
1. Build everything
2. Connect to cloud service (Azure?)
3. Send the data
After it has been run, the website should be accessible under some address
(cloud service should provide this address?)
Decide:
What cloud service? (Azure?) Requirements:
a. Free (https://github.com/cloudcommunity/Cloud-Free-Tier-Comparison)
b. Popular
AWS:
+ Most popular
+ "Always" free
+ AWS CDK available
Azure
-Microsoft
Google Cloud
+We have google accounts anyway
What technology for script (Ansible?)

View File

@ -0,0 +1,12 @@
Monitoring service
Monitoring service should keep track of all communication
sent between all services
Store logs with:
1. timestamp (when the message was sent)
2. Message raw data
To decide:
What to use for Monitoring service?
Maybe cloud service will provide us with functioning one?
Maybe deployment tool already has one?

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,119 @@
import pandas as pd
import numpy as np
from ast import literal_eval
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
def get_director(x):
    """Return the name of the first crew entry whose job is 'Director'.

    :param x: iterable of crew dicts, each read via its 'job' and 'name' keys
    :return: the director's name, or ``np.nan`` when no entry matches
    """
    # The generator is lazy, so scanning stops at the first matching entry.
    return next(
        (member['name'] for member in x if member['job'] == 'Director'),
        np.nan,
    )
def get_list(x):
    """Return up to the first three 'name' values from a list of dicts.

    :param x: list of dicts that each have a 'name' key; anything that is
        not a list is treated as missing data
    :return: list with at most three names, or ``[]`` for non-list input
    """
    if not isinstance(x, list):
        return []
    # Collect every name first, then keep only the leading three.
    return [entry['name'] for entry in x][:3]
def clean_data(x):
    """Lower-case the input and strip its spaces.

    :param x: a list of strings, a single string, or any other value
    :return: a list of normalized strings for list input, one normalized
        string for string input, and ``''`` for anything else
    """
    if isinstance(x, list):
        return [str.lower(token.replace(" ", "")) for token in x]
    if isinstance(x, str):
        return str.lower(x.replace(" ", ""))
    # Non-list, non-string values (e.g. NaN) collapse to an empty string.
    return ''
def create_soup(x):
    """Build the space-separated text blob used for vectorization.

    :param x: mapping with 'keywords', 'cast' and 'genres' (iterables of
        strings) and 'director' (a string)
    :return: keywords, cast, director and genres joined by single spaces
    """
    keywords = ' '.join(x['keywords'])
    cast = ' '.join(x['cast'])
    director = x['director']
    genres = ' '.join(x['genres'])
    return ' '.join([keywords, cast, director, genres])
class MovieRecommender:
    """Content-based movie recommender using cosine similarity of metadata.

    ``fit`` builds a text "soup" (keywords, leading cast, director, genres)
    for every movie and stores the pairwise cosine-similarity matrix.
    ``get_recommendations`` scores candidate movies against a list of input
    movie ids.
    """

    def __init__(self):
        # Both attributes are populated by fit().
        self.df = None
        self.cosine_sim = None

    def fit(self, credits_file, movies_file):
        """Fit the recommender to the given data.

        :param credits_file: CSV with the credits (must have four columns,
            which are renamed to id / tittle / cast / crew)
        :param movies_file: CSV with the movies (must contain 'id',
            'overview', 'keywords' and 'genres' columns)
        :return: None
        """
        df1 = pd.read_csv(credits_file)
        df2 = pd.read_csv(movies_file)
        # NOTE(review): 'tittle' looks like a typo, but it is a runtime column
        # name; presumably it avoids a clash with a 'title' column in the
        # movies file — confirm before renaming.
        df1.columns = ['id', 'tittle', 'cast', 'crew']
        df2 = df2.merge(df1, on='id')
        df2['overview'] = df2['overview'].fillna('')

        # These columns are parsed from their string form into Python objects.
        for feature in ['cast', 'crew', 'keywords', 'genres']:
            df2[feature] = df2[feature].apply(literal_eval)
        df2['director'] = df2['crew'].apply(get_director)
        for feature in ['cast', 'keywords', 'genres']:
            df2[feature] = df2[feature].apply(get_list)
        for feature in ['cast', 'keywords', 'director', 'genres']:
            df2[feature] = df2[feature].apply(clean_data)
        df2['soup'] = df2.apply(create_soup, axis=1)

        count = CountVectorizer(stop_words='english')
        count_matrix = count.fit_transform(df2['soup'])
        # Assign state only once everything succeeded, so a failed fit does
        # not leave a half-processed frame behind.
        self.cosine_sim = cosine_similarity(count_matrix, count_matrix)
        self.df = df2.reset_index()

    def _get_recommendations_one_input(self, movie_id):
        """Build recommendations based on a single movie.

        :param movie_id: id of the movie to find similar movies for
        :return: list ``[movie_ids, similarity_scores]`` (both ``np.ndarray``)
            holding up to 100 other movies, most similar first
        :raises KeyError: if ``movie_id`` is not present in the fitted data
        """
        ids = self.df['id'].to_numpy()
        positions = np.flatnonzero(ids == movie_id)
        if positions.size == 0:
            raise KeyError(movie_id)
        # Row position in df doubles as the row/column in cosine_sim.
        idx = positions[0]

        scores = np.asarray(self.cosine_sim[idx])
        # Stable descending sort: ties keep their original row order.
        order = np.argsort(-scores, kind="stable")
        # Drop the query movie by position, not by rank: with tied top scores
        # (or an all-zero row) it is not guaranteed to be ranked first.
        order = order[order != idx][:100]
        return [ids[order], scores[order]]

    def get_recommendations(self, movie_ids: list) -> dict:
        """Build recommendations based on the ids of the given movies.

        :param movie_ids: ids of the movies the recommendations are based on
        :return: dict ``{movie_id: score}`` where the score is the sum of the
            movie's similarity to each input movie divided by the number of
            input movies; input movies themselves are never included
        """
        recommended_movies = {}
        if not movie_ids:
            return recommended_movies

        # Build the lookup set once instead of scanning the list per candidate.
        input_ids = set(movie_ids)
        total = len(movie_ids)
        for movie_id in movie_ids:
            recommended_ids, sim_scores = self._get_recommendations_one_input(movie_id)
            for recommended_id, sim_score in zip(recommended_ids, sim_scores):
                if recommended_id in input_ids:
                    continue
                recommended_movies[recommended_id] = (
                    recommended_movies.get(recommended_id, 0.0) + sim_score / total
                )
        return recommended_movies
# Example usage:
if __name__ == "__main__":
    recommender = MovieRecommender()
    recommender.fit(
        'datasets/tmdb_5000_credits.csv',
        'datasets/tmdb_5000_movies.csv',
    )
    # Print the {movie_id: score} recommendations for three seed movies.
    print(recommender.get_recommendations([49026, 155, 312113]))