mirror of
https://github.com/kuhyx/WUT_Computer_Science.git
synced 2026-07-04 16:23:11 +02:00
feat: merged with main
This commit is contained in:
commit
62a72995e6
15
Dockerfile
Normal file
15
Dockerfile
Normal file
@ -0,0 +1,15 @@
|
||||
# NOTE(review): "latest" is a moving tag; consider pinning a specific Node
# version so rebuilds are reproducible.
FROM node:latest

# Create app directory
WORKDIR /usr/src/app

# Install the static file server that serves the app (globally, so it is on PATH)
RUN npm install -g http-server

# Bundle app source
COPY . .

EXPOSE 8080

# Exec form: every argument must be its own array element. Writing "-p 8080"
# as one element hands http-server a single argv token containing a space.
CMD ["http-server", "-p", "8080"]
|
||||
BIN
docs/Architecture_Requirements.docx
Normal file
BIN
docs/Architecture_Requirements.docx
Normal file
Binary file not shown.
0
docs/IndividualServices/AI.txt
Normal file
0
docs/IndividualServices/AI.txt
Normal file
12
docs/IndividualServices/analytics_service.txt
Normal file
12
docs/IndividualServices/analytics_service.txt
Normal file
@ -0,0 +1,12 @@
|
||||
1. Receive data from database
|
||||
2. Calculate data:
|
||||
a) User Count
|
||||
b) Movies Count
|
||||
c) Rating Count
|
||||
d) Total data size
|
||||
e) "Hot" movies -> movies that received the most ratings during the last
|
||||
week
|
||||
f) System logs (keeps track of all messages exchanged by all services)
|
||||
1. Timestamp (when the message was sent)
|
||||
2. Message raw data
|
||||
3. Send data to the frontend upon request
|
||||
0
docs/IndividualServices/backend.txt
Normal file
0
docs/IndividualServices/backend.txt
Normal file
0
docs/IndividualServices/frontend.txt
Normal file
0
docs/IndividualServices/frontend.txt
Normal file
0
docs/IndividualServices/notification.txt
Normal file
0
docs/IndividualServices/notification.txt
Normal file
BIN
docs/Meetings/first_meeting.docx
Normal file
BIN
docs/Meetings/first_meeting.docx
Normal file
Binary file not shown.
BIN
docs/project_management_meeting_one.docx
Normal file
BIN
docs/project_management_meeting_one.docx
Normal file
Binary file not shown.
25
docs/toDos/deployment_script.txt
Normal file
25
docs/toDos/deployment_script.txt
Normal file
@ -0,0 +1,25 @@
|
||||
One script, when run, should deploy the entire solution to some
|
||||
cloud service
|
||||
|
||||
It should:
|
||||
1. Build everything
|
||||
2. Connect to cloud service (Azure?)
|
||||
3. Send the data
|
||||
|
||||
After it has been run, the website should be accessible under some address
|
||||
(cloud service should provide this address?)
|
||||
|
||||
Decide:
|
||||
What cloud service? (Azure?) Requirements:
|
||||
a. Free (https://github.com/cloudcommunity/Cloud-Free-Tier-Comparison)
|
||||
b. Popular
|
||||
AWS:
|
||||
+ Most popular
|
||||
+ "Always" free
|
||||
+ AWS CDK available
|
||||
Azure
|
||||
-Microsoft
|
||||
Google Cloud
|
||||
+We have google accounts anyway
|
||||
What technology for script (Ansible?)
|
||||
|
||||
12
docs/toDos/monitoring_service.txt
Normal file
12
docs/toDos/monitoring_service.txt
Normal file
@ -0,0 +1,12 @@
|
||||
Monitoring service
|
||||
Monitoring service should keep track of all communication
|
||||
sent between all services
|
||||
Store logs with:
|
||||
1. Timestamp (when the message was sent)
|
||||
2. Message raw data
|
||||
|
||||
To decide:
|
||||
What to use for Monitoring service?
|
||||
Maybe cloud service will provide us with functioning one?
|
||||
Maybe deployment tool already has one?
|
||||
|
||||
4804
movie_recommendations/datasets/tmdb_5000_credits.csv
Normal file
4804
movie_recommendations/datasets/tmdb_5000_credits.csv
Normal file
File diff suppressed because one or more lines are too long
4804
movie_recommendations/datasets/tmdb_5000_movies.csv
Normal file
4804
movie_recommendations/datasets/tmdb_5000_movies.csv
Normal file
File diff suppressed because one or more lines are too long
119
movie_recommendations/movie_recommender.py
Normal file
119
movie_recommendations/movie_recommender.py
Normal file
@ -0,0 +1,119 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from ast import literal_eval
|
||||
from sklearn.feature_extraction.text import CountVectorizer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
|
||||
|
||||
def get_director(x):
    """Return the name of the first crew entry whose job is 'Director'.

    :param x: iterable of crew entries, each indexable by 'job' and 'name'
    :return: the matching entry's 'name', or ``np.nan`` when no entry matches
    """
    return next(
        (member['name'] for member in x if member['job'] == 'Director'),
        np.nan,
    )
|
||||
|
||||
|
||||
def get_list(x):
    """Return at most the first three 'name' values from a list of entries.

    :param x: a list of entries indexable by 'name'; any non-list value
        (e.g. a missing cell) produces an empty list
    :return: list holding up to three names, in their original order
    """
    if not isinstance(x, list):
        return []
    # Collect every name first (as the original does) and only then truncate.
    every_name = [entry['name'] for entry in x]
    return every_name[:3]
|
||||
|
||||
|
||||
def clean_data(x):
    """Normalize text by removing spaces and lowercasing it.

    :param x: a list of strings, a single string, or anything else
    :return: a list of normalized strings for a list input, one normalized
        string for a string input, and ``''`` for every other input
    """
    def _squash(text):
        # Drop spaces so multi-word names collapse into a single token.
        return str.lower(text.replace(" ", ""))

    if isinstance(x, list):
        return [_squash(item) for item in x]
    if isinstance(x, str):
        return _squash(x)
    return ''
|
||||
|
||||
|
||||
def create_soup(x):
    """Build one space-separated text blob from a row's metadata fields.

    :param x: mapping (e.g. a DataFrame row) with 'keywords', 'cast' and
        'genres' as iterables of strings and 'director' as a string
    :return: keywords, cast, director and genres joined by single spaces,
        in that order
    """
    keywords = ' '.join(x['keywords'])
    cast = ' '.join(x['cast'])
    genres = ' '.join(x['genres'])
    return ' '.join((keywords, cast, x['director'], genres))
|
||||
|
||||
|
||||
class MovieRecommender:
    """Content-based movie recommender using cosine similarity of metadata.

    After :meth:`fit`, ``df`` holds the merged movies/credits table and
    ``cosine_sim`` the pairwise similarity matrix; row ``i`` of the matrix
    corresponds to row ``i`` of ``df``.
    """

    # Maximum number of neighbours returned for a single input movie.
    _TOP_N = 100

    def __init__(self):
        self.df = None
        self.cosine_sim = None

    def fit(self, credits_file, movies_file):
        """Fit the recommender to the given data.

        :param credits_file: CSV with credits; must have exactly four columns,
            which are renamed positionally to id / tittle / cast / crew
        :param movies_file: CSV with movies; the 'id', 'overview', 'keywords'
            and 'genres' columns are used here
        :return: None
        """
        credits_df = pd.read_csv(credits_file)
        movies_df = pd.read_csv(movies_file)
        # 'tittle' (sic) is kept byte-for-byte: it becomes a column of self.df,
        # and renaming it to 'title' could collide with a same-named column in
        # the movies table during the merge (TODO confirm against the dataset).
        credits_df.columns = ['id', 'tittle', 'cast', 'crew']
        movies_df = movies_df.merge(credits_df, on='id')
        movies_df['overview'] = movies_df['overview'].fillna('')

        # These columns are stored as stringified Python literals in the CSV.
        for feature in ('cast', 'crew', 'keywords', 'genres'):
            movies_df[feature] = movies_df[feature].apply(literal_eval)

        movies_df['director'] = movies_df['crew'].apply(get_director)

        for feature in ('cast', 'keywords', 'genres'):
            movies_df[feature] = movies_df[feature].apply(get_list)

        for feature in ('cast', 'keywords', 'director', 'genres'):
            movies_df[feature] = movies_df[feature].apply(clean_data)

        movies_df['soup'] = movies_df.apply(create_soup, axis=1)

        count = CountVectorizer(stop_words='english')
        count_matrix = count.fit_transform(movies_df['soup'])
        self.cosine_sim = cosine_similarity(count_matrix, count_matrix)

        # Published only once everything succeeded, so a failed fit never
        # leaves a half-processed frame in self.df.
        self.df = movies_df.reset_index()

    def _get_recommendations_one_input(self, movie_id):
        """Create recommendations based on a single movie.

        :param movie_id: id of the movie to build recommendations for
        :return: list ``[movie_ids, similarity_scores]`` where both items are
            np.array, ordered by descending similarity (ties keep row order),
            at most 100 entries long, never containing ``movie_id``'s own row
        :raises KeyError: if ``movie_id`` is not present in ``self.df['id']``
        """
        positions = np.flatnonzero(self.df['id'].to_numpy() == movie_id)
        if positions.size == 0:
            raise KeyError(movie_id)
        # First match wins, so a duplicated id still resolves to one row.
        idx = positions[0]

        scores = np.asarray(self.cosine_sim[idx])
        # Stable sort on the negated scores: descending similarity, with ties
        # left in ascending row order.
        ranked = np.argsort(-scores, kind='stable')
        # Exclude the query row explicitly. Dropping "the first result" is
        # wrong whenever another movie ties with it for the top score.
        ranked = ranked[ranked != idx][:self._TOP_N]
        return [self.df['id'].iloc[ranked].values, scores[ranked]]

    def get_recommendations(self, movie_ids: list) -> dict:
        """Create recommendations based on the ids of the given movies.

        :param movie_ids: ids of the movies the recommendations are based on
        :return: dict ``{movie_id: similarity_score}`` where each score is the
            sum of the per-input similarities divided by ``len(movie_ids)``;
            the input movies themselves are never included
        :raises KeyError: if any id in ``movie_ids`` is unknown
        """
        excluded = set(movie_ids)
        n_inputs = len(movie_ids)
        recommended_movies = {}
        for movie_id in movie_ids:
            recommended_ids, sim_scores = self._get_recommendations_one_input(movie_id)
            for recommended_id, sim_score in zip(recommended_ids, sim_scores):
                if recommended_id in excluded:
                    continue
                recommended_movies[recommended_id] = (
                    recommended_movies.get(recommended_id, 0.0) + sim_score / n_inputs
                )
        return recommended_movies
|
||||
|
||||
|
||||
# Example usage:
if __name__ == "__main__":
    demo = MovieRecommender()
    demo.fit(
        'datasets/tmdb_5000_credits.csv',
        'datasets/tmdb_5000_movies.csv',
    )
    # Print the {movie_id: score} mapping for three seed movie ids.
    print(demo.get_recommendations([49026, 155, 312113]))
|
||||
Loading…
Reference in New Issue
Block a user