feat: merged with main

2026-07-04 16:23:11 +02:00 · 2024-05-28 05:05:00 +02:00 · 2024-05-28 05:05:00 +02:00 · 62a72995e6
commit 62a72995e6
parent 6c0d666085 c18cdacb1a
14 changed files with 9791 additions and 0 deletions
--- a/15
+++ b/15
@ -0,0 +1,15 @@
+# Image that serves the application's files with http-server on port 8080.
+FROM node:latest
+
+# Create app directory
+WORKDIR /usr/src/app
+
+# Install http-server globally; it is the process started by CMD below
+RUN npm install -g http-server
+
+# Bundle app source
+COPY . .
+
+EXPOSE 8080
+# Exec form does no shell word-splitting: the flag and its value must be
+# separate array elements, otherwise "-p 8080" arrives as a single argument.
+CMD ["http-server", "-p", "8080"]
--- a/docs/Architecture_Requirements.docx
+++ b/docs/Architecture_Requirements.docx
--- a/docs/IndividualServices/AI.txt
+++ b/docs/IndividualServices/AI.txt
--- a/docs/IndividualServices/analytics_service.txt
+++ b/docs/IndividualServices/analytics_service.txt
@ -0,0 +1,12 @@
+1. Receive data from database
+2. Calculate data:    
+    a) User Count 
+    b) Movies Count 
+    c) Rating Count 
+    d) Total data size 
+    e) "Hot" movies -> movies that received most ratings during last 
+        week 
+    f) System logs (keeps track of all messages exchanged by all services)
+        1. timestamp (when the message was sent)
+        2. Message raw data 
+3. Send the calculated data to the frontend upon request 
--- a/docs/IndividualServices/backend.txt
+++ b/docs/IndividualServices/backend.txt
--- a/docs/IndividualServices/frontend.txt
+++ b/docs/IndividualServices/frontend.txt
--- a/docs/IndividualServices/notification.txt
+++ b/docs/IndividualServices/notification.txt
--- a/docs/Meetings/first_meeting.docx
+++ b/docs/Meetings/first_meeting.docx
--- a/docs/project_management_meeting_one.docx
+++ b/docs/project_management_meeting_one.docx
--- a/docs/toDos/deployment_script.txt
+++ b/docs/toDos/deployment_script.txt
@ -0,0 +1,25 @@
+A single script, when run, should deploy the entire solution to some 
+cloud service
+
+It should:
+1. Build everything 
+2. Connect to cloud service (Azure?)
+3. Send the data 
+
+After the script has been run, the website should be accessible under some address 
+    (cloud service should provide this address?)
+
+Decide:
+    What cloud service? (Azure?) Requirements:
+        a. Free (https://github.com/cloudcommunity/Cloud-Free-Tier-Comparison)
+        b. Popular 
+            AWS:
+                +   Most popular 
+                +   "Always" free
+                +   AWS CDK available
+            Azure 
+                -Microsoft
+            Google Cloud 
+                +We have google accounts anyway
+    What technology for script (Ansible?)
+
--- a/docs/toDos/monitoring_service.txt
+++ b/docs/toDos/monitoring_service.txt
@ -0,0 +1,12 @@
+Monitoring service 
+The monitoring service should keep track of all communication 
+sent between all services 
+Store logs with:
+1. timestamp (when the message was sent)
+2. Message raw data 
+
+To decide: 
+    What to use for Monitoring service? 
+    Maybe cloud service will provide us with functioning one?
+    Maybe deployment tool already has one?
+
--- a/movie_recommendations/datasets/tmdb_5000_credits.csv
+++ b/movie_recommendations/datasets/tmdb_5000_credits.csv
--- a/movie_recommendations/datasets/tmdb_5000_movies.csv
+++ b/movie_recommendations/datasets/tmdb_5000_movies.csv
--- a/movie_recommendations/movie_recommender.py
+++ b/movie_recommendations/movie_recommender.py
@ -0,0 +1,119 @@
+import pandas as pd
+import numpy as np
+from ast import literal_eval
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+
+
+def get_director(x):
+    """Return the name of the first crew entry whose job is 'Director'.
+
+    :param x: iterable of crew records, each indexable by 'job' and 'name'
+    :return: the director's name, or np.nan when no entry has that job
+    """
+    director_names = (member['name'] for member in x if member['job'] == 'Director')
+    # next() stops at the first match, exactly like the early return in a loop.
+    return next(director_names, np.nan)
+
+
+def get_list(x):
+    """Return the 'name' of at most the first three records in a list.
+
+    :param x: a list of records indexable by 'name'; any other type is ignored
+    :return: list with up to three names, or an empty list for non-list input
+    """
+    if not isinstance(x, list):
+        return []
+    # Collect every name first, then truncate, so a malformed record anywhere
+    # in the list still surfaces as an error.
+    all_names = [record['name'] for record in x]
+    return all_names[:3]
+
+
+def clean_data(x):
+    """Lower-case the input and strip its spaces.
+
+    :param x: a list of strings, a single string, or anything else
+    :return: a list of cleaned strings for list input, one cleaned string for
+        string input, and '' for every other type
+    """
+    if isinstance(x, list):
+        return [item.replace(" ", "").lower() for item in x]
+    if isinstance(x, str):
+        return x.replace(" ", "").lower()
+    return ''
+
+
+def create_soup(x):
+    """Join keywords, cast, director and genres into one space-separated string.
+
+    :param x: mapping with list values under 'keywords', 'cast' and 'genres'
+        and a string under 'director'
+    :return: the four parts, in that order, separated by single spaces
+    """
+    parts = [
+        ' '.join(x['keywords']),
+        ' '.join(x['cast']),
+        x['director'],
+        ' '.join(x['genres']),
+    ]
+    return ' '.join(parts)
+
+
+class MovieRecommender:
+    """Content-based movie recommender backed by a cosine-similarity matrix.
+
+    ``fit`` builds one text "soup" per movie from its keywords, cast,
+    director and genres, vectorizes the soups with ``CountVectorizer`` and
+    stores the pairwise cosine similarities. ``get_recommendations`` then
+    averages those similarities over a list of seed movies.
+    """
+
+    def __init__(self):
+        # Both attributes stay None until fit() has completed.
+        self.df = None
+        self.cosine_sim = None
+
+    def _check_fitted(self):
+        """Raise a clear error when the model is used before fit()."""
+        if self.df is None or self.cosine_sim is None:
+            raise RuntimeError(
+                "MovieRecommender.fit() must be called before requesting recommendations"
+            )
+
+    def fit(self, credits_file, movies_file):
+        """
+        Fit the recommender to the given data.
+
+        :param credits_file: CSV with the credits (passed to pd.read_csv)
+        :param movies_file: CSV with the movies (passed to pd.read_csv)
+        :return: None
+        """
+        credits_df = pd.read_csv(credits_file)
+        movies_df = pd.read_csv(movies_file)
+        # NOTE(review): 'tittle' looks like a typo, but it may exist to keep
+        # this column from clashing with a title column of the movies table
+        # in the merge below -- confirm before renaming.
+        credits_df.columns = ['id', 'tittle', 'cast', 'crew']
+        movies_df = movies_df.merge(credits_df, on='id')
+        movies_df['overview'] = movies_df['overview'].fillna('')
+
+        # These columns hold Python-literal strings; parse them into objects.
+        for feature in ('cast', 'crew', 'keywords', 'genres'):
+            movies_df[feature] = movies_df[feature].apply(literal_eval)
+
+        movies_df['director'] = movies_df['crew'].apply(get_director)
+
+        for feature in ('cast', 'keywords', 'genres'):
+            movies_df[feature] = movies_df[feature].apply(get_list)
+
+        for feature in ('cast', 'keywords', 'director', 'genres'):
+            movies_df[feature] = movies_df[feature].apply(clean_data)
+
+        movies_df['soup'] = movies_df.apply(create_soup, axis=1)
+
+        count = CountVectorizer(stop_words='english')
+        count_matrix = count.fit_transform(movies_df['soup'])
+        self.cosine_sim = cosine_similarity(count_matrix, count_matrix)
+
+        # Published only once everything above succeeded, so a failed fit
+        # never leaves a half-processed frame behind.
+        self.df = movies_df.reset_index()
+
+    def _get_recommendations_one_input(self, movie_id, top_n=100):
+        """
+        Build recommendations based on a single movie.
+
+        :param movie_id: id of the movie to build recommendations for
+        :param top_n: maximum number of recommendations to return
+        :return: list [movie_ids, similarity_scores], both np.array, ordered
+            from most to least similar; the seed movie itself is never included
+        :raises KeyError: if movie_id is not present in the fitted data
+        :raises RuntimeError: if fit() has not been called yet
+        """
+        self._check_fitted()
+        # Row positions in self.df line up with rows of the similarity matrix.
+        positions = np.flatnonzero(self.df['id'].to_numpy() == movie_id)
+        if positions.size == 0:
+            raise KeyError(movie_id)
+        # If an id occurs more than once, the first occurrence is used.
+        idx = int(positions[0])
+
+        row = self.cosine_sim[idx]
+        # Exclude the seed by position instead of assuming it sorts first:
+        # another movie can tie with the seed's self-similarity and would
+        # otherwise be dropped in its place.
+        candidates = [i for i in range(len(row)) if i != idx]
+        candidates.sort(key=lambda i: row[i], reverse=True)
+        top = candidates[:top_n]
+
+        sim_scores = np.array([row[i] for i in top])
+        return [self.df['id'].iloc[top].values, sim_scores]
+
+    def get_recommendations(self, movie_ids: list) -> dict:
+        """
+        Build recommendations based on the ids of the given movies.
+
+        :param movie_ids: ids of the movies the recommendations are based on
+        :return: dict {movie_id: similarity_score}; each score is the sum of
+            the per-seed similarities divided by the number of seeds, and the
+            seed movies themselves are left out
+        """
+        seed_ids = set(movie_ids)
+        seed_count = len(movie_ids)
+        recommended_movies = {}
+        for movie_id in movie_ids:
+            recommended_ids, sim_scores = self._get_recommendations_one_input(movie_id)
+            for recommended_id, sim_score in zip(recommended_ids, sim_scores):
+                if recommended_id in seed_ids:
+                    continue
+                recommended_movies[recommended_id] = (
+                    recommended_movies.get(recommended_id, 0.0) + sim_score / seed_count
+                )
+        return recommended_movies
+
+
+# Example usage:
+if __name__ == "__main__":
+    engine = MovieRecommender()
+    engine.fit(
+        'datasets/tmdb_5000_credits.csv',
+        'datasets/tmdb_5000_movies.csv',
+    )
+    # Seed the recommender with three movie ids and print the scored result.
+    print(engine.get_recommendations([49026, 155, 312113]))