mirror of
https://github.com/kuhyx/WUT_Computer_Science.git
synced 2026-07-04 16:03:11 +02:00
fix: remove duplicated l1, l2 and cityblock metrics
This commit is contained in:
parent
e05fef37a2
commit
f7c66e552d
@ -299,6 +299,7 @@ def handle_arguments():
|
||||
def auto_mode(data_limit=-1, seed=42, anime="RANDOM"):
|
||||
print("Started auto mode")
|
||||
algorithm_spread = ['auto', 'brute']
|
||||
metric_spread = ['manhattan', 'euclidean', 'cosine']
|
||||
neighbor_spread = [5, "sqrt", "half", "log", "n-1"]
|
||||
# No reason to access and waste computational power every time we run the simulation
|
||||
starting_rating_data, starting_anime_contact_data, starting_rows_number = get_data(
|
||||
@ -310,7 +311,7 @@ def auto_mode(data_limit=-1, seed=42, anime="RANDOM"):
|
||||
for algorithm in algorithm_spread:
|
||||
possibleMetrics = []
|
||||
if algorithm != 'auto':
|
||||
possibleMetrics = sorted(VALID_METRICS_SPARSE[algorithm])
|
||||
possibleMetrics = metric_spread
|
||||
print("testing for algorithm: ", algorithm, possibleMetrics)
|
||||
if possibleMetrics == []:
|
||||
possibleMetrics = [""]
|
||||
|
||||
Binary file not shown.
@ -20,23 +20,33 @@ We decided to use collaborative filtering to develop our model, It makes persona
|
||||
We represent the anime data-set as an embedding vector \\
|
||||
We use a K-nearest neighbors model and decided to test it with different metrics, neighbor counts, and algorithms \\
|
||||
\subsubsection{Algorithms}
|
||||
We decided to test our model with 3 algorithms:
|
||||
We decided to test our model with 2 algorithms:
|
||||
\begin{enumerate}
|
||||
\item Ball Tree
|
||||
\item KD Tree
|
||||
\item Brute
|
||||
\item Auto
|
||||
\end{enumerate}
|
||||
Ball Tree and KD Tree do not work on sparse input (as is the case with our input), so we decided to omit them.
|
||||
|
||||
\subsubsection{Neighbor number}
|
||||
We decided to test our model with 5 different neighbor counts:
|
||||
\begin{enumerate}
|
||||
\item 5
|
||||
\item square root of available data
|
||||
\item half of available data
|
||||
\item logarithm of available data
|
||||
\item n-1 neighbors
|
||||
\item 5 - Popular starting point for small-medium datasets
|
||||
\item square root of available data - Usually helps to balance between underfitting and overfitting
|
||||
\item half of available data - Usually useful for checking the overall trend rather than specific nuances
|
||||
\item logarithm of available data - Used for very large datasets
|
||||
\item n-1 neighbors - Usually leads to overgeneralization, as we use all instances except one for prediction
|
||||
\end{enumerate}
|
||||
|
||||
\subsubsection{Metrics}
|
||||
For the brute algorithm we tested it with all possible metrics:
|
||||
\begin{enumerate}
|
||||
\item Cityblock
|
||||
\item Cosine
|
||||
\item Euclidean
|
||||
\item l1
|
||||
\item l2
|
||||
\item Manhattan
|
||||
\end{enumerate}
|
||||
|
||||
\section{Intermediate results}
|
||||
\subsection{Results}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user