mirror of
https://github.com/kuhyx/WUT_Computer_Science.git
synced 2026-07-04 19:23:03 +02:00
feat: report, make main.py conform to pep8
This commit is contained in:
parent
00a5de7a30
commit
dc5d24826d
301
.gitignore
vendored
301
.gitignore
vendored
@ -161,3 +161,304 @@ cython_debug/
|
||||
|
||||
lab3/*.jpg
|
||||
|
||||
## Core latex/pdflatex auxiliary files:
|
||||
*.aux
|
||||
*.lof
|
||||
*.log
|
||||
*.lot
|
||||
*.fls
|
||||
*.out
|
||||
*.toc
|
||||
*.fmt
|
||||
*.fot
|
||||
*.cb
|
||||
*.cb2
|
||||
.*.lb
|
||||
|
||||
## Intermediate documents:
|
||||
*.dvi
|
||||
*.xdv
|
||||
*-converted-to.*
|
||||
# these rules might exclude image files for figures etc.
|
||||
# *.ps
|
||||
# *.eps
|
||||
# *.pdf
|
||||
|
||||
## Generated if empty string is given at "Please type another file name for output:"
|
||||
.pdf
|
||||
|
||||
## Bibliography auxiliary files (bibtex/biblatex/biber):
|
||||
*.bbl
|
||||
*.bcf
|
||||
*.blg
|
||||
*-blx.aux
|
||||
*-blx.bib
|
||||
*.run.xml
|
||||
|
||||
## Build tool auxiliary files:
|
||||
*.fdb_latexmk
|
||||
*.synctex
|
||||
*.synctex(busy)
|
||||
*.synctex.gz
|
||||
*.synctex.gz(busy)
|
||||
*.pdfsync
|
||||
|
||||
## Build tool directories for auxiliary files
|
||||
# latexrun
|
||||
latex.out/
|
||||
|
||||
## Auxiliary and intermediate files from other packages:
|
||||
# algorithms
|
||||
*.alg
|
||||
*.loa
|
||||
|
||||
# achemso
|
||||
acs-*.bib
|
||||
|
||||
# amsthm
|
||||
*.thm
|
||||
|
||||
# beamer
|
||||
*.nav
|
||||
*.pre
|
||||
*.snm
|
||||
*.vrb
|
||||
|
||||
# changes
|
||||
*.soc
|
||||
|
||||
# comment
|
||||
*.cut
|
||||
|
||||
# cprotect
|
||||
*.cpt
|
||||
|
||||
# elsarticle (documentclass of Elsevier journals)
|
||||
*.spl
|
||||
|
||||
# endnotes
|
||||
*.ent
|
||||
|
||||
# fixme
|
||||
*.lox
|
||||
|
||||
# feynmf/feynmp
|
||||
*.mf
|
||||
*.mp
|
||||
*.t[1-9]
|
||||
*.t[1-9][0-9]
|
||||
*.tfm
|
||||
|
||||
#(r)(e)ledmac/(r)(e)ledpar
|
||||
*.end
|
||||
*.?end
|
||||
*.[1-9]
|
||||
*.[1-9][0-9]
|
||||
*.[1-9][0-9][0-9]
|
||||
*.[1-9]R
|
||||
*.[1-9][0-9]R
|
||||
*.[1-9][0-9][0-9]R
|
||||
*.eledsec[1-9]
|
||||
*.eledsec[1-9]R
|
||||
*.eledsec[1-9][0-9]
|
||||
*.eledsec[1-9][0-9]R
|
||||
*.eledsec[1-9][0-9][0-9]
|
||||
*.eledsec[1-9][0-9][0-9]R
|
||||
|
||||
# glossaries
|
||||
*.acn
|
||||
*.acr
|
||||
*.glg
|
||||
*.glo
|
||||
*.gls
|
||||
*.glsdefs
|
||||
*.lzo
|
||||
*.lzs
|
||||
*.slg
|
||||
*.slo
|
||||
*.sls
|
||||
|
||||
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
|
||||
# *.ist
|
||||
|
||||
# gnuplot
|
||||
*.gnuplot
|
||||
*.table
|
||||
|
||||
# gnuplottex
|
||||
*-gnuplottex-*
|
||||
|
||||
# gregoriotex
|
||||
*.gaux
|
||||
*.glog
|
||||
*.gtex
|
||||
|
||||
# htlatex
|
||||
*.4ct
|
||||
*.4tc
|
||||
*.idv
|
||||
*.lg
|
||||
*.trc
|
||||
*.xref
|
||||
|
||||
# hyperref
|
||||
*.brf
|
||||
|
||||
# knitr
|
||||
*-concordance.tex
|
||||
# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
|
||||
# *.tikz
|
||||
*-tikzDictionary
|
||||
|
||||
# listings
|
||||
*.lol
|
||||
|
||||
# luatexja-ruby
|
||||
*.ltjruby
|
||||
|
||||
# makeidx
|
||||
*.idx
|
||||
*.ilg
|
||||
*.ind
|
||||
|
||||
# minitoc
|
||||
*.maf
|
||||
*.mlf
|
||||
*.mlt
|
||||
*.mtc[0-9]*
|
||||
*.slf[0-9]*
|
||||
*.slt[0-9]*
|
||||
*.stc[0-9]*
|
||||
|
||||
# minted
|
||||
_minted*
|
||||
*.pyg
|
||||
|
||||
# morewrites
|
||||
*.mw
|
||||
|
||||
# newpax
|
||||
*.newpax
|
||||
|
||||
# nomencl
|
||||
*.nlg
|
||||
*.nlo
|
||||
*.nls
|
||||
|
||||
# pax
|
||||
*.pax
|
||||
|
||||
# pdfpcnotes
|
||||
*.pdfpc
|
||||
|
||||
# sagetex
|
||||
*.sagetex.sage
|
||||
*.sagetex.py
|
||||
*.sagetex.scmd
|
||||
|
||||
# scrwfile
|
||||
*.wrt
|
||||
|
||||
# svg
|
||||
svg-inkscape/
|
||||
|
||||
# sympy
|
||||
*.sout
|
||||
*.sympy
|
||||
sympy-plots-for-*.tex/
|
||||
|
||||
# pdfcomment
|
||||
*.upa
|
||||
*.upb
|
||||
|
||||
# pythontex
|
||||
*.pytxcode
|
||||
pythontex-files-*/
|
||||
|
||||
# tcolorbox
|
||||
*.listing
|
||||
|
||||
# thmtools
|
||||
*.loe
|
||||
|
||||
# TikZ & PGF
|
||||
*.dpth
|
||||
*.md5
|
||||
*.auxlock
|
||||
|
||||
# titletoc
|
||||
*.ptc
|
||||
|
||||
# todonotes
|
||||
*.tdo
|
||||
|
||||
# vhistory
|
||||
*.hst
|
||||
*.ver
|
||||
|
||||
# easy-todo
|
||||
*.lod
|
||||
|
||||
# xcolor
|
||||
*.xcp
|
||||
|
||||
# xmpincl
|
||||
*.xmpi
|
||||
|
||||
# xindy
|
||||
*.xdy
|
||||
|
||||
# xypic precompiled matrices and outlines
|
||||
*.xyc
|
||||
*.xyd
|
||||
|
||||
# endfloat
|
||||
*.ttt
|
||||
*.fff
|
||||
|
||||
# Latexian
|
||||
TSWLatexianTemp*
|
||||
|
||||
## Editors:
|
||||
# WinEdt
|
||||
*.bak
|
||||
*.sav
|
||||
|
||||
# Texpad
|
||||
.texpadtmp
|
||||
|
||||
# LyX
|
||||
*.lyx~
|
||||
|
||||
# Kile
|
||||
*.backup
|
||||
|
||||
# gummi
|
||||
.*.swp
|
||||
|
||||
# KBibTeX
|
||||
*~[0-9]*
|
||||
|
||||
# TeXnicCenter
|
||||
*.tps
|
||||
|
||||
# auto folder when using emacs and auctex
|
||||
./auto/*
|
||||
*.el
|
||||
|
||||
# expex forward references with \gathertags
|
||||
*-tags.tex
|
||||
|
||||
# standalone packages
|
||||
*.sta
|
||||
|
||||
# Makeindex log files
|
||||
*.lpz
|
||||
|
||||
# xwatermark package
|
||||
*.xwm
|
||||
|
||||
# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
|
||||
# option is specified. Footnotes are then stored in a file with suffix Notes.bib.
|
||||
# Uncomment the next line to have this generated file ignored.
|
||||
#*Notes.bib
|
||||
|
||||
87
lab4/main.py
87
lab4/main.py
@ -1,40 +1,69 @@
|
||||
"""
|
||||
Program that predicts wine quality based on variant2.csv data
|
||||
"""
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.linear_model import LinearRegression, LogisticRegression
|
||||
from sklearn.svm import SVC
|
||||
from sklearn.metrics import accuracy_score, mean_squared_error
|
||||
from sklearn.metrics import mean_squared_error, accuracy_score, f1_score
|
||||
|
||||
filename = '/home/kuchy/EARIN/lab4/variant2.csv'
|
||||
wine_df = pd.read_csv("variant2.csv")
|
||||
wine_df.head()
|
||||
wine_df.describe()
|
||||
wine_df.info()
|
||||
|
||||
# Load the dataset
|
||||
wine_data = pd.read_csv(filename)
|
||||
|
||||
# Split into features and labels
|
||||
X = wine_data.drop("quality", axis=1)
|
||||
y = wine_data["quality"]
|
||||
X = wine_df.iloc[:, :-1].values
|
||||
y = wine_df.iloc[:, -1].values
|
||||
|
||||
# Split into training and testing sets
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, test_size=0.2, random_state=0)
|
||||
|
||||
# Linear Regression
|
||||
lin_reg = LinearRegression()
|
||||
lin_reg.fit(X_train, y_train)
|
||||
y_pred_lin = lin_reg.predict(X_test)
|
||||
lin_reg_rmse = mean_squared_error(y_test, y_pred_lin, squared=False)
|
||||
scaler = StandardScaler()
|
||||
X_train = scaler.fit_transform(X_train)
|
||||
X_test = scaler.transform(X_test)
|
||||
regressor = LinearRegression()
|
||||
regressor.fit(X_train, y_train)
|
||||
|
||||
# Logistic Regression
|
||||
log_reg = LogisticRegression(multi_class='multinomial', solver='newton-cg')
|
||||
log_reg.fit(X_train, y_train)
|
||||
y_pred_log = log_reg.predict(X_test)
|
||||
log_reg_accuracy = accuracy_score(y_test, y_pred_log)
|
||||
y_pred = regressor.predict(X_test)
|
||||
|
||||
# SVM
|
||||
svm = SVC()
|
||||
svm.fit(X_train, y_train)
|
||||
y_pred_svm = svm.predict(X_test)
|
||||
svm_accuracy = accuracy_score(y_test, y_pred_svm)
|
||||
mse = mean_squared_error(y_test, y_pred)
|
||||
print("MSE:", mse)
|
||||
classifier = LogisticRegression()
|
||||
classifier.fit(X_train, y_train)
|
||||
|
||||
# Compare performance
|
||||
print("Linear Regression RMSE:", lin_reg_rmse)
|
||||
print("Logistic Regression accuracy:", log_reg_accuracy)
|
||||
print("SVM accuracy:", svm_accuracy)
|
||||
y_pred = classifier.predict(X_test)
|
||||
|
||||
accuracy = accuracy_score(y_test, y_pred)
|
||||
print("Accuracy:", accuracy)
|
||||
y_pred_train = regressor.predict(X_train)
|
||||
|
||||
train_mse = mean_squared_error(y_train, y_pred_train)
|
||||
print("Training MSE:", train_mse)
|
||||
|
||||
train_r_squared = regressor.score(X_train, y_train)
|
||||
print("Training R^2:", train_r_squared)
|
||||
|
||||
test_r_squared = regressor.score(X_test, y_test)
|
||||
print("Testing R^2:", test_r_squared)
|
||||
y_pred_train = classifier.predict(X_train)
|
||||
|
||||
train_accuracy = accuracy_score(y_train, y_pred_train)
|
||||
print("Training Accuracy:", train_accuracy)
|
||||
|
||||
train_f1_score = f1_score(y_train, y_pred_train, average="weighted")
|
||||
print("Training F1 Score:", train_f1_score)
|
||||
|
||||
test_f1_score = f1_score(y_test, y_pred, average="weighted")
|
||||
print("Testing F1 Score:", test_f1_score)
|
||||
|
||||
Data1 = sns.countplot(x="quality", data=wine_df)
|
||||
plt.draw()
|
||||
plt.waitforbuttonpress(0)
|
||||
plt.close()
|
||||
Data2 = sns.heatmap(wine_df.corr(), annot=True)
|
||||
plt.draw()
|
||||
plt.waitforbuttonpress(0)
|
||||
plt.close()
|
||||
|
||||
BIN
lab4/report/EARIN_RUDNICKI_KLISZKO_LAB_4.pdf
Normal file
BIN
lab4/report/EARIN_RUDNICKI_KLISZKO_LAB_4.pdf
Normal file
Binary file not shown.
@ -1,9 +1,87 @@
|
||||
\documentclass{article}
|
||||
\documentclass[12pt]{article}
|
||||
\usepackage{graphicx} % Required for inserting images
|
||||
\usepackage{listings}
|
||||
\usepackage{hyperref}
|
||||
\usepackage{tabularx}
|
||||
\usepackage{float}
|
||||
\usepackage{subfig}
|
||||
\usepackage[a4paper, total={6in, 8in}]{geometry}
|
||||
|
||||
\title{EARIN}
|
||||
\author{Krzysztof Rudnicki}
|
||||
\title{EARIN Lab 4 Report}
|
||||
\author{Krzysztof Rudnicki, 307585 \\ Jakub Kliszko, 303866 }
|
||||
\date{\today}
|
||||
|
||||
\begin{document}
|
||||
|
||||
\maketitle
|
||||
dupa
|
||||
\section{Exercise Variant 2 - Predicting wine quality}
|
||||
Our task was to write a program that predicts wine quality based on data containing: \\
|
||||
fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
|
||||
|
||||
|
||||
\section{Implementation}
|
||||
The program can be run by installing Python, moving to the project directory and issuing the command:
|
||||
\begin{lstlisting}[language=bash]
|
||||
python main.py
|
||||
\end{lstlisting}
|
||||
We decided to implement linear and logistic regression, as we found them the easiest methods to implement \\
|
||||
There will be 3 types of output \\
|
||||
\begin{enumerate}
|
||||
\item Number of wines with given quality (Graphical)
|
||||
\item How a given parameter impacts quality (Graphical)
|
||||
\item How well linear and logistic regression performed (Textual)
|
||||
\end{enumerate}
|
||||
Upon clicking any button the next plot will be shown
|
||||
\section{Results}
|
||||
We have successfully implemented a program to predict wine quality \\
|
||||
\subsection{Data investigation}
|
||||
There are 11 features in total and 1599 instances of those features \\
|
||||
It is clear that there is an imbalance in the quality of wines, with the majority of wines being rated either '5' or '6':
|
||||
\begin{figure}[H]
|
||||
\caption{Plot showing imbalance in quality of wine}
|
||||
\includegraphics[width=\textwidth]{inbalance.png}
|
||||
\centering
|
||||
\end{figure}
|
||||
More importantly we checked correlation of parameters:
|
||||
\begin{figure}[H]
|
||||
\caption{Plot showing correlation between parameters; bright squares indicate positive correlation, dark squares indicate negative correlation}
|
||||
\includegraphics[width=\textwidth]{corr.png}
|
||||
\centering
|
||||
\end{figure}
|
||||
Bright squares mean that the parameters have positive correlation to each other \\
|
||||
Darker squares mean that the parameters have negative correlation to each other \\
|
||||
\newpage
|
||||
We are most interested in the correlation of individual parameters with the quality value \\
|
||||
Alcohol has by far the biggest positive impact on quality, with a correlation value of 0.48 (where a value of 1 means that the two parameters are perfectly positively correlated), followed by sulphates and citric acid with roughly the same values (0.25 and 0.23 respectively) \\
|
||||
The strongest negative impact on quality comes from volatile acidity (-0.39)
|
||||
\subsection{Methods comparison}
|
||||
For Linear regression we checked values of:
|
||||
\begin{itemize}
|
||||
\item Training Mean squared error - Difference between predicted and true values, the lower the better
|
||||
\item Training $R^2$ - for given data, The higher the better
|
||||
\item Testing $R^2$ - for new data, The higher the better
|
||||
\end{itemize}
|
||||
|
||||
For Logistic regression we checked values of:
|
||||
\begin{itemize}
|
||||
\item Training Accuracy - how many instances we correctly classified, the higher the better
|
||||
\item Training F1 Score - for given data, The higher the better
|
||||
\item Testing F1 Score - for new data, The higher the better
|
||||
\end{itemize}
|
||||
|
||||
For Linear regression we received values:
|
||||
\begin{lstlisting}[language=bash]
|
||||
Training MSE: 0.4258083784387746
|
||||
Training R^2: 0.36545196162068627
|
||||
Testing R^2: 0.3283887639580225
|
||||
\end{lstlisting}
|
||||
|
||||
For Logistic regression we received values:
|
||||
\begin{lstlisting}[language=bash]
|
||||
Training Accuracy: 0.596559812353401
|
||||
Training F1 Score: 0.5806169210603433
|
||||
Testing F1 Score: 0.6166756344362352
|
||||
\end{lstlisting}
|
||||
We can see that logistic regression outperforms linear regression: its test score, which should be as high as possible, is about twice as high as that of linear regression
|
||||
|
||||
\end{document}
|
||||
BIN
lab4/report/corr.png
Normal file
BIN
lab4/report/corr.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 143 KiB |
BIN
lab4/report/inbalance.png
Normal file
BIN
lab4/report/inbalance.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 20 KiB |
Loading…
Reference in New Issue
Block a user