import numpy as np


class LinearRegression:
    """Ordinary least-squares linear regression with an intercept term.

    The model is ``y ~ theta[0] + x @ theta[1:]``. ``theta`` is ``None``
    until :meth:`fit` has been called.
    """

    def __init__(self):
        # Learned coefficients, intercept first; populated by fit().
        self.theta = None

    @staticmethod
    def _with_intercept(x_values):
        """Return ``x_values`` with a leading column of ones prepended."""
        features = np.asarray(x_values, dtype=float)
        ones = np.ones((features.shape[0], 1))
        return np.concatenate((ones, features), axis=1)

    def _require_fitted(self):
        """Raise a clear error when the model is used before fit()."""
        if self.theta is None:
            raise RuntimeError(
                "LinearRegression is not fitted yet; call fit() first."
            )

    def fit(self, x_values, y_values):
        """
        Fit the linear regression model to the training data.

        x_values -- 2-D array of shape (n_samples, n_features)
        y_values -- target values with n_samples entries
        """
        # The design matrix must be built from the FEATURES; the intercept
        # is modelled by the extra column of ones.
        design = self._with_intercept(x_values)
        targets = np.asarray(y_values, dtype=float)

        # lstsq minimises ||design @ theta - targets||^2 directly. It gives
        # the same answer as the normal equation when design.T @ design is
        # invertible, and still returns the minimum-norm solution when the
        # features are collinear (where an explicit inverse would fail).
        self.theta = np.linalg.lstsq(design, targets, rcond=None)[0]

    def predict(self, x_values):
        """
        Predict target values for the input data using the fitted model.

        Raises RuntimeError if fit() has not been called.
        """
        self._require_fitted()
        return self._with_intercept(x_values).dot(self.theta)

    def score(self, x_values, y_values):
        """
        Compute the R-squared score of the model on the given data.

        R^2 = 1 - SS_res / SS_tot. When y_values is constant SS_tot is zero
        and the result is not finite.
        Raises RuntimeError if fit() has not been called.
        """
        y_values = np.asarray(y_values, dtype=float)
        y_predicted = self.predict(x_values)
        ss_res = np.sum((y_values - y_predicted) ** 2)
        ss_tot = np.sum((y_values - np.mean(y_values)) ** 2)
        return 1 - (ss_res / ss_tot)