feat: final version

2026-07-04 20:23:04 +02:00 · 2023-04-26 13:13:33 +02:00 · 2023-04-26 13:13:33 +02:00 · 4b9de87f76
commit 4b9de87f76
parent dc5d24826d
3 changed files with 49 additions and 6 deletions
--- a/lab4/main.py
+++ b/lab4/main.py
@ -3,11 +3,55 @@ Program that predicts wine quality based on variant2.csv data
 """
 import pandas as pd
 import seaborn as sns
+import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.preprocessing import StandardScaler
 from sklearn.model_selection import train_test_split
-from sklearn.linear_model import LinearRegression, LogisticRegression
 from sklearn.metrics import mean_squared_error, accuracy_score, f1_score
+from sklearn.linear_model import LogisticRegression
+
+
+class LinearRegression:
+    """Ordinary least-squares linear regression with an intercept term.
+
+    After ``fit`` has been called, ``theta`` holds the learned weights:
+    the intercept first, followed by one coefficient per feature column.
+    """
+
+    def __init__(self):
+        # Weight vector (intercept first); stays None until fit() is called.
+        self.theta = None
+
+    @staticmethod
+    def _add_intercept(x_values):
+        """Return x_values with a leading column of ones (intercept term)."""
+        x_values = np.asarray(x_values)
+        ones = np.ones((x_values.shape[0], 1))
+        return np.concatenate((ones, x_values), axis=1)
+
+    def fit(self, x_values, y_values):
+        """
+        Fit linear regression model to our training data.
+
+        Args:
+            x_values: 2-D array of features, one row per sample.
+            y_values: target values, one entry (or row) per sample.
+
+        Returns:
+            The fitted model itself, so calls can be chained.
+        """
+        # The design matrix must be built from the FEATURES (not the targets).
+        design = self._add_intercept(x_values)
+        targets = np.asarray(y_values)
+
+        # Solve the least-squares problem directly rather than inverting
+        # X^T X: same solution for full-rank data, but it stays well defined
+        # (minimum-norm solution) when features are collinear.
+        self.theta = np.linalg.lstsq(design, targets, rcond=None)[0]
+
+        return self
+
+    def predict(self, x_values):
+        """
+        Predict target values for our input data using the trained linear regression model.
+
+        Args:
+            x_values: 2-D array of features with the same number of columns
+                as the data passed to ``fit``.
+
+        Returns:
+            Array of predicted target values, one per input row.
+
+        Raises:
+            ValueError: if the model has not been fitted yet.
+        """
+        if self.theta is None:
+            raise ValueError("LinearRegression must be fitted before predicting")
+
+        # Same intercept column as in fit(), then apply the learned weights.
+        return self._add_intercept(x_values).dot(self.theta)
+
+    def score(self, x_values, y_values):
+        """
+        Compute the R-squared score of the linear regression model on our test data.
+
+        Args:
+            x_values: 2-D array of features.
+            y_values: true target values for those rows.
+
+        Returns:
+            1 - SS_res / SS_tot, where SS_res is the sum of squared residuals
+            and SS_tot the sum of squared deviations of y_values from its mean.
+        """
+        y_values = np.asarray(y_values)
+        y_predicted = self.predict(x_values)
+        ss_res = np.sum((y_values - y_predicted) ** 2)
+        ss_tot = np.sum((y_values - np.mean(y_values)) ** 2)
+
+        return 1 - (ss_res / ss_tot)
+

 wine_df = pd.read_csv("variant2.csv")
 wine_df.head()
@ -18,8 +62,7 @@ wine_df.info()
 X = wine_df.iloc[:, :-1].values
 y = wine_df.iloc[:, -1].values

-X_train, X_test, y_train, y_test = train_test_split(
-    X, y, test_size=0.2, random_state=0)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

 scaler = StandardScaler()
 X_train = scaler.fit_transform(X_train)
--- a/lab4/report/EARIN_RUDNICKI_KLISZKO_LAB_4.pdf
+++ b/lab4/report/EARIN_RUDNICKI_KLISZKO_LAB_4.pdf
--- a/lab4/report/EARIN_RUDNICKI_KLISZKO_LAB_4.tex
+++ b/lab4/report/EARIN_RUDNICKI_KLISZKO_LAB_4.tex
@ -71,9 +71,9 @@ For Logistic regression we checked values of:

 For Linear regression we received values:
 \begin{lstlisting}[language=bash]
-Training MSE: 0.4258083784387746
-Training R^2: 0.36545196162068627
-Testing R^2: 0.3283887639580225
+Training MSE: 0.4258083784387745
+Training R^2: 0.3654519616206865
+Testing R^2: 0.32838876395802263
 \end{lstlisting}

 For Logistic regression we received values: