feat: final version

This commit is contained in:
Krzysztof Rudnicki 2023-04-26 13:13:33 +02:00
parent dc5d24826d
commit 4b9de87f76
3 changed files with 49 additions and 6 deletions

View File

@ -3,11 +3,55 @@ Program that predicts wine quality based on variant2.csv data
"""
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score, f1_score
from sklearn.linear_model import LogisticRegression
class LinearRegression:
    """Ordinary least-squares linear regression solved with the normal equation."""

    def __init__(self):
        # Learned coefficients: index 0 is the intercept, the remaining entries
        # follow the feature columns. Stays None until fit() has been called.
        self.theta = None

    @staticmethod
    def _add_intercept(x_values):
        """Return x_values with a leading column of ones for the intercept term."""
        x_values = np.asarray(x_values)
        ones = np.ones((x_values.shape[0], 1))
        return np.concatenate((ones, x_values), axis=1)

    def fit(self, x_values, y_values):
        """
        Fit linear regression model to our training data.

        x_values: 2-D array of features, one row per sample.
        y_values: target values, one per sample.

        Stores the learned coefficients in self.theta and returns None.
        Raises numpy.linalg.LinAlgError when X^T X is singular.
        """
        # The design matrix must be built from the features (not the targets).
        design = self._add_intercept(x_values)
        # Compute the least squares solution using the normal equation:
        # theta = (X^T X)^-1 X^T y
        self.theta = (
            np.linalg.inv(design.T.dot(design)).dot(design.T).dot(y_values)
        )

    def predict(self, x_values):
        """
        Predict target values for our input data using the trained linear regression model.

        x_values: 2-D array of features with the same number of columns used in fit().
        Returns the predicted values, one per input row.
        """
        design = self._add_intercept(x_values)
        # Make predictions using the learned weights
        return design.dot(self.theta)

    def score(self, x_values, y_values):
        """
        Compute the R-squared score of the linear regression model on our test data.

        Returns 1 - SS_res / SS_tot, where SS_res is the residual sum of squares
        and SS_tot is the total sum of squares around the mean of y_values.
        """
        y_predicted = self.predict(x_values)
        ss_res = np.sum((y_values - y_predicted) ** 2)
        ss_tot = np.sum((y_values - np.mean(y_values)) ** 2)
        return 1 - (ss_res / ss_tot)
# Load the wine dataset; the relative path is resolved against the current working directory.
wine_df = pd.read_csv("variant2.csv")
# Preview the first rows; the returned frame is neither stored nor printed here.
wine_df.head()
@ -18,8 +62,7 @@ wine_df.info()
# Features are every column except the last; the target is the last column.
X = wine_df.iloc[:, :-1].values
y = wine_df.iloc[:, -1].values
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=0)
# NOTE(review): this repeats the split above with identical arguments, written on one line.
# It looks like the old and new form of a single changed statement; confirm only one should remain.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Standardize the features; the scaler's statistics are fitted on the training split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

View File

@ -71,9 +71,9 @@ For Logistic regression we checked values of:
For linear regression we obtained the following values:
\begin{lstlisting}[language=bash]
Training MSE: 0.4258083784387746
Training R^2: 0.36545196162068627
Testing R^2: 0.3283887639580225
Training MSE: 0.4258083784387745
Training R^2: 0.3654519616206865
Testing R^2: 0.32838876395802263
\end{lstlisting}
For logistic regression we obtained the following values: