mirror of
https://github.com/kuhyx/WUT_Computer_Science.git
synced 2026-07-04 20:23:04 +02:00
feat: final version
This commit is contained in:
parent
dc5d24826d
commit
4b9de87f76
49
lab4/main.py
49
lab4/main.py
@ -3,11 +3,55 @@ Program that predicts wine quality based on variant2.csv data
|
||||
"""
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.linear_model import LinearRegression, LogisticRegression
|
||||
from sklearn.metrics import mean_squared_error, accuracy_score, f1_score
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
|
||||
class LinearRegression:
    """Ordinary least-squares linear regression solved via the normal equation.

    NOTE: this class has the same name as ``LinearRegression`` imported from
    ``sklearn.linear_model`` at the top of the file, so it shadows that import
    from this point on.

    Attributes:
        theta: Learned weight vector, ``None`` until :meth:`fit` is called.
            ``theta[0]`` is the intercept; the remaining entries are the
            per-feature coefficients, in column order of the training data.
    """

    def __init__(self):
        self.theta = None

    @staticmethod
    def _add_intercept(x_values):
        """Return ``x_values`` with a leading column of ones (intercept term)."""
        x_values = np.asarray(x_values)
        ones = np.ones((x_values.shape[0], 1))
        return np.concatenate((ones, x_values), axis=1)

    def fit(self, x_values, y_values):
        """Fit the model to the training data.

        Args:
            x_values: 2-D array-like of shape (n_samples, n_features).
            y_values: Array-like of targets with n_samples entries.

        Raises:
            numpy.linalg.LinAlgError: If ``X^T X`` is singular (e.g. perfectly
                collinear features).
        """
        # The design matrix must be built from the FEATURES. The previous
        # code concatenated the ones column with ``y_values``, which either
        # raised a dimension error (1-D targets) or regressed the targets on
        # themselves (2-D targets).
        design = self._add_intercept(x_values)
        targets = np.asarray(y_values)

        # Normal equation: (X^T X) theta = X^T y. Solving the linear system
        # directly is numerically more stable than forming the explicit
        # inverse and multiplying.
        gram = design.T.dot(design)
        moment = design.T.dot(targets)
        self.theta = np.linalg.solve(gram, moment)

    def predict(self, x_values):
        """Predict target values for ``x_values`` using the learned weights.

        Args:
            x_values: 2-D array-like of shape (n_samples, n_features), with the
                same feature columns that were used in :meth:`fit`.

        Returns:
            Array of predictions, one per input row.
        """
        design = self._add_intercept(x_values)
        return design.dot(self.theta)

    def score(self, x_values, y_values):
        """Compute the R-squared score of the model on the given data.

        Args:
            x_values: 2-D array-like of shape (n_samples, n_features).
            y_values: True target values for ``x_values``.

        Returns:
            ``1 - SS_res / SS_tot``, where ``SS_res`` is the residual sum of
            squares and ``SS_tot`` the total sum of squares around the mean
            of ``y_values``.
        """
        y_values = np.asarray(y_values)
        y_predicted = self.predict(x_values)
        ss_res = np.sum((y_values - y_predicted) ** 2)
        ss_tot = np.sum((y_values - np.mean(y_values)) ** 2)
        return 1 - (ss_res / ss_tot)
|
||||
|
||||
|
||||
wine_df = pd.read_csv("variant2.csv")
|
||||
wine_df.head()
|
||||
@ -18,8 +62,7 @@ wine_df.info()
|
||||
X = wine_df.iloc[:, :-1].values
|
||||
y = wine_df.iloc[:, -1].values
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, test_size=0.2, random_state=0)
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
|
||||
|
||||
scaler = StandardScaler()
|
||||
X_train = scaler.fit_transform(X_train)
|
||||
|
||||
Binary file not shown.
@ -71,9 +71,9 @@ For Logistic regression we checked values of:
|
||||
|
||||
For Linear regression we received values:
|
||||
\begin{lstlisting}[language=bash]
|
||||
Training MSE: 0.4258083784387746
|
||||
Training R^2: 0.36545196162068627
|
||||
Testing R^2: 0.3283887639580225
|
||||
Training MSE: 0.4258083784387745
|
||||
Training R^2: 0.3654519616206865
|
||||
Testing R^2: 0.32838876395802263
|
||||
\end{lstlisting}
|
||||
|
||||
For Logistic regression we received values:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user