WUT_Computer_Science/Programming/EARIN/lab4/main.py

"""
Program that predicts wine quality based on variant2.csv data
"""
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score, f1_score
from sklearn.linear_model import LogisticRegression


class LinearRegression:
    """
    Ordinary least-squares linear regression.

    The model is ``y ~ [1, x] . theta``: ``theta[0]`` is the intercept and the
    remaining entries are the weights of the feature columns, in order.
    """

    def __init__(self):
        # Learned coefficients (intercept first); stays None until fit() runs.
        self.theta = None

    @staticmethod
    def _with_intercept(x_values):
        """
        Return x_values with a leading column of ones for the intercept term.

        x_values must be 2-D (n_samples, n_features); np.concatenate raises
        ValueError for any other rank.
        """
        x_values = np.asarray(x_values)
        return np.concatenate((np.ones((x_values.shape[0], 1)), x_values), axis=1)

    def fit(self, x_values, y_values):
        """
        Fit linear regression model to our training data

        x_values: 2-D array of features, one row per sample.
        y_values: target values, one per row of x_values.

        Stores the fitted coefficients in self.theta and returns None.
        """
        # The design matrix must be built from the FEATURES; the previous
        # version concatenated the targets here, which either failed on a 1-D
        # target or regressed y on itself.
        design = self._with_intercept(x_values)
        targets = np.asarray(y_values)

        # Least-squares solve. For a full-rank design matrix this equals the
        # normal-equation solution inv(X^T X) X^T y, but it is also defined
        # (minimum-norm solution) when columns are collinear or constant,
        # where an explicit inverse would raise LinAlgError.
        self.theta = np.linalg.lstsq(design, targets, rcond=None)[0]

    def predict(self, x_values):
        """
        Predict target values for our input data using the trained linear regression model.

        x_values: 2-D array with the same number of feature columns used in fit().

        Returns the array of predictions, one per input row.
        Raises RuntimeError if the model has not been fitted yet.
        """
        if self.theta is None:
            raise RuntimeError("LinearRegression.predict() called before fit()")

        # Same intercept column as in fit(), then apply the learned weights.
        return self._with_intercept(x_values).dot(self.theta)

    def score(self, x_values, y_values):
        """
        Compute the R-squared score of the linear regression model on our test data.

        R^2 = 1 - SS_res / SS_tot, where SS_res is the sum of squared residuals
        and SS_tot is the sum of squared deviations of y_values from their mean.
        If y_values is constant, SS_tot is 0 and the ratio is undefined (NumPy
        yields nan or -inf with a runtime warning).
        """
        y_values = np.asarray(y_values)
        y_predicted = self.predict(x_values)
        ss_res = np.sum((y_values - y_predicted) ** 2)
        ss_tot = np.sum((y_values - np.mean(y_values)) ** 2)
        r2_score = 1 - (ss_res / ss_tot)

        return r2_score


# --- Load the data set -------------------------------------------------------
# The CSV is read from the current working directory.
wine_df = pd.read_csv("variant2.csv")
# NOTE(review): the return values of head() and describe() are discarded, so
# these two calls display nothing when the file is run as a script; only
# info() writes its summary to stdout.
wine_df.head()
wine_df.describe()
wine_df.info()


# --- Features / target -------------------------------------------------------
# Every column except the last is a feature; the last column is the target.
# NOTE(review): presumably the last column is "quality" (the column plotted
# below) - confirm against the CSV header.
X = wine_df.iloc[:, :-1].values
y = wine_df.iloc[:, -1].values

# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Standardise features. The scaler is fitted on the training rows only and
# then applied unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# --- Model 1: linear regression (the LinearRegression class defined in this
# file, not scikit-learn's) ---------------------------------------------------
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# y_pred holds the regressor's test predictions here; it is rebound to the
# classifier's predictions further down.
y_pred = regressor.predict(X_test)

# Test-set mean squared error of the regressor.
mse = mean_squared_error(y_test, y_pred)
print("MSE:", mse)
# --- Model 2: scikit-learn logistic regression with default settings, fitted
# on the same scaled features with the target values used as class labels ----
classifier = LogisticRegression()
classifier.fit(X_train, y_train)

# From this point on y_pred refers to the classifier's test predictions.
y_pred = classifier.predict(X_test)

# Test-set accuracy of the classifier.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
# --- Regressor: training error and R^2 on both splits ------------------------
y_pred_train = regressor.predict(X_train)

train_mse = mean_squared_error(y_train, y_pred_train)
print("Training MSE:", train_mse)

train_r_squared = regressor.score(X_train, y_train)
print("Training R^2:", train_r_squared)

test_r_squared = regressor.score(X_test, y_test)
print("Testing R^2:", test_r_squared)
# --- Classifier: training accuracy and weighted F1 on both splits ------------
# y_pred_train is rebound to the classifier's training predictions.
y_pred_train = classifier.predict(X_train)

train_accuracy = accuracy_score(y_train, y_pred_train)
print("Training Accuracy:", train_accuracy)

train_f1_score = f1_score(y_train, y_pred_train, average="weighted")
print("Training F1 Score:", train_f1_score)

# Uses y_pred as last bound above, i.e. the classifier's test predictions.
test_f1_score = f1_score(y_test, y_pred, average="weighted")
print("Testing F1 Score:", test_f1_score)

# --- Plots -------------------------------------------------------------------
# Each figure is drawn, the script waits for a key or mouse press, and the
# figure is closed before the next one is created.
# Count of rows per value of the "quality" column.
Data1 = sns.countplot(x="quality", data=wine_df)
plt.draw()
plt.waitforbuttonpress(0)
plt.close()
# Annotated heatmap of the pairwise column correlations.
Data2 = sns.heatmap(wine_df.corr(), annot=True)
plt.draw()
plt.waitforbuttonpress(0)
plt.close()
feat: report, make main.py conform to pep8 2023-04-25 00:26:05 +02:00			`"""`
			`Program that predicts wine quality based on variant2.csv data`
			`"""`
feat: initial solution to the task 2023-04-19 19:46:09 +02:00			`import pandas as pd`
feat: report, make main.py conform to pep8 2023-04-25 00:26:05 +02:00			`import seaborn as sns`
feat: final version 2023-04-26 13:13:33 +02:00			`import numpy as np`
feat: report, make main.py conform to pep8 2023-04-25 00:26:05 +02:00			`import matplotlib.pyplot as plt`
			`from sklearn.preprocessing import StandardScaler`
feat: initial solution to the task 2023-04-19 19:46:09 +02:00			`from sklearn.model_selection import train_test_split`
feat: report, make main.py conform to pep8 2023-04-25 00:26:05 +02:00			`from sklearn.metrics import mean_squared_error, accuracy_score, f1_score`
feat: final version 2023-04-26 13:13:33 +02:00			`from sklearn.linear_model import LogisticRegression`


			`class LinearRegression:`
			`"""Implements Linear regression method"""`

			`def __init__(self):`
			`self.theta = None`

			`def fit(self, x_values, y_values):`
			`"""`
			`Fit linear regression model to our training data`
			`"""`
			`# Add a column of ones to X for the intercept term`
			`x_values = np.concatenate((np.ones((x_values.shape[0], 1)), y_values), axis=1)`

			`# Compute the least squares solution using the normal equation`
			`self.theta = (`
			`np.linalg.inv(x_values.T.dot(x_values)).dot(x_values.T).dot(y_values)`
			`)`

			`def predict(self, x_values):`
			`"""`
			`Predict target values for our input data using the trained linear regression model.`
			`"""`
			`# Add a column of ones to X for the intercept term`
			`x_values = np.concatenate((np.ones((x_values.shape[0], 1)), x_values), axis=1)`

			`# Make predictions using the learned weights`
			`y_predicted = x_values.dot(self.theta)`

			`return y_predicted`

			`def score(self, x_values, y_values):`
			`"""`
			`Compute the R-squared score of the linear regression model on our test data.`
			`"""`
			`y_predicted = self.predict(x_values)`
			`ss_res = np.sum((y_values - y_predicted) ** 2)`
			`ss_tot = np.sum((y_values - np.mean(y_values)) ** 2)`
			`r2_score = 1 - (ss_res / ss_tot)`

			`return r2_score`

feat: report, make main.py conform to pep8 2023-04-25 00:26:05 +02:00
			`wine_df = pd.read_csv("variant2.csv")`
			`wine_df.head()`
			`wine_df.describe()`
			`wine_df.info()`


			`X = wine_df.iloc[:, :-1].values`
			`y = wine_df.iloc[:, -1].values`

feat: final version 2023-04-26 13:13:33 +02:00			`X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)`
feat: report, make main.py conform to pep8 2023-04-25 00:26:05 +02:00
			`scaler = StandardScaler()`
			`X_train = scaler.fit_transform(X_train)`
			`X_test = scaler.transform(X_test)`
			`regressor = LinearRegression()`
			`regressor.fit(X_train, y_train)`

			`y_pred = regressor.predict(X_test)`

			`mse = mean_squared_error(y_test, y_pred)`
			`print("MSE:", mse)`
			`classifier = LogisticRegression()`
			`classifier.fit(X_train, y_train)`

			`y_pred = classifier.predict(X_test)`

			`accuracy = accuracy_score(y_test, y_pred)`
			`print("Accuracy:", accuracy)`
			`y_pred_train = regressor.predict(X_train)`

			`train_mse = mean_squared_error(y_train, y_pred_train)`
			`print("Training MSE:", train_mse)`

			`train_r_squared = regressor.score(X_train, y_train)`
			`print("Training R^2:", train_r_squared)`

			`test_r_squared = regressor.score(X_test, y_test)`
			`print("Testing R^2:", test_r_squared)`
			`y_pred_train = classifier.predict(X_train)`

			`train_accuracy = accuracy_score(y_train, y_pred_train)`
			`print("Training Accuracy:", train_accuracy)`

			`train_f1_score = f1_score(y_train, y_pred_train, average="weighted")`
			`print("Training F1 Score:", train_f1_score)`

			`test_f1_score = f1_score(y_test, y_pred, average="weighted")`
			`print("Testing F1 Score:", test_f1_score)`

			`Data1 = sns.countplot(x="quality", data=wine_df)`
			`plt.draw()`
			`plt.waitforbuttonpress(0)`
			`plt.close()`
			`Data2 = sns.heatmap(wine_df.corr(), annot=True)`
			`plt.draw()`
			`plt.waitforbuttonpress(0)`
			`plt.close()`