diff --git a/main.ipynb b/main.ipynb index 24454d1a..85ca9d7e 100644 --- a/main.ipynb +++ b/main.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 57, + "execution_count": 71, "metadata": {}, "outputs": [ { @@ -22,7 +22,7 @@ "from statistics import median\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.feature_selection import RFE\n", - "from sklearn.tree import DecisionTreeRegressor\n", + "from sklearn.tree import DecisionTreeRegressor, plot_tree\n", "import numpy as np\n", "from sklearn.metrics import mean_squared_error\n", "import matplotlib.pyplot as plt\n", @@ -1483,6 +1483,53 @@ "display(X_test[np.abs(y_test - y_pred_test) < 1])\n", "display(X_test['Suma'][np.abs(y_test - y_pred_test) < 1].drop_duplicates().values)" ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.3391 — Dochody_podatek_od_srodkow_transportowych\n", + "0.1816 — Dochody_podatek_od_spadkow\n", + "0.1590 — Powierzchnia\n", + "0.0714 — Wynagrodzenie_ogolem\n", + "0.0675 — Dochody_podatek_lesny\n", + "0.0377 — Dochody_razem\n", + "0.0312 — Dochody_podatek_rolny\n", + "0.0258 — Dochody_z_majatku\n", + "0.0220 — Dochody_podatek_PCC\n", + "0.0172 — Wynagrodzenie_w_relacji_do_sredniej\n", + "0.0172 — Dochody_podatek_od_dzialalnosci_gospodarczej\n", + "0.0158 — Dochody_podatek_od_nieruchomosci\n", + "0.0144 — Dochody_podatek_odrebne_ustawy\n" + ] + } + ], + "source": [ + "selector = RFE(estimator=DecisionTreeRegressor(random_state=0, max_depth=best_params[0]), n_features_to_select=best_params[1])\n", + "selector.fit(X, y)\n", + "X_selected = selector.transform(X)\n", + "\n", + "model = DecisionTreeRegressor(random_state=0, max_depth=best_params[0])\n", + "model.fit(X_selected, y)\n", + "\n", + "feature_importance = dict(zip(feature_names, model.feature_importances_))\n", + "for feature, importance in sorted(feature_importance.items(), key=lambda x: x[1], reverse=True):\n", + " print(f'{importance:.4f} \\u2014 {feature}')" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "# plot_tree(model, fontsize=5)" + ] } ], "metadata": {