WUT_Computer_Science/main.ipynb

1543 lines
530 KiB
Plaintext
Raw Normal View History

2024-05-06 22:55:21 +02:00
{
"cells": [
2024-05-06 23:50:22 +02:00
{
"cell_type": "code",
2024-05-22 13:55:56 +02:00
"execution_count": 71,
2024-05-06 23:50:22 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-05-22 13:01:06 +02:00
"['Program Operacyjny Infrastruktura i ?rodowisko 2014-2020'\n",
" 'Program Operacyjny Inteligentny RozwĂłj'\n",
" 'Program Operacyjny Polska Cyfrowa'\n",
" 'Program Operacyjny Wiedza Edukacja RozwĂłj'\n",
" 'Program Operacyjny Polska Wschodnia']\n"
2024-05-06 23:50:22 +02:00
]
}
],
"source": [
2024-05-22 13:01:06 +02:00
"import pandas as pd\n",
"from statistics import median\n",
2024-05-06 23:50:22 +02:00
"from sklearn.model_selection import train_test_split\n",
2024-05-22 13:01:06 +02:00
"from sklearn.feature_selection import RFE\n",
2024-05-22 13:55:56 +02:00
"from sklearn.tree import DecisionTreeRegressor, plot_tree\n",
2024-05-22 13:01:06 +02:00
"import numpy as np\n",
2024-05-06 23:50:22 +02:00
"from sklearn.metrics import mean_squared_error\n",
"import matplotlib.pyplot as plt\n",
2024-05-22 13:01:06 +02:00
"import seaborn as sns\n",
2024-05-06 23:50:22 +02:00
"\n",
"feature_names = [\n",
2024-05-09 08:21:38 +02:00
" 'Powierzchnia', # 1\n",
" 'Wynagrodzenie_ogolem', # 2\n",
" 'Wynagrodzenie_w_relacji_do_sredniej', # 3\n",
" 'Dochody_podatek_lesny', # 4\n",
" 'Dochody_podatek_PCC', # 5\n",
" 'Dochody_podatek_od_dzialalnosci_gospodarczej', # 6\n",
" 'Dochody_podatek_od_nieruchomosci', # 7\n",
" 'Dochody_podatek_od_spadkow', # 8\n",
" 'Dochody_podatek_od_srodkow_transportowych', # 9\n",
" 'Dochody_podatek_rolny', # 10\n",
" 'Dochody_podatek_odrebne_ustawy', # 11\n",
" 'Dochody_razem', # 12\n",
" 'Dochody_z_majatku', # 13\n",
" 'Dochody_z_najmu_i_dzierzawy', # 14\n",
" 'Dochody_z_uslug', # 15\n",
" 'Dochody_dofinansowanie_inwestycyjne', # 16\n",
" 'Dochody_dofinansowanie_razem', # 17\n",
" 'Udzialy_w_podatkach_dochodowych_od_osob_fizycznych', # 18\n",
" 'Udzialy_w_podatkach_dochodowych_od_osob_prywatnych', # 19\n",
" 'Udzialy_w_podatkach_dochodowych_razem', # 20\n",
" 'Wplywy_z_innych_lokalnych_oplat', # 21\n",
" 'Wplywy_z_oplaty_eksploatacyjnej', # 22\n",
" 'Wplywy_z_oplaty_skarbowej', # 23\n",
" 'Wplywy_z_oplaty_targowej', # 24\n",
" 'Ludnosc_ogolem', # 25\n",
" 'Ludnosc_w_wieku_poprodukcyjnym', # 26\n",
" 'Ludnosc_w_wieku_produkcyjnym', # 27\n",
" 'Ludnosc_w_wieku_produkcyjnym_mobilnym', # 28\n",
" 'Ludnosc_w_wieku_produkcyjnym_niemobilnym', # 29\n",
" 'Ludnosc_w_wieku_przedprodukcyjnym', # 30\n",
" 'Ludnosc_mezczyzni', # 31\n",
" 'Ludnosc_mezczyzni_w_wieku_poprodukcyjnym', # 32\n",
" 'Ludnosc_mezczyzni_w_wieku_produkcyjnym', # 33\n",
" 'Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym', # 34\n",
" 'Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym', # 35\n",
" 'Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym', # 36\n",
" 'Ludnosc_kobiety', # 37\n",
" 'Ludnosc_kobiety_w_wieku_poprodukcyjnym', # 38\n",
" 'Ludnosc_kobiety_w_wieku_produkcyjnym', # 39\n",
" 'Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym', # 40\n",
" 'Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym', # 41\n",
" 'Ludnosc_kobiety_w_wieku_przedprodukcyjnym', # 42\n",
" 'Wojewodztwo_Dolnoslaskie', # 43\n",
" 'Wojewodztwo_Kujawsko_Pomorskie', # 44\n",
" 'Wojewodztwo_Lubelskie', # 45\n",
" 'Wojewodztwo_Lubuskie', # 46\n",
" 'Wojewodztwo_Lodzkie', # 47\n",
" 'Wojewodztwo_Malopolskie', # 48\n",
" 'Wojewodztwo_Mazowieckie', # 49\n",
" 'Wojewodztwo_Opolskie', # 50\n",
" 'Wojewodztwo_Podkarpackie', # 51\n",
" 'Wojewodztwo_Podlaskie', # 52\n",
" 'Wojewodztwo_Pomorskie', # 53\n",
" 'Wojewodztwo_Slaskie', # 54\n",
" 'Wojewodztwo_Swietokrzyskie', # 55\n",
" 'Wojewodztwo_Warminsko_Mazurskie', # 56\n",
" 'Wojewodztwo_Wielkopolskie', # 57\n",
" 'Wojewodztwo_Zachodniopomorskie', # 58\n",
" 'Gestosc_zaludnienia', # 59\n",
" 'Ludnosc_na_1_km2', # 60\n",
" 'Ludnosc', # 61\n",
2024-05-22 13:01:06 +02:00
" 'Wskaznik_urbanizacji', # 62\n",
" 'Zmiana_liczby_ludnosci', # 63\n",
" 'Saldo_migracji_na_1000_ludnosci', # 64\n",
" 'Saldo_migracji', # 65\n",
" 'Wymeldowania_do_miast_kobiety', # 66\n",
" 'Wymeldowania_do_miast_mezczyzni', # 67\n",
" 'Wymeldowania_do_miast_ogolem', # 68\n",
" 'Wymeldowania_na_wies_kobiety', # 69\n",
" 'Wymeldowania_na_wies_mezczyzni', # 70\n",
" 'Wymeldowania_na_wies_ogolem', # 71\n",
" 'Wymeldowania_kobiety', # 72\n",
" 'Wymeldowania_mezczyzni', # 73\n",
" 'Wymeldowania_ogolem', # 74\n",
" 'Zameldowania_kobiety', # 75\n",
" 'Zameldowania_mezczyzni', # 76\n",
" 'Zameldowania_ogolem', # 77\n",
" 'Zameldowania_z_miast_kobiety', # 78\n",
" 'Zameldowania_z_miast_mezczyzni', # 79\n",
" 'Zameldowania_z_miast_ogolem', # 80\n",
" 'Zameldowania_ze_wsi_kobiety', # 81\n",
" 'Zameldowania_ze_wsi_mezczyzni', # 82\n",
" 'Zameldowania_ze_wsi_ogolem', # 83\n",
" 'Miejsca_noclegowe_caloroczne', # 84\n",
" 'Miejsca_noclegowe_ogolem', # 85\n",
" 'Obiekty_caloroczne', # 86\n",
" 'Obiekty_ogolem', # 87\n",
" 'Turysci_ogolem', # 88\n",
" 'Turysci_zagraniczni', # 89\n",
" 'Bezrobotni_do_25_roku_zycia', # 90\n",
" 'Dlugotrwale_bezrobotni', # 91\n",
" 'Bezrobotne_kobiety', # 92\n",
" 'Bezrobotni_mezczyzni', # 93\n",
" 'Bezrobotni_ogolem', # 94\n",
" 'Bezrobotni_powyzej_50_roku_zycia', # 95\n",
" 'Gmina_miejska', # 96\n",
" 'Gmina_miejsko_wiejska', # 97\n",
" 'Gmina_wiejska', # 98\n",
" 'Odleglosc_Warszawa', # 99\n",
" 'Odleglosc_od_centrum_decyzyjnego'] # 100\n",
"\n",
"# Grouping keys for the aggregation in the next cell: the 'Kod' and 'Rok'\n",
"# identifier columns followed by every feature column.\n",
"all_columns = ['Kod', 'Rok'] + feature_names\n",
"\n",
"# The file holds UTF-8 text: decoding it as ISO-8859-2 rendered 'ó' as 'Ăł'\n",
"# in the programme names printed at the end of this cell.\n",
"df_data = pd.read_csv(\n",
"    'dane1.csv',\n",
"    encoding='utf-8',\n",
"    converters={'Kod': str})  # keep 'Kod' as text (presumably to preserve leading zeros)\n",
"\n",
"# Distance lookup keyed by 'TERYT'; the key is renamed to 'Kod' so it can be\n",
"# joined with the main table.\n",
"# NOTE(review): the encoding of this file was not verified; only ASCII-named\n",
"# columns are used from it, so the original setting is left unchanged.\n",
"df_odl = pd.read_csv(\n",
"    'gminy_centroid.csv',\n",
"    encoding='ISO-8859-2',\n",
"    converters={'TERYT': str})\n",
"# The converter above already yields strings, so no extra astype is needed.\n",
"df_odl = df_odl[['TERYT', 'odl_Wawa', 'odl_woj']].rename(columns={\n",
"    'TERYT': 'Kod',\n",
"    'odl_Wawa': 'Odleglosc_Warszawa',\n",
"    'odl_woj': 'Odleglosc_od_centrum_decyzyjnego'})\n",
"\n",
"# Left join keeps every row of df_data; codes without a match get NaN distances.\n",
"df_data = df_data.merge(df_odl, on=['Kod'], how='left')\n",
"\n",
"# List the distinct programme names available for filtering.\n",
"print(df_data['Program_operacyjny'].unique())"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Median 549828.735\n",
"Mean 893364.8266077801\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\micha\\AppData\\Local\\Temp\\ipykernel_14224\\845662055.py:23: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
" df_data = df_data.groupby(all_columns)['Suma'].sum().reset_index()\n",
"C:\\Users\\micha\\AppData\\Local\\Temp\\ipykernel_14224\\845662055.py:23: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
" df_data = df_data.groupby(all_columns)['Suma'].sum().reset_index()\n",
"C:\\Users\\micha\\AppData\\Local\\Temp\\ipykernel_14224\\845662055.py:23: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
" df_data = df_data.groupby(all_columns)['Suma'].sum().reset_index()\n"
]
}
],
"source": [
"# Settings for this cell.\n",
"PROGRAM_NAME = 'Program Operacyjny Polska Cyfrowa'\n",
"ZERO_FILLED_COLUMNS = [\n",
"    'Miejsca_noclegowe_caloroczne',\n",
"    'Miejsca_noclegowe_ogolem',\n",
"    'Obiekty_caloroczne',\n",
"    'Obiekty_ogolem',\n",
"    'Turysci_ogolem',\n",
"    'Turysci_zagraniczni']\n",
"TEST_SIZE = 0.2\n",
"RANDOM_STATE = 0\n",
"\n",
"# NOTE: this cell rebinds df_data to the aggregated frame, which no longer has\n",
"# the 'Program_operacyjny' column; re-run the loading cell before re-running it.\n",
"\n",
"# Keep only the rows of the selected operational programme.\n",
"df_program = df_data.loc[df_data['Program_operacyjny'] == PROGRAM_NAME].reset_index(drop=True)\n",
"\n",
"# Fill the gaps in the accommodation / tourist columns with zero.\n",
"df_program[ZERO_FILLED_COLUMNS] = df_program[ZERO_FILLED_COLUMNS].fillna(0)\n",
"\n",
"# Drop rows that still contain missing values, then rows without a positive 'Suma'.\n",
"df_program = df_program.dropna()\n",
"df_program = df_program[df_program['Suma'] > 0]\n",
"\n",
"# Sum 'Suma' per unique combination of key and feature values. The result is\n",
"# assembled with a single concat because reset_index() inserts the 102 key\n",
"# columns one at a time and triggers pandas' fragmentation PerformanceWarning.\n",
"suma_by_group = df_program.groupby(all_columns)['Suma'].sum()\n",
"df_data = pd.concat(\n",
"    [suma_by_group.index.to_frame(index=False), suma_by_group.reset_index(drop=True)],\n",
"    axis=1)\n",
"\n",
"# Optional normalisation of 'Suma' by the 'Ludnosc' column (left disabled).\n",
"# df_data['Suma'] = df_data['Suma'] / df_data['Ludnosc']\n",
"\n",
"print('Median', df_data['Suma'].median())\n",
"print('Mean', df_data['Suma'].mean())\n",
"\n",
"X = df_data[feature_names]\n",
"y = df_data['Suma']\n",
"\n",
"# Fixed seed so the train/test partition is the same on every run.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"max_depth: 3, n_features: 20, mse_train: 985605388909.4, mse_test: 1473640288814.4 <-\n",
"max_depth: 3, n_features: 19, mse_train: 985605388909.4, mse_test: 1473640288814.4 <-\n",
"max_depth: 3, n_features: 18, mse_train: 985605388909.4, mse_test: 1473640288814.4 <-\n",
"max_depth: 3, n_features: 17, mse_train: 985605388909.4, mse_test: 1473640288814.4\n",
"max_depth: 3, n_features: 16, mse_train: 985605388909.4, mse_test: 1473640288814.4\n",
"max_depth: 3, n_features: 15, mse_train: 985605388909.4, mse_test: 1473640288814.4\n",
"max_depth: 3, n_features: 14, mse_train: 985605388909.4, mse_test: 1473640288814.4\n",
"max_depth: 3, n_features: 13, mse_train: 985605388909.4, mse_test: 1473640288814.4\n",
"max_depth: 3, n_features: 12, mse_train: 985605388909.4, mse_test: 1473640288814.4\n",
"max_depth: 3, n_features: 11, mse_train: 985605388909.4, mse_test: 1473640288814.4\n",
"max_depth: 3, n_features: 10, mse_train: 985605388909.4, mse_test: 1473640288814.4\n",
"max_depth: 3, n_features: 9, mse_train: 985605388909.4, mse_test: 1473640288814.4\n",
"max_depth: 3, n_features: 8, mse_train: 985605388909.4, mse_test: 1473640288814.4\n",
"max_depth: 3, n_features: 7, mse_train: 985605388909.4, mse_test: 1473640288814.4\n",
"max_depth: 3, n_features: 6, mse_train: 985605388909.4, mse_test: 1473640288814.4\n",
"max_depth: 3, n_features: 5, mse_train: 985605388909.4, mse_test: 1473640288814.4\n",
"max_depth: 3, n_features: 4, mse_train: 985605388909.4, mse_test: 1473640288814.4\n",
"max_depth: 3, n_features: 3, mse_train: 1016104087301.3, mse_test: 1477893916125.6\n",
"max_depth: 3, n_features: 2, mse_train: 1089815493742.4, mse_test: 1594239666432.3\n",
"max_depth: 4, n_features: 20, mse_train: 899824809745.5, mse_test: 1499284611541.4\n",
"max_depth: 4, n_features: 19, mse_train: 899824809745.5, mse_test: 1499284611541.4\n",
"max_depth: 4, n_features: 18, mse_train: 899824809745.5, mse_test: 1499284611541.4\n",
"max_depth: 4, n_features: 17, mse_train: 899824809745.5, mse_test: 1499284611541.4\n",
"max_depth: 4, n_features: 16, mse_train: 899824809745.5, mse_test: 1499284611541.4\n",
"max_depth: 4, n_features: 15, mse_train: 899824809745.5, mse_test: 1499284611541.4\n",
"max_depth: 4, n_features: 14, mse_train: 899824809745.5, mse_test: 1499284611541.4\n",
"max_depth: 4, n_features: 13, mse_train: 899824809745.5, mse_test: 1499284611541.4\n",
"max_depth: 4, n_features: 12, mse_train: 899824809745.5, mse_test: 1499284611541.4\n",
"max_depth: 4, n_features: 11, mse_train: 899824809745.5, mse_test: 1499284611541.4\n",
"max_depth: 4, n_features: 10, mse_train: 899824809745.5, mse_test: 1499284611541.4\n",
"max_depth: 4, n_features: 9, mse_train: 899824809745.5, mse_test: 1499284611541.4\n",
"max_depth: 4, n_features: 8, mse_train: 899824809745.5, mse_test: 1499284611541.4\n",
"max_depth: 4, n_features: 7, mse_train: 899824809745.5, mse_test: 1499284611541.4\n",
"max_depth: 4, n_features: 6, mse_train: 908346033942.9, mse_test: 1490386752565.4\n",
"max_depth: 4, n_features: 5, mse_train: 926529787722.3, mse_test: 1402781153280.7 <-\n",
"max_depth: 4, n_features: 4, mse_train: 964049275485.7, mse_test: 1489236448304.3\n",
"max_depth: 4, n_features: 3, mse_train: 1041479069310.2, mse_test: 1645003773149.2\n",
"max_depth: 4, n_features: 2, mse_train: 1048024137736.7, mse_test: 1631458739231.6\n",
"max_depth: 5, n_features: 20, mse_train: 777894457863.3, mse_test: 1481628937180.8\n",
"max_depth: 5, n_features: 19, mse_train: 777894457863.3, mse_test: 1481628937180.8\n",
"max_depth: 5, n_features: 18, mse_train: 777894457863.3, mse_test: 1456513737768.1\n",
"max_depth: 5, n_features: 17, mse_train: 777894457863.3, mse_test: 1481628937180.8\n",
"max_depth: 5, n_features: 16, mse_train: 777894457863.3, mse_test: 1467015002114.1\n",
"max_depth: 5, n_features: 15, mse_train: 777894457863.3, mse_test: 1471127672834.8\n",
"max_depth: 5, n_features: 14, mse_train: 777894457863.3, mse_test: 1456513737768.1\n",
"max_depth: 5, n_features: 13, mse_train: 777894457863.3, mse_test: 1471079590321.0\n",
"max_depth: 5, n_features: 12, mse_train: 777894457863.3, mse_test: 1471127672834.8\n",
"max_depth: 5, n_features: 11, mse_train: 777894457863.3, mse_test: 1456513737768.1\n",
"max_depth: 5, n_features: 10, mse_train: 778314815085.4, mse_test: 1418071734165.4\n",
"max_depth: 5, n_features: 9, mse_train: 778314815085.4, mse_test: 1432733751745.9\n",
"max_depth: 5, n_features: 8, mse_train: 787935644695.6, mse_test: 1430421492243.5\n",
"max_depth: 5, n_features: 7, mse_train: 798366359029.2, mse_test: 1412228802401.6\n",
"max_depth: 5, n_features: 6, mse_train: 849020081720.8, mse_test: 1379810167829.9 <-\n",
"max_depth: 5, n_features: 5, mse_train: 855870271330.2, mse_test: 1392618114515.3\n",
"max_depth: 5, n_features: 4, mse_train: 829431106712.3, mse_test: 1477225230229.6\n",
"max_depth: 5, n_features: 3, mse_train: 843820926828.0, mse_test: 1536212207013.9\n",
"max_depth: 5, n_features: 2, mse_train: 990725881946.2, mse_test: 1714327909974.3\n",
"max_depth: 6, n_features: 20, mse_train: 702120313181.4, mse_test: 1281312959244.0 <-\n",
"max_depth: 6, n_features: 19, mse_train: 702120313181.4, mse_test: 1348026449571.3\n",
"max_depth: 6, n_features: 18, mse_train: 702120313181.4, mse_test: 1337477102711.6\n",
"max_depth: 6, n_features: 17, mse_train: 702120313181.4, mse_test: 1337477102711.6\n",
"max_depth: 6, n_features: 16, mse_train: 702120313181.4, mse_test: 1337525185225.3\n",
"max_depth: 6, n_features: 15, mse_train: 702142989310.9, mse_test: 1337358249457.9\n",
"max_depth: 6, n_features: 14, mse_train: 704419162738.6, mse_test: 1352770446230.8\n",
"max_depth: 6, n_features: 13, mse_train: 703703386581.1, mse_test: 1289272765063.3\n",
"max_depth: 6, n_features: 12, mse_train: 703703386581.1, mse_test: 1309887199769.1\n",
"max_depth: 6, n_features: 11, mse_train: 708724446143.4, mse_test: 1234642841999.7 <-\n",
"max_depth: 6, n_features: 10, mse_train: 711594460766.6, mse_test: 1517285033906.0\n",
"max_depth: 6, n_features: 9, mse_train: 716036934079.4, mse_test: 1475542279563.4\n",
"max_depth: 6, n_features: 8, mse_train: 722046384072.1, mse_test: 1484380668197.3\n",
"max_depth: 6, n_features: 7, mse_train: 734664074932.7, mse_test: 1541936247863.0\n",
"max_depth: 6, n_features: 6, mse_train: 740451460882.2, mse_test: 1473087677930.4\n",
"max_depth: 6, n_features: 5, mse_train: 781881945028.2, mse_test: 1432693773418.1\n",
"max_depth: 6, n_features: 4, mse_train: 769096509833.5, mse_test: 1500627250338.5\n",
"max_depth: 6, n_features: 3, mse_train: 726228905814.3, mse_test: 1553623318775.1\n",
"max_depth: 6, n_features: 2, mse_train: 954694116124.2, mse_test: 1793011349050.2\n",
"max_depth: 7, n_features: 20, mse_train: 640943402298.7, mse_test: 1496067658852.6\n",
"max_depth: 7, n_features: 19, mse_train: 640942908841.0, mse_test: 1540205675724.2\n",
"max_depth: 7, n_features: 18, mse_train: 642047277467.7, mse_test: 1306670793440.2\n",
"max_depth: 7, n_features: 17, mse_train: 642047277467.7, mse_test: 1507534134549.3\n",
"max_depth: 7, n_features: 16, mse_train: 643760549982.0, mse_test: 1521231780106.0\n",
"max_depth: 7, n_features: 15, mse_train: 644701007059.7, mse_test: 1553343966490.6\n",
"max_depth: 7, n_features: 14, mse_train: 646961921791.8, mse_test: 1309275764750.8\n",
"max_depth: 7, n_features: 13, mse_train: 649210980784.4, mse_test: 1267172470255.6\n",
"max_depth: 7, n_features: 12, mse_train: 644449493626.2, mse_test: 1429604209608.8\n",
"max_depth: 7, n_features: 11, mse_train: 645231854299.2, mse_test: 1261650869426.7\n",
"max_depth: 7, n_features: 10, mse_train: 661000789616.8, mse_test: 1469653596135.2\n",
"max_depth: 7, n_features: 9, mse_train: 671851731055.9, mse_test: 1443317676733.9\n",
"max_depth: 7, n_features: 8, mse_train: 668963275155.1, mse_test: 1389604788794.3\n",
"max_depth: 7, n_features: 7, mse_train: 632753850957.4, mse_test: 1485243578684.9\n",
"max_depth: 7, n_features: 6, mse_train: 637473670864.0, mse_test: 1531783207565.3\n",
"max_depth: 7, n_features: 5, mse_train: 644623424816.4, mse_test: 1649506611800.3\n",
"max_depth: 7, n_features: 4, mse_train: 603471006353.0, mse_test: 1617678875412.5\n",
"max_depth: 7, n_features: 3, mse_train: 641958171995.7, mse_test: 1498648924671.5\n",
"max_depth: 7, n_features: 2, mse_train: 912569491717.4, mse_test: 1845041901590.2\n",
"max_depth: 8, n_features: 20, mse_train: 597165835865.7, mse_test: 1486064437752.0\n",
"max_depth: 8, n_features: 19, mse_train: 597165835865.7, mse_test: 1357765835520.1\n",
"max_depth: 8, n_features: 18, mse_train: 597168055646.7, mse_test: 1489452845516.8\n",
"max_depth: 8, n_features: 17, mse_train: 595624202073.7, mse_test: 1245652964515.9\n",
"max_depth: 8, n_features: 16, mse_train: 597142513301.3, mse_test: 1161784034070.4 <-\n",
"max_depth: 8, n_features: 15, mse_train: 590349542179.8, mse_test: 1398505656143.7\n",
"max_depth: 8, n_features: 14, mse_train: 590350436688.6, mse_test: 1379732640660.7\n",
"max_depth: 8, n_features: 13, mse_train: 597330891191.9, mse_test: 1209803349967.5\n",
"max_depth: 8, n_features: 12, mse_train: 597330891191.9, mse_test: 1115005152982.1 <-\n",
"max_depth: 8, n_features: 11, mse_train: 557581134904.5, mse_test: 1458540933633.0\n",
"max_depth: 8, n_features: 10, mse_train: 570830636069.8, mse_test: 1578735817222.2\n",
"max_depth: 8, n_features: 9, mse_train: 569650304338.8, mse_test: 1433881734475.8\n",
"max_depth: 8, n_features: 8, mse_train: 573104648489.6, mse_test: 1502189683249.2\n",
"max_depth: 8, n_features: 7, mse_train: 563187464782.4, mse_test: 1375553354162.2\n",
"max_depth: 8, n_features: 6, mse_train: 549292559957.6, mse_test: 1530005089808.0\n",
"max_depth: 8, n_features: 5, mse_train: 508679202857.1, mse_test: 1719509236785.1\n",
"max_depth: 8, n_features: 4, mse_train: 518356817030.3, mse_test: 1522726943641.9\n",
"max_depth: 8, n_features: 3, mse_train: 530467344219.3, mse_test: 1583900127054.1\n",
"max_depth: 8, n_features: 2, mse_train: 840574918161.4, mse_test: 1959195323321.1\n",
"max_depth: 9, n_features: 20, mse_train: 535278923105.8, mse_test: 1235800919626.8\n",
"max_depth: 9, n_features: 19, mse_train: 535278923105.8, mse_test: 1260992057688.6\n",
"max_depth: 9, n_features: 18, mse_train: 556983466445.3, mse_test: 1410233226055.4\n",
"max_depth: 9, n_features: 17, mse_train: 556983466445.3, mse_test: 1487549309939.7\n",
"max_depth: 9, n_features: 16, mse_train: 556990568094.9, mse_test: 1388042124631.4\n",
"max_depth: 9, n_features: 15, mse_train: 554494150822.8, mse_test: 1464222669501.7\n",
"max_depth: 9, n_features: 14, mse_train: 554487180480.0, mse_test: 1032355164763.0 <-\n",
"max_depth: 9, n_features: 13, mse_train: 558206503594.1, mse_test: 1347599535027.3\n",
"max_depth: 9, n_features: 12, mse_train: 558206503594.1, mse_test: 1367310577205.7\n",
"max_depth: 9, n_features: 11, mse_train: 552726857484.2, mse_test: 1490722225296.5\n",
"max_depth: 9, n_features: 10, mse_train: 553755807995.8, mse_test: 1448803928556.4\n",
"max_depth: 9, n_features: 9, mse_train: 554088151006.5, mse_test: 1578725764570.2\n",
"max_depth: 9, n_features: 8, mse_train: 484989533193.1, mse_test: 1437051627725.8\n",
"max_depth: 9, n_features: 7, mse_train: 486975950965.4, mse_test: 1342285901100.4\n",
"max_depth: 9, n_features: 6, mse_train: 478622464423.1, mse_test: 1473164828920.0\n",
"max_depth: 9, n_features: 5, mse_train: 432573475473.1, mse_test: 1680321217679.1\n",
"max_depth: 9, n_features: 4, mse_train: 468243324296.5, mse_test: 1282206447519.6\n",
"max_depth: 9, n_features: 3, mse_train: 632221414282.3, mse_test: 1555661257162.9\n",
"max_depth: 9, n_features: 2, mse_train: 779576331336.2, mse_test: 2030846366295.0\n",
"max_depth: 10, n_features: 20, mse_train: 477981874413.5, mse_test: 1257412564339.1\n",
"max_depth: 10, n_features: 19, mse_train: 478442315188.5, mse_test: 1434519348953.7\n",
"max_depth: 10, n_features: 18, mse_train: 483342739666.9, mse_test: 1446577247965.3\n",
"max_depth: 10, n_features: 17, mse_train: 490221937003.1, mse_test: 1363984337540.8\n",
"max_depth: 10, n_features: 16, mse_train: 489439792580.7, mse_test: 1543380931079.9\n",
"max_depth: 10, n_features: 15, mse_train: 496906427050.9, mse_test: 1633841281853.9\n",
"max_depth: 10, n_features: 14, mse_train: 497520112488.3, mse_test: 1449600244722.9\n",
"max_depth: 10, n_features: 13, mse_train: 501658494242.7, mse_test: 1380728247943.9\n",
"max_depth: 10, n_features: 12, mse_train: 501658494242.7, mse_test: 1454363983728.7\n",
"max_depth: 10, n_features: 11, mse_train: 489829363264.4, mse_test: 1664372030318.3\n",
"max_depth: 10, n_features: 10, mse_train: 488671118005.6, mse_test: 1612139091918.5\n",
"max_depth: 10, n_features: 9, mse_train: 457849519912.5, mse_test: 1502657280958.6\n",
"max_depth: 10, n_features: 8, mse_train: 459807995358.8, mse_test: 1513864214709.3\n",
"max_depth: 10, n_features: 7, mse_train: 404774716895.9, mse_test: 1386087956438.7\n",
"max_depth: 10, n_features: 6, mse_train: 409177169353.2, mse_test: 1610529856196.3\n",
"max_depth: 10, n_features: 5, mse_train: 430759770781.9, mse_test: 1278180267806.7\n",
"max_depth: 10, n_features: 4, mse_train: 418544300813.8, mse_test: 1398710933396.4\n",
"max_depth: 10, n_features: 3, mse_train: 409012092288.3, mse_test: 1486487918171.3\n",
"max_depth: 10, n_features: 2, mse_train: 743684742050.3, mse_test: 2168019410594.0\n",
"max_depth: 11, n_features: 20, mse_train: 416725715554.3, mse_test: 1552418840658.4\n",
"max_depth: 11, n_features: 19, mse_train: 417392535898.3, mse_test: 1182974406963.9\n",
"max_depth: 11, n_features: 18, mse_train: 463393612745.3, mse_test: 1303845378119.8\n",
"max_depth: 11, n_features: 17, mse_train: 444798380141.2, mse_test: 1234422395499.4\n",
"max_depth: 11, n_features: 16, mse_train: 436849570085.9, mse_test: 1195193713252.5\n",
"max_depth: 11, n_features: 15, mse_train: 436849570085.9, mse_test: 1483902499838.3\n",
"max_depth: 11, n_features: 14, mse_train: 418288121657.2, mse_test: 1498839202505.4\n",
"max_depth: 11, n_features: 13, mse_train: 418332890292.9, mse_test: 1483700139479.6\n",
"max_depth: 11, n_features: 12, mse_train: 399243201699.6, mse_test: 1414872223096.5\n",
"max_depth: 11, n_features: 11, mse_train: 399952039592.2, mse_test: 1364836195145.8\n",
"max_depth: 11, n_features: 10, mse_train: 398462598840.9, mse_test: 1518513560007.6\n",
"max_depth: 11, n_features: 9, mse_train: 405000338035.8, mse_test: 1559991110169.3\n",
"max_depth: 11, n_features: 8, mse_train: 415856879256.2, mse_test: 1404528089021.4\n",
"max_depth: 11, n_features: 7, mse_train: 323719625767.4, mse_test: 1419726733331.6\n",
"max_depth: 11, n_features: 6, mse_train: 409078643742.7, mse_test: 1841584998365.5\n",
"max_depth: 11, n_features: 5, mse_train: 347195980618.8, mse_test: 1715057088184.1\n",
"max_depth: 11, n_features: 4, mse_train: 384194651833.2, mse_test: 1293316989636.8\n",
"max_depth: 11, n_features: 3, mse_train: 485121266494.1, mse_test: 1709543614376.7\n",
"max_depth: 11, n_features: 2, mse_train: 714333718125.8, mse_test: 2224215416510.5\n",
"max_depth: 12, n_features: 20, mse_train: 424021593216.7, mse_test: 1550307240970.9\n",
"max_depth: 12, n_features: 19, mse_train: 423996985355.2, mse_test: 1289134735150.6\n",
"max_depth: 12, n_features: 18, mse_train: 421112820764.1, mse_test: 1496960227836.8\n",
"max_depth: 12, n_features: 17, mse_train: 421772490901.8, mse_test: 1425767098574.0\n",
"max_depth: 12, n_features: 16, mse_train: 421772490901.8, mse_test: 1173261398077.8\n",
"max_depth: 12, n_features: 15, mse_train: 364832705279.4, mse_test: 1430131824111.4\n",
"max_depth: 12, n_features: 14, mse_train: 355775743062.1, mse_test: 1280514112363.6\n",
"max_depth: 12, n_features: 13, mse_train: 352975929795.1, mse_test: 1492779993383.1\n",
"max_depth: 12, n_features: 12, mse_train: 352719835141.9, mse_test: 1204499038531.5\n",
"max_depth: 12, n_features: 11, mse_train: 354069569608.4, mse_test: 1032375857213.2\n",
"max_depth: 12, n_features: 10, mse_train: 361461012142.0, mse_test: 1231373063858.6\n",
"max_depth: 12, n_features: 9, mse_train: 352496690921.7, mse_test: 1477650715338.0\n",
"max_depth: 12, n_features: 8, mse_train: 293839876759.7, mse_test: 1475843768581.8\n",
"max_depth: 12, n_features: 7, mse_train: 300469982263.1, mse_test: 1325868695926.0\n",
"max_depth: 12, n_features: 6, mse_train: 259421009073.4, mse_test: 1350045475173.0\n",
"max_depth: 12, n_features: 5, mse_train: 333972356440.9, mse_test: 1338205435230.8\n",
"max_depth: 12, n_features: 4, mse_train: 363386281411.1, mse_test: 1380334618760.4\n",
"max_depth: 12, n_features: 3, mse_train: 337161312627.4, mse_test: 1503970002701.4\n",
"max_depth: 12, n_features: 2, mse_train: 697549285729.4, mse_test: 2216505358868.2\n",
"max_depth: 13, n_features: 20, mse_train: 326900866212.4, mse_test: 1272709957514.6\n",
"max_depth: 13, n_features: 19, mse_train: 370773628675.9, mse_test: 1362867614861.3\n",
"max_depth: 13, n_features: 18, mse_train: 413802536184.0, mse_test: 1594041454103.7\n",
"max_depth: 13, n_features: 17, mse_train: 409910833394.8, mse_test: 1269269851309.2\n",
"max_depth: 13, n_features: 16, mse_train: 408920959510.2, mse_test: 1464664036551.1\n",
"max_depth: 13, n_features: 15, mse_train: 408391261322.5, mse_test: 1528116560331.1\n",
"max_depth: 13, n_features: 14, mse_train: 289021774282.3, mse_test: 1273491265745.6\n",
"max_depth: 13, n_features: 13, mse_train: 288948837436.4, mse_test: 1293400305808.9\n",
"max_depth: 13, n_features: 12, mse_train: 288587714980.1, mse_test: 1390093475183.3\n",
"max_depth: 13, n_features: 11, mse_train: 217409522268.1, mse_test: 1064187535360.5\n",
"max_depth: 13, n_features: 10, mse_train: 219634447071.6, mse_test: 1076690997264.8\n",
"max_depth: 13, n_features: 9, mse_train: 226075644337.6, mse_test: 1262646102151.2\n",
"max_depth: 13, n_features: 8, mse_train: 233947945428.2, mse_test: 1278212853663.7\n",
"max_depth: 13, n_features: 7, mse_train: 222603578244.8, mse_test: 1235392567282.1\n",
"max_depth: 13, n_features: 6, mse_train: 206073462712.5, mse_test: 1060915323340.2\n",
"max_depth: 13, n_features: 5, mse_train: 277848834374.4, mse_test: 1209382633518.8\n",
"max_depth: 13, n_features: 4, mse_train: 337469158806.1, mse_test: 1370262367697.3\n",
"max_depth: 13, n_features: 3, mse_train: 396048327549.5, mse_test: 1711114015358.1\n",
"max_depth: 13, n_features: 2, mse_train: 680324528339.2, mse_test: 2287852137283.0\n",
"max_depth: 14, n_features: 20, mse_train: 334016286802.3, mse_test: 1204076681219.3\n",
"max_depth: 14, n_features: 19, mse_train: 334575228513.3, mse_test: 1482593267162.9\n",
"max_depth: 14, n_features: 18, mse_train: 334579988171.9, mse_test: 1345345492191.0\n",
"max_depth: 14, n_features: 17, mse_train: 288693871319.2, mse_test: 1257775313637.5\n",
"max_depth: 14, n_features: 16, mse_train: 288699162744.5, mse_test: 1230281180727.9\n",
"max_depth: 14, n_features: 15, mse_train: 246009978122.7, mse_test: 1301656081776.7\n",
"max_depth: 14, n_features: 14, mse_train: 243548648577.9, mse_test: 1257384597789.8\n",
"max_depth: 14, n_features: 13, mse_train: 240153595680.5, mse_test: 1081139368056.0\n",
"max_depth: 14, n_features: 12, mse_train: 242686689810.6, mse_test: 1392243929363.7\n",
"max_depth: 14, n_features: 11, mse_train: 242692914710.0, mse_test: 1290996774974.0\n",
"max_depth: 14, n_features: 10, mse_train: 242630493459.2, mse_test: 1405134434140.2\n",
"max_depth: 14, n_features: 9, mse_train: 199087898232.6, mse_test: 1191089503939.3\n",
"max_depth: 14, n_features: 8, mse_train: 203704038887.1, mse_test: 1443698228005.1\n",
"max_depth: 14, n_features: 7, mse_train: 201997271890.9, mse_test: 1377709992754.4\n",
"max_depth: 14, n_features: 6, mse_train: 201450093050.1, mse_test: 1076764654390.9\n",
"max_depth: 14, n_features: 5, mse_train: 235269853854.8, mse_test: 1192401603471.1\n",
"max_depth: 14, n_features: 4, mse_train: 307256256523.8, mse_test: 1370887604660.0\n",
"max_depth: 14, n_features: 3, mse_train: 305425215366.8, mse_test: 1514296867478.7\n",
"max_depth: 14, n_features: 2, mse_train: 677761842740.6, mse_test: 2292210089482.5\n",
"max_depth: 15, n_features: 20, mse_train: 296033323492.8, mse_test: 1199283904522.9\n",
"max_depth: 15, n_features: 19, mse_train: 242678219821.0, mse_test: 1101522025193.1\n",
"max_depth: 15, n_features: 18, mse_train: 242678219821.0, mse_test: 1487956702249.7\n",
"max_depth: 15, n_features: 17, mse_train: 242865553929.2, mse_test: 1241352030456.0\n",
"max_depth: 15, n_features: 16, mse_train: 243727826670.5, mse_test: 1318839918591.2\n",
"max_depth: 15, n_features: 15, mse_train: 243857052518.4, mse_test: 1253681989464.0\n",
"max_depth: 15, n_features: 14, mse_train: 245669813380.7, mse_test: 1152020772119.4\n",
"max_depth: 15, n_features: 13, mse_train: 251033541531.2, mse_test: 1695617954247.6\n",
"max_depth: 15, n_features: 12, mse_train: 251118073572.4, mse_test: 1780299655166.2\n",
"max_depth: 15, n_features: 11, mse_train: 237022506027.9, mse_test: 1747775306691.7\n",
"max_depth: 15, n_features: 10, mse_train: 228699140524.8, mse_test: 1770080154100.6\n",
"max_depth: 15, n_features: 9, mse_train: 230999974547.2, mse_test: 1249927459415.7\n",
"max_depth: 15, n_features: 8, mse_train: 146733112412.8, mse_test: 1077109079098.7\n",
"max_depth: 15, n_features: 7, mse_train: 150087100870.5, mse_test: 1238353911628.7\n",
"max_depth: 15, n_features: 6, mse_train: 202716271510.6, mse_test: 1220972425827.8\n",
"max_depth: 15, n_features: 5, mse_train: 209281946196.2, mse_test: 1123618552167.6\n",
"max_depth: 15, n_features: 4, mse_train: 244419255770.9, mse_test: 1465545478593.7\n",
"max_depth: 15, n_features: 3, mse_train: 284198758490.5, mse_test: 1442028281990.6\n",
"max_depth: 15, n_features: 2, mse_train: 675729031674.7, mse_test: 2289119927579.1\n",
"max_depth: 16, n_features: 20, mse_train: 288140324352.4, mse_test: 1510365082965.7\n",
"max_depth: 16, n_features: 19, mse_train: 211508275060.7, mse_test: 1440666244875.9\n",
"max_depth: 16, n_features: 18, mse_train: 213726588121.6, mse_test: 1434562330348.8\n",
"max_depth: 16, n_features: 17, mse_train: 213726588121.6, mse_test: 1347888915463.1\n",
"max_depth: 16, n_features: 16, mse_train: 213241163989.5, mse_test: 1352106364953.5\n",
"max_depth: 16, n_features: 15, mse_train: 208602868794.6, mse_test: 1232196368066.8\n",
"max_depth: 16, n_features: 14, mse_train: 208706161927.1, mse_test: 1242990604993.6\n",
"max_depth: 16, n_features: 13, mse_train: 174038140706.1, mse_test: 1221750109283.4\n",
"max_depth: 16, n_features: 12, mse_train: 174242113490.9, mse_test: 1105134987771.2\n",
"max_depth: 16, n_features: 11, mse_train: 174142848270.4, mse_test: 1007478987995.9 <-\n",
"max_depth: 16, n_features: 10, mse_train: 174961135964.7, mse_test: 1193898873983.6\n",
"max_depth: 16, n_features: 9, mse_train: 169284705489.0, mse_test: 1226803337767.2\n",
"max_depth: 16, n_features: 8, mse_train: 111948900290.2, mse_test: 1369097264335.9\n",
"max_depth: 16, n_features: 7, mse_train: 292484025380.9, mse_test: 1686383882704.4\n",
"max_depth: 16, n_features: 6, mse_train: 223585535630.0, mse_test: 1719149252266.6\n",
"max_depth: 16, n_features: 5, mse_train: 154548813658.6, mse_test: 1494087001176.9\n",
"max_depth: 16, n_features: 4, mse_train: 214045689563.6, mse_test: 1313968357091.3\n",
"max_depth: 16, n_features: 3, mse_train: 252733890830.4, mse_test: 1409855848518.7\n",
"max_depth: 16, n_features: 2, mse_train: 673091235192.7, mse_test: 2295462882349.6\n",
"max_depth: 17, n_features: 20, mse_train: 201270505979.9, mse_test: 1357840701303.8\n",
"max_depth: 17, n_features: 19, mse_train: 201289528365.0, mse_test: 1374166404469.6\n",
"max_depth: 17, n_features: 18, mse_train: 201361451033.1, mse_test: 1388733834768.3\n",
"max_depth: 17, n_features: 17, mse_train: 201354276447.9, mse_test: 1291104821210.8\n",
"max_depth: 17, n_features: 16, mse_train: 201393174045.6, mse_test: 1370455012584.9\n",
"max_depth: 17, n_features: 15, mse_train: 171599584298.4, mse_test: 1197094361358.0\n",
"max_depth: 17, n_features: 14, mse_train: 171600643024.2, mse_test: 1150016000780.4\n",
"max_depth: 17, n_features: 13, mse_train: 172213613910.6, mse_test: 1345922521125.9\n",
"max_depth: 17, n_features: 12, mse_train: 147079891814.2, mse_test: 1389399785957.7\n",
"max_depth: 17, n_features: 11, mse_train: 108282072242.9, mse_test: 1293247288696.6\n",
"max_depth: 17, n_features: 10, mse_train: 103391257737.6, mse_test: 1174658490101.2\n",
"max_depth: 17, n_features: 9, mse_train: 115312087684.4, mse_test: 1313368012017.6\n",
"max_depth: 17, n_features: 8, mse_train: 107332201181.6, mse_test: 1096206501023.0\n",
"max_depth: 17, n_features: 7, mse_train: 100715021906.7, mse_test: 1254087467758.4\n",
"max_depth: 17, n_features: 6, mse_train: 106297912517.5, mse_test: 1328729052866.7\n",
"max_depth: 17, n_features: 5, mse_train: 121756789096.6, mse_test: 1226402990577.2\n",
"max_depth: 17, n_features: 4, mse_train: 188286233698.2, mse_test: 1311822594980.3\n",
"max_depth: 17, n_features: 3, mse_train: 222136949866.6, mse_test: 1312277262365.1\n",
"max_depth: 17, n_features: 2, mse_train: 671799209176.2, mse_test: 2303478638550.0\n",
"max_depth: 18, n_features: 20, mse_train: 152831992492.7, mse_test: 1209461207250.2\n",
"max_depth: 18, n_features: 19, mse_train: 139730657588.1, mse_test: 1102399672099.0\n",
"max_depth: 18, n_features: 18, mse_train: 157323947880.5, mse_test: 1375218408164.1\n",
"max_depth: 18, n_features: 17, mse_train: 157478734571.4, mse_test: 1358769890271.5\n",
"max_depth: 18, n_features: 16, mse_train: 157984636263.6, mse_test: 1241705010949.1\n",
"max_depth: 18, n_features: 15, mse_train: 157424524072.3, mse_test: 1402681240406.2\n",
"max_depth: 18, n_features: 14, mse_train: 120131329224.5, mse_test: 1492069534262.4\n",
"max_depth: 18, n_features: 13, mse_train: 119500757995.5, mse_test: 1120319715387.4\n",
"max_depth: 18, n_features: 12, mse_train: 121029789076.8, mse_test: 1060243248491.4\n",
"max_depth: 18, n_features: 11, mse_train: 108415505352.8, mse_test: 1304228167204.5\n",
"max_depth: 18, n_features: 10, mse_train: 81034397031.0, mse_test: 1370722673122.4\n",
"max_depth: 18, n_features: 9, mse_train: 80859900170.5, mse_test: 1656330294051.0\n",
"max_depth: 18, n_features: 8, mse_train: 94285259081.7, mse_test: 1440950367711.9\n",
"max_depth: 18, n_features: 7, mse_train: 81087235240.0, mse_test: 1214216017201.5\n",
"max_depth: 18, n_features: 6, mse_train: 66983219816.5, mse_test: 1119848519674.8\n",
"max_depth: 18, n_features: 5, mse_train: 138272666271.2, mse_test: 924309503293.0 <-\n",
"max_depth: 18, n_features: 4, mse_train: 155481113509.0, mse_test: 1337096016251.0\n",
"max_depth: 18, n_features: 3, mse_train: 197239862148.3, mse_test: 1368894405814.7\n",
"max_depth: 18, n_features: 2, mse_train: 670285071135.2, mse_test: 2303218298775.9\n",
"max_depth: 19, n_features: 20, mse_train: 229914843440.3, mse_test: 1238488815537.5\n",
"max_depth: 19, n_features: 19, mse_train: 229727257353.9, mse_test: 1465210025524.1\n",
"max_depth: 19, n_features: 18, mse_train: 139494888531.6, mse_test: 1163444865180.7\n",
"max_depth: 19, n_features: 17, mse_train: 140696102306.3, mse_test: 1617550712690.5\n",
"max_depth: 19, n_features: 16, mse_train: 138313118252.9, mse_test: 1340624456194.5\n",
"max_depth: 19, n_features: 15, mse_train: 109466142177.2, mse_test: 1068546497722.5\n",
"max_depth: 19, n_features: 14, mse_train: 103979181917.5, mse_test: 1335710346792.0\n",
"max_depth: 19, n_features: 13, mse_train: 104626791389.5, mse_test: 986621190686.0\n",
"max_depth: 19, n_features: 12, mse_train: 104028504624.2, mse_test: 1167695265629.7\n",
"max_depth: 19, n_features: 11, mse_train: 103750532322.5, mse_test: 1117696215118.3\n",
"max_depth: 19, n_features: 10, mse_train: 89912027092.1, mse_test: 1292551304758.2\n",
"max_depth: 19, n_features: 9, mse_train: 58720228785.7, mse_test: 1171814403090.5\n",
"max_depth: 19, n_features: 8, mse_train: 58564437822.0, mse_test: 1264077177824.6\n",
"max_depth: 19, n_features: 7, mse_train: 54032197778.9, mse_test: 1391914893899.8\n",
"max_depth: 19, n_features: 6, mse_train: 19108020802.8, mse_test: 1292213870022.3\n",
"max_depth: 19, n_features: 5, mse_train: 81642224014.8, mse_test: 1184491331827.7\n",
"max_depth: 19, n_features: 4, mse_train: 138352235193.8, mse_test: 1169527657863.8\n",
"max_depth: 19, n_features: 3, mse_train: 169584878103.6, mse_test: 1340514157746.4\n",
"max_depth: 19, n_features: 2, mse_train: 669055030059.1, mse_test: 2305768427003.3\n",
"max_depth: 20, n_features: 20, mse_train: 99818647306.9, mse_test: 1232861338365.6\n",
"max_depth: 20, n_features: 19, mse_train: 99818636202.7, mse_test: 1409618818334.5\n",
"max_depth: 20, n_features: 18, mse_train: 94440380872.8, mse_test: 1369044209909.5\n",
"max_depth: 20, n_features: 17, mse_train: 125415025554.3, mse_test: 1543791579848.3\n",
"max_depth: 20, n_features: 16, mse_train: 125512001705.3, mse_test: 1412722891501.0\n",
"max_depth: 20, n_features: 15, mse_train: 94408017936.3, mse_test: 1256265257689.8\n",
"max_depth: 20, n_features: 14, mse_train: 89060147269.1, mse_test: 961285177059.1\n",
"max_depth: 20, n_features: 13, mse_train: 89643306022.6, mse_test: 879912454221.0 <-\n",
"max_depth: 20, n_features: 12, mse_train: 90295978214.3, mse_test: 1123639446094.5\n",
"max_depth: 20, n_features: 11, mse_train: 59437782375.2, mse_test: 1242649913627.1\n",
"max_depth: 20, n_features: 10, mse_train: 46604716119.1, mse_test: 1202278080826.4\n",
"max_depth: 20, n_features: 9, mse_train: 46246508227.5, mse_test: 1311829935890.1\n",
"max_depth: 20, n_features: 8, mse_train: 46777150202.9, mse_test: 1501930355823.5\n",
"max_depth: 20, n_features: 7, mse_train: 46315432892.2, mse_test: 1106751860516.1\n",
"max_depth: 20, n_features: 6, mse_train: 57300759689.8, mse_test: 1346829091643.2\n",
"max_depth: 20, n_features: 5, mse_train: 63292945243.9, mse_test: 1165413114524.4\n",
"max_depth: 20, n_features: 4, mse_train: 111031406683.0, mse_test: 1140112215185.6\n",
"max_depth: 20, n_features: 3, mse_train: 134454145355.2, mse_test: 1290965931407.2\n",
"max_depth: 20, n_features: 2, mse_train: 667901897756.1, mse_test: 2305980504297.8\n",
"max_depth: 21, n_features: 20, mse_train: 176071825306.2, mse_test: 1303255156046.1\n",
"max_depth: 21, n_features: 19, mse_train: 172092643462.5, mse_test: 1581387413486.2\n",
"max_depth: 21, n_features: 18, mse_train: 81379303765.5, mse_test: 1458698766791.3\n",
"max_depth: 21, n_features: 17, mse_train: 113642528347.3, mse_test: 1469853541707.2\n",
"max_depth: 21, n_features: 16, mse_train: 108756257845.7, mse_test: 1386594881248.7\n",
"max_depth: 21, n_features: 15, mse_train: 80234429955.4, mse_test: 1381565692159.9\n",
"max_depth: 21, n_features: 14, mse_train: 77531830843.2, mse_test: 1199477427106.2\n",
"max_depth: 21, n_features: 13, mse_train: 76941363189.7, mse_test: 913116853373.1\n",
"max_depth: 21, n_features: 12, mse_train: 76937857696.7, mse_test: 1083489883486.7\n",
"max_depth: 21, n_features: 11, mse_train: 42664001241.9, mse_test: 1206385134705.6\n",
"max_depth: 21, n_features: 10, mse_train: 42975098149.2, mse_test: 1344484961821.7\n",
"max_depth: 21, n_features: 9, mse_train: 26502867875.2, mse_test: 1267874780351.7\n",
"max_depth: 21, n_features: 8, mse_train: 29166057473.1, mse_test: 1354778058778.7\n",
"max_depth: 21, n_features: 7, mse_train: 23928844473.3, mse_test: 1291562326052.7\n",
"max_depth: 21, n_features: 6, mse_train: 6369250224.5, mse_test: 1250904154555.1\n",
"max_depth: 21, n_features: 5, mse_train: 44903116798.1, mse_test: 1107717254082.6\n",
"max_depth: 21, n_features: 4, mse_train: 86096850822.3, mse_test: 1126414191599.2\n",
"max_depth: 21, n_features: 3, mse_train: 102690661204.1, mse_test: 1230017537500.6\n",
"max_depth: 21, n_features: 2, mse_train: 194428901800.2, mse_test: 1066274325803.1\n",
"max_depth: 22, n_features: 20, mse_train: 104489417403.5, mse_test: 1561793950317.6\n",
"max_depth: 22, n_features: 19, mse_train: 103866801451.6, mse_test: 1366406104592.0\n",
"max_depth: 22, n_features: 18, mse_train: 104927002403.9, mse_test: 1231386974035.7\n",
"max_depth: 22, n_features: 17, mse_train: 104929684394.0, mse_test: 1452565465353.3\n",
"max_depth: 22, n_features: 16, mse_train: 68081719827.6, mse_test: 1208543673748.4\n",
"max_depth: 22, n_features: 15, mse_train: 61259240771.7, mse_test: 1260166858299.0\n",
"max_depth: 22, n_features: 14, mse_train: 61042629140.1, mse_test: 934177960432.3\n",
"max_depth: 22, n_features: 13, mse_train: 61415595373.7, mse_test: 968723647299.6\n",
"max_depth: 22, n_features: 12, mse_train: 59534628185.2, mse_test: 1126929566173.8\n",
"max_depth: 22, n_features: 11, mse_train: 33182524659.2, mse_test: 1277672529886.9\n",
"max_depth: 22, n_features: 10, mse_train: 33527623941.3, mse_test: 1410480647443.2\n",
"max_depth: 22, n_features: 9, mse_train: 10626247469.6, mse_test: 1309029635507.7\n",
"max_depth: 22, n_features: 8, mse_train: 10688173623.2, mse_test: 1355927090505.1\n",
"max_depth: 22, n_features: 7, mse_train: 15687267712.7, mse_test: 1683363669059.5\n",
"max_depth: 22, n_features: 6, mse_train: 1587154099.0, mse_test: 1452632595762.0\n",
"max_depth: 22, n_features: 5, mse_train: 35471732026.0, mse_test: 1081779221280.1\n",
"max_depth: 22, n_features: 4, mse_train: 47807680266.2, mse_test: 1023039716965.3\n",
"max_depth: 22, n_features: 3, mse_train: 65757333996.1, mse_test: 1236459583746.5\n",
"max_depth: 22, n_features: 2, mse_train: 135268910244.4, mse_test: 1065979937243.9\n",
"max_depth: 23, n_features: 20, mse_train: 83956586465.5, mse_test: 1496072927827.8\n",
"max_depth: 23, n_features: 19, mse_train: 83992520283.1, mse_test: 1515116942810.8\n",
"max_depth: 23, n_features: 18, mse_train: 84011579417.1, mse_test: 1260608301719.3\n",
"max_depth: 23, n_features: 17, mse_train: 74421801173.4, mse_test: 1374591557320.4\n",
"max_depth: 23, n_features: 16, mse_train: 74282312827.3, mse_test: 1598478038477.0\n",
"max_depth: 23, n_features: 15, mse_train: 52267132533.8, mse_test: 919318584073.2\n",
"max_depth: 23, n_features: 14, mse_train: 52538552410.6, mse_test: 1239026454455.8\n",
"max_depth: 23, n_features: 13, mse_train: 50456013190.3, mse_test: 1069689581221.0\n",
"max_depth: 23, n_features: 12, mse_train: 43911206137.5, mse_test: 1051643374311.4\n",
"max_depth: 23, n_features: 11, mse_train: 44449723839.3, mse_test: 1089129203341.7\n",
"max_depth: 23, n_features: 10, mse_train: 20620286807.9, mse_test: 1159284624313.6\n",
"max_depth: 23, n_features: 9, mse_train: 20758968369.9, mse_test: 1282815778915.7\n",
"max_depth: 23, n_features: 8, mse_train: 24158069851.9, mse_test: 1372686461680.3\n",
"max_depth: 23, n_features: 7, mse_train: 13764214478.8, mse_test: 1191367174285.8\n",
"max_depth: 23, n_features: 6, mse_train: 1095743495.4, mse_test: 1250152844697.8\n",
"max_depth: 23, n_features: 5, mse_train: 44419633244.9, mse_test: 1443310784349.3\n",
"max_depth: 23, n_features: 4, mse_train: 51679711818.2, mse_test: 1113152388953.6\n",
"max_depth: 23, n_features: 3, mse_train: 45932745214.8, mse_test: 1280771993094.8\n",
"max_depth: 23, n_features: 2, mse_train: 665122219863.5, mse_test: 2316039506992.8\n",
"max_depth: 24, n_features: 20, mse_train: 93434908701.4, mse_test: 1492624132722.0\n",
"max_depth: 24, n_features: 19, mse_train: 75031262408.6, mse_test: 1690561746636.5\n",
"max_depth: 24, n_features: 18, mse_train: 80042434120.4, mse_test: 1404385854408.3\n",
"max_depth: 24, n_features: 17, mse_train: 80219694036.2, mse_test: 1352552644970.9\n",
"max_depth: 24, n_features: 16, mse_train: 80365479777.5, mse_test: 1314988411444.3\n",
"max_depth: 24, n_features: 15, mse_train: 40529113743.5, mse_test: 1216484047884.8\n",
"max_depth: 24, n_features: 14, mse_train: 36140047521.2, mse_test: 1059101335194.7\n",
"max_depth: 24, n_features: 13, mse_train: 34324239307.8, mse_test: 883544711827.3\n",
"max_depth: 24, n_features: 12, mse_train: 34767946437.8, mse_test: 989146098066.4\n",
"max_depth: 24, n_features: 11, mse_train: 25211970404.3, mse_test: 931215946860.1\n",
"max_depth: 24, n_features: 10, mse_train: 4573532339.0, mse_test: 1140621506935.4\n",
"max_depth: 24, n_features: 9, mse_train: 4320874047.0, mse_test: 1240978215172.8\n",
"max_depth: 24, n_features: 8, mse_train: 8515000026.9, mse_test: 1341818219250.8\n",
"max_depth: 24, n_features: 7, mse_train: 12743847457.5, mse_test: 1220985719094.9\n",
"max_depth: 24, n_features: 6, mse_train: 2907903557.1, mse_test: 963583401536.3\n",
"max_depth: 24, n_features: 5, mse_train: 15587890946.6, mse_test: 1120417043231.8\n",
"max_depth: 24, n_features: 4, mse_train: 42598954160.5, mse_test: 1123769747925.0\n",
"max_depth: 24, n_features: 3, mse_train: 25853836524.7, mse_test: 1204449413786.6\n",
"max_depth: 24, n_features: 2, mse_train: 94531956262.6, mse_test: 1028748493563.2\n",
"max_depth: 25, n_features: 20, mse_train: 65278675204.5, mse_test: 1355784806220.1\n",
"max_depth: 25, n_features: 19, mse_train: 63709623835.1, mse_test: 1342343310703.4\n",
"max_depth: 25, n_features: 18, mse_train: 68766978492.4, mse_test: 1423375943739.1\n",
"max_depth: 25, n_features: 17, mse_train: 42444963094.6, mse_test: 1405036150658.1\n",
"max_depth: 25, n_features: 16, mse_train: 42437066410.1, mse_test: 1241971435103.1\n",
"max_depth: 25, n_features: 15, mse_train: 29678288006.6, mse_test: 975694605636.3\n",
"max_depth: 25, n_features: 14, mse_train: 29576148620.7, mse_test: 1123734178435.9\n",
"max_depth: 25, n_features: 13, mse_train: 30732235569.3, mse_test: 1079791526511.7\n",
"max_depth: 25, n_features: 12, mse_train: 25038488371.4, mse_test: 1286559673326.0\n",
"max_depth: 25, n_features: 11, mse_train: 25558913939.2, mse_test: 926827570852.4\n",
"max_depth: 25, n_features: 10, mse_train: 5663963296.9, mse_test: 1487725398939.6\n",
"max_depth: 25, n_features: 9, mse_train: 5010489285.1, mse_test: 1392355900608.1\n",
"max_depth: 25, n_features: 8, mse_train: 1990142618.8, mse_test: 1220478081159.5\n",
"max_depth: 25, n_features: 7, mse_train: 459420252.4, mse_test: 1225244334264.1\n",
"max_depth: 25, n_features: 6, mse_train: 445903174.5, mse_test: 1061203201781.0\n",
"max_depth: 25, n_features: 5, mse_train: 7789846208.2, mse_test: 1073538713267.4\n",
"max_depth: 25, n_features: 4, mse_train: 35222330370.0, mse_test: 1129038949059.7\n",
"max_depth: 25, n_features: 3, mse_train: 17685619506.6, mse_test: 1223079813449.3\n",
"max_depth: 25, n_features: 2, mse_train: 664759876307.5, mse_test: 2317782985074.5\n",
"max_depth: 26, n_features: 20, mse_train: 42993707681.4, mse_test: 1604904942020.1\n",
"max_depth: 26, n_features: 19, mse_train: 54780684379.6, mse_test: 1590779779077.3\n",
"max_depth: 26, n_features: 18, mse_train: 54920175020.0, mse_test: 1270013909732.2\n",
"max_depth: 26, n_features: 17, mse_train: 52199196631.2, mse_test: 1217786560326.2\n",
"max_depth: 26, n_features: 16, mse_train: 8035367880.5, mse_test: 1279991806941.3\n",
"max_depth: 26, n_features: 15, mse_train: 4181423603.1, mse_test: 1488625419035.4\n",
"max_depth: 26, n_features: 14, mse_train: 4358364326.0, mse_test: 1176626013623.0\n",
"max_depth: 26, n_features: 13, mse_train: 3967318247.2, mse_test: 1332774096639.1\n",
"max_depth: 26, n_features: 12, mse_train: 5302763500.7, mse_test: 1160540897591.6\n",
"max_depth: 26, n_features: 11, mse_train: 5413367344.8, mse_test: 1061923109034.3\n",
"max_depth: 26, n_features: 10, mse_train: 7867580794.5, mse_test: 1160901704143.8\n",
"max_depth: 26, n_features: 9, mse_train: 8360277141.4, mse_test: 1186433178325.8\n",
"max_depth: 26, n_features: 8, mse_train: 2288267848.9, mse_test: 1508735098235.1\n",
"max_depth: 26, n_features: 7, mse_train: 2448965249.1, mse_test: 1256712606896.6\n",
"max_depth: 26, n_features: 6, mse_train: 155631833.6, mse_test: 1036017490193.2\n",
"max_depth: 26, n_features: 5, mse_train: 4308924411.2, mse_test: 1054265862113.9\n",
"max_depth: 26, n_features: 4, mse_train: 31130594885.1, mse_test: 1175963944751.0\n",
"max_depth: 26, n_features: 3, mse_train: 12850611389.1, mse_test: 1201096981875.0\n",
"max_depth: 26, n_features: 2, mse_train: 69407859881.2, mse_test: 1048353738665.2\n",
"max_depth: 27, n_features: 20, mse_train: 37929075234.1, mse_test: 1495534126323.5\n",
"max_depth: 27, n_features: 19, mse_train: 37004116084.5, mse_test: 1503503047226.6\n",
"max_depth: 27, n_features: 18, mse_train: 41603694561.8, mse_test: 1408204384970.6\n",
"max_depth: 27, n_features: 17, mse_train: 30349013888.5, mse_test: 1492403719402.8\n",
"max_depth: 27, n_features: 16, mse_train: 38722569959.6, mse_test: 1651509764428.8\n",
"max_depth: 27, n_features: 15, mse_train: 11840723433.7, mse_test: 1038002973924.0\n",
"max_depth: 27, n_features: 14, mse_train: 9895380075.4, mse_test: 1281611100933.8\n",
"max_depth: 27, n_features: 13, mse_train: 8929197893.3, mse_test: 1131336461767.0\n",
"max_depth: 27, n_features: 12, mse_train: 9140695805.2, mse_test: 903726672128.5\n",
"max_depth: 27, n_features: 11, mse_train: 4396975783.7, mse_test: 1211979070130.1\n",
"max_depth: 27, n_features: 10, mse_train: 4336006852.2, mse_test: 1085573573322.2\n",
"max_depth: 27, n_features: 9, mse_train: 182277177.6, mse_test: 923981085402.1\n",
"max_depth: 27, n_features: 8, mse_train: 253370526.0, mse_test: 922773302183.0\n",
"max_depth: 27, n_features: 7, mse_train: 2863339813.1, mse_test: 1147364680118.7\n",
"max_depth: 27, n_features: 6, mse_train: 7272.9, mse_test: 1175304944956.2\n",
"max_depth: 27, n_features: 5, mse_train: 3403485178.1, mse_test: 1075097884124.8\n",
"max_depth: 27, n_features: 4, mse_train: 23783872433.2, mse_test: 1192393635469.4\n",
"max_depth: 27, n_features: 3, mse_train: 8105747511.5, mse_test: 1224672727488.6\n",
"max_depth: 27, n_features: 2, mse_train: 66022684853.2, mse_test: 1048931809459.1\n",
"max_depth: 28, n_features: 20, mse_train: 46651558596.1, mse_test: 1488067351628.6\n",
"max_depth: 28, n_features: 19, mse_train: 46651544698.0, mse_test: 1342267182895.0\n",
"max_depth: 28, n_features: 18, mse_train: 46701414853.5, mse_test: 1528952552864.8\n",
"max_depth: 28, n_features: 17, mse_train: 43090311447.9, mse_test: 1670489027349.9\n",
"max_depth: 28, n_features: 16, mse_train: 43079340624.0, mse_test: 1358118262757.2\n",
"max_depth: 28, n_features: 15, mse_train: 26018818502.0, mse_test: 1448782524839.2\n",
"max_depth: 28, n_features: 14, mse_train: 6126351404.2, mse_test: 1136330953814.9\n",
"max_depth: 28, n_features: 13, mse_train: 5197161310.7, mse_test: 971593005235.8\n",
"max_depth: 28, n_features: 12, mse_train: 5197161310.7, mse_test: 1263639378492.3\n",
"max_depth: 28, n_features: 11, mse_train: 5673642287.6, mse_test: 1032533277517.4\n",
"max_depth: 28, n_features: 10, mse_train: 5048611695.4, mse_test: 1147372164198.8\n",
"max_depth: 28, n_features: 9, mse_train: 3102339115.6, mse_test: 1346971955277.1\n",
"max_depth: 28, n_features: 8, mse_train: 1449114301.5, mse_test: 1305299937071.3\n",
"max_depth: 28, n_features: 7, mse_train: 464765009.2, mse_test: 1286283590615.3\n",
"max_depth: 28, n_features: 6, mse_train: 0.0, mse_test: 1211697559449.3\n",
"max_depth: 28, n_features: 5, mse_train: 2470313319.8, mse_test: 1096808443528.2\n",
"max_depth: 28, n_features: 4, mse_train: 20728732736.9, mse_test: 1163987068645.2\n",
"max_depth: 28, n_features: 3, mse_train: 5002178892.2, mse_test: 1127689867980.4\n",
"max_depth: 28, n_features: 2, mse_train: 664649701829.0, mse_test: 2317579435548.2\n"
]
}
],
"source": [
"param_grid = {\n",
" 'max_depth': [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28],\n",
" 'n_features_to_select': [20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2]\n",
"}\n",
"\n",
"best_mse = np.inf\n",
"best_model = None\n",
"best_params = None\n",
"\n",
"results_train = []\n",
"results_test = []\n",
"\n",
"for params in [{'max_depth': max_depth, 'n_features_to_select': n_features} \n",
" for max_depth in param_grid['max_depth'] \n",
" for n_features in param_grid['n_features_to_select']]:\n",
"\n",
" selector = RFE(estimator=DecisionTreeRegressor(random_state=0, max_depth=params['max_depth']), n_features_to_select=params['n_features_to_select'])\n",
" selector.fit(X_train, y_train)\n",
" X_train_selected = selector.transform(X_train)\n",
" X_test_selected = selector.transform(X_test)\n",
" \n",
" model = DecisionTreeRegressor(random_state=0, max_depth=params['max_depth'])\n",
" model.fit(X_train_selected, y_train)\n",
" \n",
" y_pred_train = model.predict(X_train_selected)\n",
" y_pred_test = model.predict(X_test_selected)\n",
"\n",
" mse_train = mean_squared_error(y_pred_train, y_train)\n",
" mse_test = mean_squared_error(y_pred_test, y_test)\n",
"\n",
2024-05-22 13:01:06 +02:00
" results_train.append((params['max_depth'], params['n_features_to_select'], mse_train))\n",
" results_test.append((params['max_depth'], params['n_features_to_select'], mse_test))\n",
2024-05-06 23:50:22 +02:00
"\n",
2024-05-22 13:01:06 +02:00
" if mse_test < best_mse:\n",
" best_mse = mse_test\n",
" best_model = (model, selector)\n",
" best_params = (params['max_depth'], params['n_features_to_select'])\n",
2024-05-06 23:50:22 +02:00
"\n",
2024-05-22 13:01:06 +02:00
" print(f\"max_depth: {params['max_depth']:>{2}}, \"\n",
" f\"n_features: {params['n_features_to_select']:>{2}}, \"\n",
" f\"mse_train: {mse_train:>{20}.1f}, \"\n",
" f\"mse_test: {mse_test:>{20}.1f} <-\")\n",
" else:\n",
" print(f\"max_depth: {params['max_depth']:>{2}}, \"\n",
" f\"n_features: {params['n_features_to_select']:>{2}}, \"\n",
" f\"mse_train: {mse_train:>{20}.1f}, \"\n",
" f\"mse_test: {mse_test:>{20}.1f}\")"
]
},
{
"cell_type": "code",
2024-05-22 13:01:06 +02:00
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-05-22 13:01:06 +02:00
"(20, 13)\n"
]
}
],
"source": [
"# Label the tuple so the reader knows which element is which.\n",
"print(f\"best (max_depth, n_features_to_select): {best_params}\")"
]
},
{
"cell_type": "code",
2024-05-22 13:01:06 +02:00
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
2024-05-22 13:01:06 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZkAAAGHCAYAAABieS8lAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOz9eZBk6Vnfj37OmvteW1dVd1X1Mt09+0zP2iMJCSQGIQsEP35gi00owMYYg6V7Lz9kwETYF3QJ2UIOIVvh8E8WujYG7CsbsARGmtFIaGY0e+/Ve3fta+7rWd/7R1ZmZ1ZlVVdVV3Vnz5xPhCI01XXOefNUnvd73ud9nu8jCSEEHh4eHh4eu4B8pwfg4eHh4fH2xRMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZDw8PD49dwxMZj9uOEALHcRBC3OmheHh47DLqnR6AxzsL13WxLItKpYIsyyiKgqqqqKqKoihIknSnh+jh4bGDSMJ7nfS4DTRWL7Zt47oupmk2fy6EQJKkpuhomoaiKJ7oeHi8DfBExmPXEUJgWRaO4zR/Zpomsiw3/x3qq5zVotO6yvFEx8Pj7sMTGY9dxXEcLMvCdV1kWUaSpGbIbD3B8ETHw+PtgycyHruCEALbtrFtGyFEU2CAZrhMkqRNiUSr6Liu2zzGEx0Pj+7HExmPHcd1XXK5HKqqomnaGjHZqsisprG6EUKsEZ3Gfo6qqm3C5uHhcWfwsss8dozGpG9ZFqdPn2bfvn0MDg7u+HVaxUlRlDbRqdVqbb+jKAo+nw9FUTzR8fC4A3gi47EjtIbHgNs6ma8nOqVSibfeeovjx48393Q0TWuG1zzR8fDYfTyR8bhlGqsXx3GaE7ckSXes2LJ1FQOgqmrHlc7qGh1PdDw8dh5PZDy2zeral9ZJeiORuZ0TeSMzrdNKx3XdpujIsrwmkcATHQ+PW8cTGY9tsbr2ZfWEfLOVzJ2cvNcTHcdxcByHWq3miY6Hxw7hiYzHlmlkh61evbRyJ8NlrWPY7O819myANaJjGEbbnk4jzLbd7DgPj3cSnsh4bJrGxGtZ1pral9V0g8hsl41Ex7bttj2f1b5rnuh4eLTjiYzHprhZeGw13SIyOzGG9UTHtu2mc0FDdFp91xq/7+HxTsYTGY+b0rp62ezbereIzG7giY6Hx+bxRMZjXTrVvmxln+NOi8ztCl3dTHSApsNBLBbzRMfjHYUnMh4dcV2XbDbL9PQ099xzz5YnxG4QmTtFJ9GZnp6mXC5z+PDh5r+v9l3zRMfj7YgnMh5ttFrDVKtVFhYWOHLkyJbP0y0i0w1jaBUdTdPa7nFjpbNadBrZax4edzueyHg0Wb2536gh2Q7dIDLdNEm33otWN4LGvzVEp9U4tCE6XtdQj7sZT2Q8gM7WMLIs39Uic7ewGdHxuoZ63K14IvMO52bWMK7rbuu8G4mMbdvMzMwQDAaJx+O7vhfRyIq702wlaWIzouP10vG4G/BE5h3MzWpfdmMlUywWOXHiBACWZWHbNrFYjEQiQTKZJBwO75jodNOkeyurulbRaW3gZppmmxuBJzoe3YgnMu9QOrVFXs2thrxajxVCMDMzw/j4OCMjI+
zbtw+AarVKNpslk8kwOTkJQDweJ5FIkEgkCIVC3mTZQqvnGnii49H9eCLzDmN17cvNrGG2Gy6TZbm5QrJtm7Nnz5JOp3nkkUdIpVLN4s5QKEQoFGJ4eBghBMVikWw2Szqd5sqVKyiK0hScRCJBIBDY8mR5t4XLtnPOVtFp/M8wDEzTBLxW1R53Dk9k3kE04voN4bhZceVOrGQKhQInTpzA7/dz/Phx/H7/hi0AotEo0WiUkZERXNelUCiQzWZZWFjg4sWL6LreFJxkMonP59v2+G4ntysJYqOuoYZhYBgGjuNQKpXo7+/3HKY9dh1PZN4BtG4cbxQeW01rMeFWJyBJkigWi7zyyiuMjY1x4MCBLZ9DlmXi8TjxeJyxsTEcxyGfz5PNZpuht2Aw2BSdeDyOruttY+gm7sR4OolOtVrl3LlzxGKx5u94XUM9dgtPZN7mbNXYspXG721VZGzbZmFhgWq1yrFjx0ilUlsfeAcURSGZTJJMJoF64kAulyObzXLt2jXK5TLhcLgpOsFgcEeuuxN0Szp3Q3QkSWoWhnpdQz12E09k3sa4rku1WuXll1/mqaeeQtO0LR3fmFQaq5/NkM/nOXnyJAC9vb2bFhhXWAhRQ5b8SNLmxqlpGr29vfT29gJ1f7BsNks2m+XSpUtUq1UArl69SjKZbPqGvdNpfWnYTNdQT3Q8bgVPZN6GrK59KZfL23qTbg2Xbeaak5OTXLx4kf379yOEoFwub+I4m7J9nqpzCVdUUKQQQfUIAeUeJGlrqcy6rtPf309/fz9QT5d+7bXXMAyDc+fOYds20Wi0uZ8TiURuq19Yt0zKG61MNxIdwzC8rqEeW8YTmbcZq8Njqlr/E28nS6w1XLYRlmVx5swZcrkcjz32GIlEgmvXrm1KnMrWGYr2ayhSCEUK4YgyefO7CM0hpN235TG34vf7AThy5AiyLFOtVslkMk3jT9d129Klw+Hwrk2U3RIug62FP1cnh3hdQz22iicybyM2qn3ZziTXGi5bj1wux8mTJwmHwzzzzDNtG+83u6btlqk4F1CkKJpc32dRpBCWu0TFPk9APYgs3Xr2WGNSDQaDBIPBZrp0qVRqhteuXbuGJElt6dLBYPBtOVHeSkq31zXUY6t4IvM2oLX2pVNbZFmWd3wlI4Tg+vXrXL58mYMHDzI6Otp2zc2kPzuihEsFXRpo+7kiRbFEBkeUd0RkOiFJEpFIhEgkwr59+3Bdt1mjs7S0xOXLl1FVdU2Nzq1esxvYybqhrTRwWx1e83hn4InMXY7ruti2vWH22HaLKhsTxGqxME2T06dPUywWefzxx4nH4x2PXX2c7RapWJcxnGmEoyGTRBIaLlUUKXLjM4kqMjqy5N/ymFePYbPIskwsFiMWizE6OorjOM0anbm5OS5cuIDP5yOZTDZFp3XVdjO6pSAUdncsWxEdr2voOwNPZO5SWmtfbtYWeSc9yLLZLCdPniQajfLMM8+sm7G2+jjbzZOufgPDmUKWQthODSFARscWJXySjEwQV5SxRJaw+jCKdOdSkFudBqCelt1Il56YmODs2bOEQqG2Gp2bZe+9E0RmNTcTncbPLMsiHo97ovM2xBOZu5CttkXebriscW7XdRFCcO3aNa5cucKhQ4cYGRnZkltAyRyn5kwTVPcDCio2pp3DlYr4pCEckcMSS8hSgJB6H2HtoW2NtxM7semuqio9PT309PQA9WSHxn7OlStXqFQqRCKRNtHp1nTpO7mq6iQ6mUyGCxcu8PjjjwOdLXA80bl78UTmLqNT35ebcSv2MLIsY5omb7zxBuVymSeeeKJZKb7ZawohqDnX0OQokqTQGIoqxzDcLAHtILrSjysqyFIITU5sa6y3E03T6Ovro6+vDwDDMJqic/78eUzTbEuX3kqt0W7TTaG7xji8rqFvXzyRuUvYqO/LzbiVlYwQghMnTpBMJjl+/PimCzpdqYirXqdkhfDJA0goCNrHUBchgSTJK9llyW2NcT
1u50Tk8/kYGBhgYGCgad3SEJ2ZmRls28bn86FpGolEgkgkcscmStd1u2qSbiSrgNc19O2IJzJ3AbdiDQPb2/gXQnD
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Collect the per-grid-point training errors into a frame for plotting.\n",
"results_train_df = pd.DataFrame(results_train, columns=['max_depth', 'n_features_to_select', 'mse_train'])\n",
"\n",
"# 3D scatter over the (max_depth, n_features) grid; colour encodes the MSE value.\n",
"fig, ax = plt.subplots(subplot_kw={'projection': '3d'})\n",
"\n",
"xs = results_train_df['max_depth']\n",
"ys = results_train_df['n_features_to_select']\n",
"zs = results_train_df['mse_train']\n",
"\n",
"ax.scatter(xs, ys, zs, c=zs, marker='o')\n",
"\n",
"# Title and z-label name the split so this figure is not confused with the test plot.\n",
"ax.set_title('Training MSE by max depth and number of features')\n",
"ax.set_xlabel('Max Depth')\n",
"ax.set_ylabel('Number of Features')\n",
"ax.set_zlabel('Train MSE')\n",
"\n",
"plt.show()"
2024-05-06 23:50:22 +02:00
]
},
{
"cell_type": "code",
2024-05-22 13:01:06 +02:00
"execution_count": 60,
2024-05-06 23:50:22 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-05-22 13:01:06 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZkAAAGHCAYAAABieS8lAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOz9d3Rk+X3di35OqpyRc2h0npme1DPTPaQYxExRlmxKshxomXdJlC3xms9+endZy5aDJEuU7avr9xT8bMmiAvnkK5tUsiiJQUOKEzkzDXQ30GjknCvnOun9UX1qCmiEqgLQQA/PXmvWmgZw6pw6deq3f9+0t2CapokNGzZs2LBxDBBP+gJs2LBhw8bbFzbJ2LBhw4aNY4NNMjZs2LBh49hgk4wNGzZs2Dg22CRjw4YNGzaODTbJ2LBhw4aNY4NNMjZs2LBh49hgk4wNGzZs2Dg22CRjw4YNGzaODTbJ2LBhw4aNY4NNMjZs2LBh49hgk4wNGzZs2Dg22CRjw4YNGzaODTbJ2LBhw4aNY4NNMjZs2LBh49hgk4wNGzZs2Dg22CRjw4YNGzaODTbJ2LBhw4aNY4NNMjZs2LBh49hgk4wNGzZs2Dg22CRjw4YNGzaODTbJ2LBhw4aNY4NNMjZs2LBh49hgk4wNGzZs2Dg22CRjw4YNGzaODTbJ2LBhw4aNY4NNMjZs2LBh49hgk4wNGzZs2Dg22CRjw4YNGzaODTbJ2LBhw4aNY4NNMjZs2LBh49hgk4wNGzZs2Dg22CRjw4YNGzaODTbJ2LBhw4aNY4NNMjZs2LBh49hgk4wNGzZs2Dg22CRj44HDNE10Xcc0zZO+FBs2bBwz5JO+ABvfWTAMA1VVyeVyiKKIJEnIsowsy0iShCAIJ32JNmzYOEIIpr2dtPEAYEUvmqZhGAalUqnyc9M0EQShQjqKoiBJkk06Nmy8DWCTjI1jh2maqKqKruuVn5VKJURRrPweylHOTtKpjnJs0rFh4+GDTTI2jhW6rqOqKoZhIIoigiBUUmZ7EYZNOjZsvH1gk4yNY4FpmmiahqZpmKZZIRigki4TBKEmkqgmHcMwKsfYpGPDxumHTTI2jhyGYZBIJJBlGUVR7iOTeklmJ6zoxjTN+0jHqufIsryN2GzYsHEysLvLbBwZrEVfVVVu3bpFb28vnZ2dR36eanKSJGkb6RQKhW1/I0kSTqcTSZJs0rFh4wRgk4yNI0F1egx4oIv5XqSTyWS4ceMG169fr9R0FEWppNds0rFh4/hhk4yNQ8OKXnRdryzcgiCc2LBldRQDIMvyrpHOzhkdm3Rs2Dh62CRjo2HsnH2pXqT3I5kHuZBbnWm7RTqGYVRIRxTF+xoJbNKxYePwsEnGRkPYOfuyc0E+KJI5ycV7L9LRdR1d1ykUCjbp2LBxRLBJxkbdsLrDdkYv1TjJdFn1NdT6d1bNBriPdIrF4raajpVma7Q7zoaN7yTYJGOjZlgLr6qq982+7MRpIJlGsR/paJq2reazU3fNJh0bNrbDJhkbNeGg9NhOnBaSOYpr2It0NE2rKBdYpFOtu2b9vQ0b38mwScbGgaiOXmrdrZ8WkjkO2KRjw0btsEnGxp7YbfalnjrHSZPMg0pdHUQ6QEXhIBgM2qRj4zsKNsnY2BWGYRCPx1laWuLcuXN1L4ingWROCruRztLSEtlslvPnz1d+v1N3zSYdG29H2CRjYxuqpWHy+Tzr6+tcuHCh7tc5LSRzGq6hmnQURdl2j61IZyfpWN1rNmw87LBJxkYFO4v71gxJIzgNJHOaFunqe1GtRmD9ziKdauFQi3Rs11AbDzNskrEB7C4NI4riQ00yDwtqIR3bNdTGwwqbZL7DcZA0jGEYDb3ufiSjqirz8/N4PB7C4TCKojR8/bXA6oo7adTTNFEL6dheOjYeBtgk8x2Mg2ZfjiOSSSQSDA8P43A40DSNXC6H3+8nHA4TDocJhULbFtjD4DQtuoeJ6qpJp9
rArVQqbVMjsEnHxmmETTLfodjNFnknDpvyqj7WNE3m5uaYmppiaGio4jNTKpWIx+PE43HGx8cplUoEAoEK6QSDQbvrqgrVmmtgk46N0w+bZL7DsHP25SBpmEbTZaIoViKkUqnErVu3SKfTXL16lWAwWBnudDqdtLe3097eXpHit0hnZWUFTdMIhUIV0vH7/XUvlg9buqyR16wmHeu/YrFIqVQCbKtqGycHm2S+g2Dl9S3iOGi48igimXg8zsjICIFAgOeff77SwrvX+dxuN263m87OTkzTJJvNVkhnfn4eYBvpeL3eh2KxfFBNEPu5hhaLRYrFIrquk8lkaGtrsxWmbRw7bJL5DkB14Xi/9NhOVA8T1rsACYJAMplkZWWFc+fO0dvb29Br+Hw+fD4fPT09mKZJOp0mHo8TjUaZnp5GkqQK4YTDYdxu97bGhdOEk7ie3Ugnn88zNjZGMBis/I3tGmrjuGCTzNsc9QpbVsP6u3pJplgssrq6SqlU4plnnqksZoeFIAgEAgECgQB9fX0YhkEqlSIej7O+vs7ExAQOh4NwOEwkEsHr9R7JeY8Cp6Wd2yIdQRAqUaXtGmrjOGGTzNsYhmGQz+d5+eWXee655+puFbYWFSv6qQXRaJSbN2+iKAotLS1HRjC7QRRFQqEQoVCIgYEBdF0nkUgQj8dZXFwknU4DMDExQSQSeSDt0g8DqjcNtbiG2qRj4zCwSeZtiJ2zL9lstqGddHW6rJZzTk9PMzs7y/nz5ymVSmSz2brPeRhIkkRTUxNNTU0AFYIVBIHZ2Vlu376Nz+fb1i4tyw/uK3BaFuX9ItP9SKdYLNquoTbqhk0ybzPsTI9Zi2gjXWLV6bL9UCwWGRkZoVAo8OyzzxIIBJidnT3xFJH13oeGhpAkaVu79OTkJIVCYduMjqWQfBw46XtRjXrSnzubQ2zXUBv1wiaZtxH2m31pZJGrTpftha2tLW7evElTUxNPPvnktsjgoHM+qNZi6zocDgdtbW20tbUB5UgnkUgQi8UYGxtD0zSCweC2dum344zOYe677Rpqo17YJPM2QPXsy262yKIoHnkkYxgG09PTzM3NcfHiRbq6urad82HQLrPapTs6OjBNk1wuV4l0FhcXMQxjW7u0z+c71EJ5WhbZoyT3egzcdqbXbHxnwCaZhxyGYaBp2r7dY40OVVoLxE6yKBQKjIyMUCqVeO655/D7/bsee9IkU89CKggCXq8Xr9dLd3c3pmmSyWQqpDM7O1tpNLBIx+Px1HyO0zIQCsd7LfWQju0a+p0Bm2QeUlTPvhxki3yUGmSbm5vcvHmT1tZWnnrqqT0L56eBZA4DQRDw+/34/X56e3sxDKMyo7O5ucnU1BSKomyb0XG5XAe+5mnAgyS8g0jH+pmqqhXdOpt03l6wSeYhRL22yI2my6zXNgwDwzCYnJxkYWGBS5cu0dXVdeBxp4VkjuI6RFEkGAwSDAbp7+9H13WSySTxeJzl5WXGx8dxuVzbSMfhcBzB1R89TjKq2o10YrEYd+/e5erVq8DuEjg26Ty8sEnmIcNuvi8H4TALviiKFAoFRkdH0TSNa9eu4fP5jvWcDwMkSSISiRCJRADQNK0yozM/P8/o6Cher7dCONbndRpwmlJ31nXYrqFvX9gk85BgP9+Xg3CYSMY0TUZGRmhvb+fixYs1t/ieBpJ5kAuRLMs0NzfT3NwMlEVBLdKZnp4ml8tVIpujtjSoF4ZhnKpF2mpWAds19O0Im2QeAhxGGgYaK/wbhsHExASapjE0NMTQ0FBdx58GkjlJOBwOWltbaW1tBWBsbKyyO7csDarbpQOBwAONdE7ToryfokQtpGO7hp5u2CRzymEYBslkkps3b/Lss8829OWpt/Cfy+UYGRnBMAwcDkclJVQP9iMZQRC2LRDHjdNAdqIo4vP5OHPmTEWk0upcW1paQtf1Q1sa1IrTlC6D+iKrWknHtjU4PbBJ5pTCSo9ZEUwymTzUAF2tkcza2h
q3b9+ms7OT8+fP8+KLLzY8yLnbcaZpsri4yPj4OEBlNx+JRI58YT1tC0u1XpjH48Hj8dDV1fXALQ1OI8k0utmoJh3
2024-05-06 23:50:22 +02:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-05-22 13:01:06 +02:00
"results_test_df = pd.DataFrame(results_test, columns=['max_depth', 'n_features_to_select', 'mse_test'])\n",
"\n",
"fig = plt.figure()\n",
"ax = fig.add_subplot(111, projection='3d')\n",
"\n",
"xs = results_test_df['max_depth']\n",
"ys = results_test_df['n_features_to_select']\n",
"zs = results_test_df['mse_test']\n",
"\n",
"ax.scatter(xs, ys, zs, c=zs, marker='o')\n",
"\n",
2024-05-22 13:01:06 +02:00
"ax.set_xlabel('Max Depth')\n",
"ax.set_ylabel('Number of Features')\n",
"ax.set_zlabel('MSE')\n",
"\n",
2024-05-06 23:50:22 +02:00
"plt.show()"
]
},
{
"cell_type": "code",
2024-05-22 13:01:06 +02:00
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
2024-05-22 13:01:06 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAHHCAYAAACRAnNyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAADKZklEQVR4nOzdd3gc1fXw8e9sX0mrVS+WZEnuvRtj3Oi9N2MgMZiaQEJCIAkplAQwvAmQXyDg0AwEiGmmhw62KQb33ots2ZLVpV1p+868f1xbRpZsbCFpJfl8nmcfe2dmZ8+ozdl7z71XMwzDQAghhBCiEzLFOgAhhBBCiIORREUIIYQQnZYkKkIIIYTotCRREUIIIUSnJYmKEEIIITotSVSEEEII0WlJoiKEEEKITksSFSGEEEJ0WpKoCCGEEKLTkkRFCPGDNE3j7rvvjnUYMXf88cdz/PHHNz4vKipC0zSee+65mMV0oANjFKKrk0RFiA72+OOPo2ka48aNa/U5SkpKuPvuu1mxYkXbBdbJzZs3D03TGh9Wq5VevXrx05/+lG3btsU6vCPyzTffcPfdd1NbWxvrUITo9CyxDkCIo81LL71EQUEBixYtYsuWLfTp0+eIz1FSUsI999xDQUEBI0aMaPsgO7Ff/vKXjB07lnA4zLJly3jyySd5//33Wb16NT169OjQWPLz8/H7/Vit1iN63TfffMM999zDVVddRVJSUvsEJ0Q3IS0qQnSg7du388033/Dwww+Tnp7OSy+9FOuQupxJkyZx5ZVXcvXVV/Poo4/y97//nerqap5//vmDvqahoaFdYtE0DYfDgdlsbpfzCyEkURGiQ7300kskJydz1llncfHFFx80UamtreXXv/41BQUF2O12cnNz+elPf0plZSXz5s1j7NixAFx99dWNXSH76iQKCgq46qqrmp3zwNqFUCjEnXfeyejRo3G73cTHxzNp0iS++OKLI76usrIyLBYL99xzT7N9GzduRNM0HnvsMQDC4TD33HMPffv2xeFwkJqaysSJE/nkk0+O+H0BTjzxREAlgQB33303mqaxbt06Lr/8cpKTk5k4cWLj8S+++CKjR4/G6XSSkpLCZZddRnFxcbPzPvnkk/Tu3Run08kxxxzDl19+2eyYg9WobNiwgUsvvZT09HScTif9+/fnj3/8Y2N8t99+OwCFhYWN37+ioqJ2iVGIrk66foToQC+99BIXXnghNpuNadOm8cQTT7B48eLGxAOgvr6eSZMmsX79embMmMGoUaOorKzknXfeYdeuXQwcOJC//OUv3HnnnVx//fVMmjQJgOOOO+6IYvF4PDz99NNMmzaN6667Dq/XyzPPPMNpp53GokWLjqhLKTMzkylTpvDqq69y1113Ndn3yiuvYDabueSSSwB1o545cybXXnstxxxzDB6PhyVLlrBs2TJOOeWUI7oGgK1btwKQmpraZPsll1xC3759uf/++zEMA4D77ruPP//5z1x66aVce+21VFRU8OijjzJ58mSWL1/e2A3zzDPPcMMNN3Dcccfxq1/9im3btnHuueeSkpJCXl7eIeNZtWoVkyZNwmq1cv3111NQUMDWrVt59913ue+++7jwwgvZtGkT//3vf3nkkUdIS0sDID09vcNiFKJLMYQQHWLJkiUGYHzyySeGYRiGrutGbm6uccsttzQ57s477zQAY+7cuc3Ooeu6YRiGsXjxYgMwZs+e3eyY/Px8Y/r06c22T5kyxZgyZUrj80gkYgSDwSbH1NTUGJmZmcaMGTOabAeMu+6665DX9+9//9sAjNWrVzfZPmjQIOPEE09sfD58+HDjrLPOOuS5WvLFF18YgPHss88aFRUVRklJifH+++8bBQUFhqZpxuLFiw3DMIy77rrLAIxp06Y1eX1RUZFhNpuN++67r8n21atXGxaLpXF7KBQyMjIyjBEjRjT5+jz55JMG0ORruH379mbfh8mTJxsul8vYsWNHk/fZ97
0zDMP429/+ZgDG9u3b2z1GIbo66foRooO89NJLZGZmcsIJJwCqvmHq1KnMmTOHaDTaeNwbb7zB8OHDueCCC5qdQ9O0NovHbDZjs9kA0HWd6upqIpEIY8aMYdmyZUd8vgsvvBCLxcIrr7zSuG3NmjWsW7eOqVOnNm5LSkpi7dq1bN68uVVxz5gxg/T0dHr06MFZZ51FQ0MDzz//PGPGjGly3I033tjk+dy5c9F1nUsvvZTKysrGR1ZWFn379m3s8lqyZAnl5eXceOONjV8fgKuuugq3233I2CoqKliwYAEzZsygZ8+eTfYdzveuI2IUoqvpNonKggULOOecc+jRoweapvHWW28d8TkMw+Dvf/87/fr1w263k5OTw3333df2wYqjTjQaZc6cOZxwwgls376dLVu2sGXLFsaNG0dZWRmfffZZ47Fbt25lyJAhHRLX888/z7BhwxprRdLT03n//fepq6s74nOlpaVx0kkn8eqrrzZue+WVV7BYLFx44YWN2/7yl79QW1tLv379GDp0KLfffjurVq067Pe58847+eSTT/j8889ZtWoVJSUl/OQnP2l2XGFhYZPnmzdvxjAM+vbtS3p6epPH+vXrKS8vB2DHjh0A9O3bt8nr9w2HPpR9w6Rb+/3riBiF6Gq6TY1KQ0MDw4cPZ8aMGU3+KB6JW265hY8//pi///3vDB06lOrqaqqrq9s4UnE0+vzzzyktLWXOnDnMmTOn2f6XXnqJU089tU3e62Cf3KPRaJPRKS+++CJXXXUV559/PrfffjsZGRmYzWZmzpzZWPdxpC677DKuvvpqVqxYwYgRI3j11Vc56aSTGuswACZPnszWrVt5++23+fjjj3n66ad55JFHmDVrFtdee+0PvsfQoUM5+eSTf/A4p9PZ5Lmu62iaxgcffNDiKJ2EhITDuML21RViFKKjdZtE5YwzzuCMM8446P5gMMgf//hH/vvf/1JbW8uQIUN48MEHG0dBrF+/nieeeII1a9bQv39/oPknMiFa66WXXiIjI4N//etfzfbNnTuXN998k1mzZuF0Ounduzdr1qw55PkO1Y2QnJzc4kRiO3bsaPJp+/XXX6dXr17MnTu3yfkOLIY9Eueffz433HBDY/fPpk2buOOOO5odl5KSwtVXX83VV19NfX09kydP5u677z6sRKW1evfujWEYFBYW0q9fv4Mel5+fD6jWjX0jikCNVtq+fTvDhw8/6Gv3fX1b+/3riBiF6Gq6TdfPD7n55ptZuHAhc+bMYdWqVVxyySWcfvrpjf3k7777Lr169eK9996jsLCQgoICrr32WmlRET+a3+9n7ty5nH322Vx88cXNHjfffDNer5d33nkHgIsuuoiVK1fy5ptvNjuXsXf0Snx8PECLCUnv3r359ttvCYVCjdvee++9ZsNb931i33dOgO+++46FCxe2+lqTkpI47bTTePXVV5kzZw42m43zzz+/yTFVVVVNnickJNCnTx+CwWCr3/dwXHjhhZjNZu65554m1wzqa7AvrjFjxpCens6sWbOafA2fe+65H5xJNj09ncmTJ/Pss8+yc+fOZu+xz8G+fx0RoxBdTbdpUTmUnTt3Mnv2bHbu3Nk4c+Vtt93Ghx9+yOzZs7n//vvZtm0bO3bs4LXXXuOFF14gGo3y61//mosvvpjPP/88xlcgurJ33nkHr9fLueee2+L+Y489tnHyt6lTp3L77bfz+uuvc8kllzBjxgxGjx5NdXU177zzDrNmzWL48OH07t2bpKQkZs2ahcvlIj4+nnHjxlFYWMi1117L66+/zumnn86ll17K1q1befHFF+ndu3eT9z377LOZO3cuF1xwAWeddRbbt29n1qxZDBo0iPr6+lZf79SpU7nyyit5/PHHOe2005rNvDpo0CCOP/54Ro8eTUpKCkuWLOH111/n5ptvbvV7Ho7evXtz7733cscdd1BUVMT555+Py+Vi+/btvPnmm1x//fXcdtttWK1W7r33Xm644Q
ZOPPFEpk6dyvbt25k9e/Zh1X/885//ZOLEiYwaNYrrr7+ewsJCioqKeP/99xuXPBg9ejQAf/zjH7nsssuwWq2cc84
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-05-22 13:01:06 +02:00
"X_test_selected = best_model[1].transform(X_test)\n",
"y_pred_test = best_model[0].predict(X_test_selected)\n",
"\n",
"color_column = df_data['Gmina_miejska'] * 0 + df_data['Gmina_miejsko_wiejska'] * 10 + df_data['Gmina_wiejska'] * 5\n",
"\n",
"color_column_train, color_column_test = train_test_split(color_column, test_size=0.2, random_state=0)\n",
"\n",
"min_val = min(min(y_test), min(y_pred_test))\n",
"max_val = max(max(y_test), max(y_pred_test))\n",
"plt.plot([min_val, max_val], [min_val, max_val], 'r--')\n",
"\n",
"plt.scatter(y_test, y_pred_test, alpha=0.5, c=color_column_test, cmap='rainbow')\n",
"plt.xlabel('Actual')\n",
"plt.ylabel('Predicted')\n",
"plt.title('Actual vs Predicted')\n",
"\n",
2024-05-22 13:01:06 +02:00
"plt.xlim(0, 0.75*10**7)\n",
"plt.ylim(min(y_pred_test), 0.75*10**7)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-05-22 13:01:06 +02:00
"execution_count": 64,
"metadata": {},
"outputs": [
{
2024-05-22 13:01:06 +02:00
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAhcAAAHHCAYAAAAMD3r6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABY1ElEQVR4nO3deXhMZ/8/8PfJNomESEQ2SyyxiyCKUNJWCNUQfSxVtbaebxFbKjRVYg9amqqtVVuf2qrFg2oUtVRRFdJHi1iCKFkQEtkmMXN+f/Rn2mkWmcx9TCbzfl3XuS5zzpnP+UzMTD657/vctyTLsgwiIiIiQaxMnQARERFVLiwuiIiISCgWF0RERCQUiwsiIiISisUFERERCcXigoiIiIRicUFERERCsbggIiIioVhcEBERkVAsLoiIiEgoFhdEZmTDhg2QJAmSJOH48eNFjsuyjDp16kCSJLzyyiu6/dnZ2YiOjkbLli3h6OiIGjVqoHXr1pg4cSLu3LmjO2/WrFm6+MVtqampz+R1EpF5szF1AkRkOHt7e2zevBnPP/+83v6jR4/ijz/+gEql0u0rLCxE165dcenSJQwfPhzjx49HdnY2fv/9d2zevBn9+vWDt7e3XpxVq1bBycmpyHWrV6+uyOshosqFxQWRGXr55Zexfft2LFu2DDY2f32MN2/ejICAANy7d0+3b9euXTh37hw2bdqE119/XS9Ofn4+CgoKisTv378/3NzclHsBRFSpsVuEyAwNHjwY9+/fx4EDB3T7CgoK8PXXXxcpIK5duwYA6Ny5c5E49vb2qFatmrLJEpHFYXFBZIbq1auHwMBAbNmyRbfvu+++Q2ZmJl577TW9c318fAAAX3zxBWRZLlP8jIwM3Lt3T297+PChsPyJqHJjcUFkpl5//XXs2rULeXl5AIBNmzYhKCioyPiJsLAwNGnSBDNnzkT9+vUxcuRIrFu3Dunp6SXGbtKkCWrWrKm3dezYUdHXQ0SVB4sLIjM1cOBA5OXlYe/evXj06BH27t1bpEsEABwcHPDzzz8jMjISwJ93nLz55pvw8vLC+PHjoVarizznm2++wYEDB/S29evXK/6aiKhyYHFBZKZq1qyJ4OBgbN68GTt27IBGo0H//v2LPdfZ2RmLFy/GjRs3cOPGDaxduxZNmjTB8uXLMXfu3CLnd+3aFcHBwXpbYGCg0i+JqEI7duwYQkND4e3tDUmSsGvXLoOen5+fjxEjRsDPzw82NjYICwsrcs6OHTvQvXt31KxZE9WqVUNgYCD2798v5gU8QywuiMzY66+/ju+++w6rV69Gr169ynSrqI+PD0aNGoWffvoJ1atXx6ZNm5RPlKgSyMnJgb+/P1asWFGu52s0Gjg4OGDChAkIDg4u9pxjx46he/fu2LdvH+Lj4/Hiiy8iNDQU586dMyb1Z47FBZEZ69evH6ysrHDq1Kliu0RK4+LigoYNGyIlJUWh7Igql169emHevHno169fscfVajWmTJmCWrVqwdHRER06dMCRI0d0xx0dHbFq1SqMHj0anp6excaIjY3F1KlT8dxzz6FRo0ZYsGABGjVqhD179ijxkhTDeS6IzJiTkxNWrVqFGzduIDQ0tNhzfv31V9SqVavIvBU3b97EhQsX0KRJk2eRKlGlFx4ejgsXLmDr1q3w9vbGzp070bNnT5w/fx6NGjUqV0ytVotHjx7B1dVVcLbKYnFBZOaGDx9e6vEDBw4gOjoaffr0QceOHeHk5ISkpCSsW7cOarUas2bNKvKcr7/+utgZOrt37w4PDw9RqRNVGsnJyVi/fj2Sk5N1d2xNmTIFcXFxWL9+PRYsWFCuuB9++CGys7MxcOBAkekqjsUFUSX3r3/9C48ePcL333+PH374ARkZGXBxcUH79u3xzjvv4MUXXyzynDFjxhQb6/DhwywuiIpx/vx5aDQaNG7cWG+/Wq1GjRo1yhVz8+bNmD17Nv773//C3d1dRJrPjC
SXdVYdIiIiAgBIkoSdO3fq7vjYtm0bhgwZgt9//x3W1tZ65zo5ORUZYzFixAg8fPiwxDtOtm7dilGjRmH79u3o3bu3Ei9BUWy5ICIiMlKbNm2g0WiQnp6OLl26GBVry5YtGDVqFLZu3WqWhQXA4oKIiKhMsrOzcfXqVd3j69evIyEhAa6urmjcuDGGDBmCYcOGYcmSJWjTpg3u3r2LQ4cOoVWrVroi4cKFCygoKEBGRgYePXqEhIQEAEDr1q0B/NkVMnz4cHz88cfo0KEDUlNTAfw5GZ6zs/Mzfb3GYLcIERFRGRw5cqTYMUrDhw/Hhg0bUFhYiHnz5uGLL77A7du34ebmho4dO2L27Nnw8/MD8Oe6QDdv3iwS48mv4hdeeAFHjx4t8RrmgsUFERERCcVJtIiIiEgoFhdEREQkFIsLIiIiEqpS3i2iTW389JMM9HthrvCYGZoqwmMSEZFhgupdVvwaon4vWXkqn6sIbLkgIiIioSplywUREVFFooVWSBxzaRFgcUFERKQwjSymuDCXX9rmkicREZHZ0sKyppQylxYWIiIiMhMmb7m4ePEiTp06hcDAQDRt2hSXLl3Cxx9/DLVajTfeeAMvvfRSqc9Xq9VQq9V6+2zVWqhUrJuIiKhiEDXmwlyY9DdwXFwcWrdujSlTpqBNmzaIi4tD165dcfXqVdy8eRM9evTADz/8UGqMmJgYODs7620LP3nwjF4BERHR02lkWchmLky6tkinTp3w0ksvYd68edi6dSvGjh2LMWPGYP78+QCAqKgoxMfH4/vvvy8xRrEtFw/aCm+54DwXRESV07OY5yLrTl0hcap5JwuJozSTFhfOzs6Ij4+Hr68vtFotVCoVTp8+jTZt2gAAfvvtNwQHB+uWnC0rTqJFRERl9SyKi4d36giJU937lpA4SjP5mAtJkgAAVlZWsLe311uvvmrVqsjMzDRVakREREJoeLfIs1OvXj1cuXJF9/jkyZOoW/evpqPk5GR4eXmZIjUiIiIqJ5O2XIwZMwYajUb3uGXLlnrHv/vuu6feLUJERFTRWdo8FyYdc6EUjrkgIqKyehZjLlJvewuJ41nrjpA4SjP5mAslpGuyhcesbiW+BrtRaC88Ztpj56efZKAcrUp4zELZWnhMIqLyCDJ1ApVQpSwuiIiIKhLLmkKLxQUREZHiLO1uERYXRERECtNYVm3BhcuIiIhILLZcEBERKYxjLoiIiEgoDSRTp/BMsVuEiIiIhGLLBRERkcK0Fjagk8UFERGRwtgtQkRERGQEtlwQEREpjC0XREREJJRWloRshjp27BhCQ0Ph7e0NSZKwa9eupz7nyJEjaNu2LVQqFXx9fbFhwwaDr8vigoiIqJLKycmBv78/VqxYUabzr1+/jt69e+PFF19EQkICJk2ahLfeegv79+836LrsFiEiIlKYqbpFevXqhV69epX5/NWrV6N+/fpYsmQJAKBZs2Y4fvw4PvroI4SEhJQ5TqUsLq4UVhEe86uMDsJj5mlthcesbpsnPGahVnwD15286sJjPpbF5lmg4bLwRJZgSjPlr6Exk46CkydPIjg4WG9fSEgIJk2aZFCcSllcEBERVSTlGS9RHLVaDbVarbdPpVJBpVIJiZ+amgoPDw+9fR4eHsjKykJeXh4cHBzKFMc8SikiIiJCTEwMnJ2d9baYmBhTp1UEWy6IiIgUJmrMRVRUFCIiIvT2iWq1AABPT0+kpaXp7UtLS0O1atXK3GoBsLggIiJSnEbQmDCRXSDFCQwMxL59+/T2HThwAIGBgQbFqVDFRU5ODr766itcvXoVXl5eGDx4MGrUqGHqtIiIiMxSdnY2rl69qnt8/fp1JCQkwNXVFXXr1kVUVBRu376NL774AgDw9ttvY/ny5Zg6dSpGjRqFH374AV999RW+/fZbg65r0uKiefPmOH78OFxdXXHr1i107doVDx48QOPGjXHt2jXMnTsXp06dQv369UuMUdzglgK1DDuVZc2GRk
REFZfWREMcz5w5gxdffFH3+EmXyvDhw7FhwwakpKQgOTlZd7x+/fr49ttvMXnyZHz88ceoXbs2Pv/8c4NuQwUASZZ
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"results_train_df = pd.DataFrame(results_train, columns=['max_depth', 'n_features_to_select', 'mse_train'])\n",
"\n",
"heatmap_data = results_train_df.pivot(index='max_depth', columns='n_features_to_select', values='mse_train')\n",
"\n",
"sns.heatmap(heatmap_data, annot=False, fmt=\".2f\", cmap=\"viridis\")\n",
"\n",
"plt.title('MSE')\n",
"plt.xlabel('Number of Features')\n",
"plt.ylabel('Max Depth')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAhcAAAHHCAYAAAAMD3r6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABanUlEQVR4nO3deViUVf8/8PewDciOgIIKoriDqGiKprgrGYo9Lqm5lt/H3CXReEpxRy1Ny+3JDLRcSktTH1PRFKO0AqVccQHFBURFQRaHZe7fH/2cmlicYc4wDLxf13VfF3PfM5/7MzAzfOacc58jkyRJAhEREZEgJoZOgIiIiKoXFhdEREQkFIsLIiIiEorFBREREQnF4oKIiIiEYnFBREREQrG4ICIiIqFYXBAREZFQLC6IiIhIKBYXREREJBSLCyIjEh0dDZlMBplMhri4uBLHJUlCgwYNIJPJ8Oqrr6r25+TkICIiAj4+PrC2tkbt2rXRpk0bzJgxA/fu3VPdb8GCBar4pW3p6emV8jyJyLiZGToBItKepaUlduzYgZdfflltf2xsLO7cuQO5XK7aV1hYiG7duuHKlSsYO3Yspk2bhpycHFy8eBE7duzA4MGD4e7urhZn48aNsLGxKXFeBwcHvTwfIqpeWFwQGaFXXnkFu3fvxscffwwzs7/exjt27IC/vz8ePnyo2rdv3z6cO3cO27dvx8iRI9XiPHv2DAUFBSXiDxkyBM7Ozvp7AkRUrbFbhMgIjRgxAo8ePUJMTIxqX0FBAfbs2VOigLhx4wYAoEuXLiXiWFpaws7OTr/JElGNw+KCyAg1bNgQAQEB2Llzp2rf999/j6ysLLz++utq9/X09AQAbNu2DZIkaRQ/MzMTDx8+VNuePHkiLH8iqt5YXBAZqZEjR2Lfvn3Iz88HAGzfvh2BgYElxk+EhISgWbNmmD9/Pry8vDB+/Hh8/vnnyMjIKDN2s2bN4OLiorZ16tRJr8+HiKoPFhdERmrYsGHIz8/HwYMH8fTpUxw8eLBElwgAWFlZ4ZdffkFYWBiAP684efPNN+Hm5oZp06ZBoVCUeMw333yDmJgYtS0qKkrvz4mIqgcWF0RGysXFBb1798aOHTvw7bffori4GEOGDCn1vvb29li5ciVu3ryJmzdvYsuWLWjWrBnWrVuHxYsXl7h/t27d0Lt3b7UtICBA30+JqEo7deoUgoOD4e7uDplMhn379mn1+GfPnmHcuHHw9fWFmZkZQkJCStzn22+/RZ8+feDi4gI7OzsEBATgyJEjYp5AJWJxQWTERo4cie+//x6bNm1CUFCQRpeKenp6YsKECfjpp5/g4OCA7du36z9RomogNzcXfn5+WL9+fYUeX1xcDCsrK0yfPh29e/cu9T6nTp1Cnz59cOjQISQkJKBHjx4IDg7GuXPndEm90rG4IDJigwcPhomJCc6cOVNql0h5HB0d0bhxY6SlpekpO6LqJSgoCEuWLMHgwYNLPa5QKDB79mzUq1cP1tbW6NixI06ePKk6bm1tjY0bN2LixImoW7duqTHWrFmDOXPmoEOHDmjSpAmWLVuGJk2a4MCBA/p4SnrDeS6IjJiNjQ02btyImzdvIjg4uNT7/P7776hXr16JeStu3bqFS5cuoVmzZpWRKlG1N3XqVFy6dAm7du2Cu7s79u7di/79++P8+fNo0qRJhWIqlUo8ffoUTk5OgrPVLxYXREZu7Nix5R6PiYlBREQEBg4ciE6dOsHGxgbJycn4/PPPoVAosGDBghKP2bNnT6kzdPbp0wd16tQRlTpRtZGamoqoqCikpqaqrtiaPXs2Dh8+jKioKCxbtqxCcT/88EPk5ORg2LBhItPVOxYXRNXcv/71Lzx9+hRHjx7FDz/8gMzMTDg6OuKll17CO++8gx49epR4zNtvv11qrBMnTrC4ICrF+fPnUVxcjKZNm6rtVygUqF27doVi7tixAwsXLsR3330HV1dXEWlWGpmk6a
w6REREBACQyWTYu3ev6oqPr776CqNGjcLFixdhamqqdl8bG5sSYyzGjRuHJ0+elHnFya5duzBhwgTs3r0bAwYM0MdT0Cu2XBAREemobdu2KC4uRkZGBrp27apTrJ07d2LChAnYtWuXURYWAIsLIiIijeTk5OD69euq2ykpKUhMTISTkxOaNm2KUaNGYcyYMVi1ahXatm2LBw8e4Pjx42jdurWqSLh06RIKCgqQmZmJp0+fIjExEQDQpk0bAH92hYwdOxZr165Fx44dkZ6eDuDPyfDs7e0r9fnqgt0iREREGjh58mSpY5TGjh2L6OhoFBYWYsmSJdi2bRvu3r0LZ2dndOrUCQsXLoSvry+AP9cFunXrVokYz/8Vd+/eHbGxsWWew1iwuCAiIiKhOIkWERERCcXigoiIiIRicUFERERCVcurRRqtXS08pvyhTHhMGUe7EBEZ3KUls/R+DmV60xffSQMmda8KiaNvbLkgIiIioaplywUREVFVooRSSBxjaRFgcUFERKRnxZKY4sJY/mkbS55ERERGS4maNcjOWFpYiIiIyEgYvOXi8uXLOHPmDAICAtC8eXNcuXIFa9euhUKhwBtvvIGePXuW+3iFQgGFQqG2TyoqgszM4E+NiIgIgLgxF8bCoC0Xhw8fRps2bTB79my0bdsWhw8fRrdu3XD9+nXcunULffv2xQ8//FBujMjISNjb26ttT2KOV9IzICIierFiSRKyGQuDFheLFi1CWFgYHj16hKioKIwcORITJ05ETEwMjh8/jrCwMCxfvrzcGOHh4cjKylLbHPr0qqRnQERERP9k0OLi4sWLGDduHABg2LBhePr0KYYMGaI6PmrUKPzxxx/lxpDL5bCzs1Pb2CVCRERViRKSkM1YGPy/sEz258yXJiYmsLS0VFuv3tbWFllZWYZKjYiISIhiIyoMRDBoy0XDhg1x7do11e3Tp0/Dw8NDdTs1NRVubm6GSI2IiIgqyKAtF2+//TaKi4tVt318fNSOf//99y+8WoSIiKiqM6YuDREMWlxMmjSp3OPLli2rpEyIiIj0x5iu9BDB4GMu9EFpLv564vz6wkPC8p6p8JgW2cJDosBOfExO30ZEVH1Vy+KCiIioKqlZU2ixuCAiItK7mna1CIsLIiIiPSuuWbUFe76JiIhILLZcEBER6RnHXBAREZFQxZAZOoVKxW4RIiIiEootF0RERHqmrGEDOllcEBER6Rm7RYiIiIh0wJYLIiIiPWPLBREREQmllGRCNm1ERkaiQ4cOsLW1haurK0JCQpCUlFTuYzZv3oyuXbvC0dERjo6O6N27N3799Vetny+LCyIiomooNjYWU6ZMwZkzZxATE4PCwkL07dsXubm5ZT7m5MmTGDFiBE6cOIHTp0+jQYMG6Nu3L+7evavVuWWSVP3WgW244UPxQcUvYMpVUYmIqoArEbP0fo6zqR5C4rTzSK3wYx88eABXV1fExsaiW7duGj2muLgYjo6OWLduHcaMGaPxuarlmAuZXaHwmOaplsJjyvQwZZs+Ylpmio+pNNdDTD0UgKLJisXHVMrFx5SM4HcJ6Of3qZfnroevcKLzNBH/samX97mxKhb0jUqhUEChUKjtk8vlkMtf/EGQlZUFAHByctL4fHl5eSgsLNTqMQC/PxIREemdqDEXkZGRsLe3V9siIyNffH6lEjNnzkSXLl3g4+Ojcd5z586Fu7s7evfurdXzrZYtF0RERNVReHg4QkND1fZp0moxZcoUXLhwAXFxcRqfa/ny5di1axdOnjwJS0vtWu9ZXBAREemZqEtRNe0C+bupU6fi4MGDOHXqFOrXr6/RYz788EMsX74cx44dQ+vWrbXOk8UFERGRnhVLlT8KQZIkTJs2DXv37sXJkyfh5eWl0eNWrlyJpUuX4siRI2jfvn2Fzl2liovc3Fx8/fXXuH79Otzc3DBixAjUrl3b0GkREREZnSlTpmDHjh347rvvYGtri/T0dACAvb09rKysAA
BjxoxBvXr1VOM2VqxYgfnz52PHjh1o2LCh6jE2NjawsbHR+NwGLS5atmyJuLg4ODk54fbt2+jWrRseP36Mpk2b4sa
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-05-22 13:01:06 +02:00
"results_test_df = pd.DataFrame(results_test, columns=['max_depth', 'n_features_to_select', 'mse_test'])\n",
"\n",
"heatmap_data = results_test_df.pivot(index='max_depth', columns='n_features_to_select', values='mse_test')\n",
"\n",
"sns.heatmap(heatmap_data, annot=False, fmt=\".2f\", cmap=\"viridis\")\n",
"\n",
"plt.title('MSE')\n",
"plt.xlabel('Number of Features')\n",
"plt.ylabel('Max Depth')\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-05-22 13:01:06 +02:00
"execution_count": 45,
2024-05-06 23:50:22 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-05-22 13:01:06 +02:00
"0.3853 — Dochody_podatek_od_nieruchomosci\n",
"0.2161 — Dochody_podatek_od_srodkow_transportowych\n",
"0.0911 — Powierzchnia\n",
"0.0670 — Wynagrodzenie_ogolem\n",
"0.0581 — Dochody_podatek_PCC\n",
"0.0424 — Dochody_razem\n",
"0.0292 — Dochody_z_majatku\n",
"0.0286 — Dochody_podatek_od_spadkow\n",
"0.0277 — Dochody_podatek_rolny\n",
"0.0225 — Dochody_podatek_od_dzialalnosci_gospodarczej\n",
"0.0156 — Wynagrodzenie_w_relacji_do_sredniej\n",
"0.0107 — Dochody_podatek_odrebne_ustawy\n",
"0.0057 — Dochody_podatek_lesny\n"
2024-05-06 23:50:22 +02:00
]
}
],
"source": [
2024-05-22 13:01:06 +02:00
"feature_importance = dict(zip(feature_names, best_model[0].feature_importances_))\n",
2024-05-06 23:50:22 +02:00
"for feature, importance in sorted(feature_importance.items(), key=lambda x: x[1], reverse=True):\n",
2024-05-22 13:01:06 +02:00
" print(f'{importance:.4f} \\u2014 {feature}')"
2024-05-06 23:50:22 +02:00
]
},
{
"cell_type": "code",
2024-05-22 13:01:06 +02:00
"execution_count": 46,
"metadata": {},
2024-05-22 13:01:06 +02:00
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\micha\\AppData\\Local\\Temp\\ipykernel_14224\\3704736663.py:1: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
" X_test['Przewidziana Suma'] = y_pred_test\n",
"C:\\Users\\micha\\AppData\\Local\\Temp\\ipykernel_14224\\3704736663.py:2: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
" X_test['Suma'] = y_test\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Powierzchnia</th>\n",
" <th>Wynagrodzenie_ogolem</th>\n",
" <th>Wynagrodzenie_w_relacji_do_sredniej</th>\n",
" <th>Dochody_podatek_lesny</th>\n",
" <th>Dochody_podatek_PCC</th>\n",
" <th>Dochody_podatek_od_dzialalnosci_gospodarczej</th>\n",
" <th>Dochody_podatek_od_nieruchomosci</th>\n",
" <th>Dochody_podatek_od_spadkow</th>\n",
" <th>Dochody_podatek_od_srodkow_transportowych</th>\n",
" <th>Dochody_podatek_rolny</th>\n",
" <th>...</th>\n",
" <th>Bezrobotni_mezczyzni</th>\n",
" <th>Bezrobotni_ogolem</th>\n",
" <th>Bezrobotni_powyzej_50_roku_zycia</th>\n",
" <th>Gmina_miejska</th>\n",
" <th>Gmina_miejsko_wiejska</th>\n",
" <th>Gmina_wiejska</th>\n",
" <th>Odleglosc_Warszawa</th>\n",
" <th>Odleglosc_od_centrum_decyzyjnego</th>\n",
" <th>Przewidziana Suma</th>\n",
" <th>Suma</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1319</th>\n",
" <td>100.0</td>\n",
" <td>3779.10</td>\n",
" <td>72.9</td>\n",
" <td>93981.92</td>\n",
" <td>365408.23</td>\n",
" <td>5990.13</td>\n",
" <td>3065899.23</td>\n",
" <td>44610.00</td>\n",
" <td>189528.36</td>\n",
" <td>910440.39</td>\n",
" <td>...</td>\n",
" <td>142.0</td>\n",
" <td>367.0</td>\n",
" <td>81.5</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>261.0</td>\n",
" <td>77.0</td>\n",
" <td>1860350.76</td>\n",
" <td>1860350.76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2619</th>\n",
" <td>102.0</td>\n",
" <td>3463.10</td>\n",
" <td>71.6</td>\n",
" <td>99953.98</td>\n",
" <td>104129.00</td>\n",
" <td>12044.14</td>\n",
" <td>2395129.05</td>\n",
" <td>14909.00</td>\n",
" <td>307592.55</td>\n",
" <td>79375.79</td>\n",
" <td>...</td>\n",
" <td>57.0</td>\n",
" <td>130.0</td>\n",
" <td>32.5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>198.0</td>\n",
" <td>91.0</td>\n",
" <td>1498567.97</td>\n",
" <td>1498567.97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>898</th>\n",
" <td>125.0</td>\n",
" <td>4786.11</td>\n",
" <td>86.7</td>\n",
" <td>3707.00</td>\n",
" <td>187923.00</td>\n",
" <td>2366.98</td>\n",
" <td>1150221.59</td>\n",
" <td>55974.84</td>\n",
" <td>197984.69</td>\n",
" <td>981887.41</td>\n",
" <td>...</td>\n",
" <td>71.0</td>\n",
" <td>135.0</td>\n",
" <td>33.5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>100.0</td>\n",
" <td>46.0</td>\n",
" <td>486083.96</td>\n",
" <td>486083.96</td>\n",
" </tr>\n",
" <tr>\n",
" <th>615</th>\n",
" <td>172.0</td>\n",
" <td>3764.14</td>\n",
" <td>77.9</td>\n",
" <td>256320.29</td>\n",
" <td>81812.00</td>\n",
" <td>0.00</td>\n",
" <td>1006100.88</td>\n",
" <td>34139.00</td>\n",
" <td>35029.00</td>\n",
" <td>241699.70</td>\n",
" <td>...</td>\n",
" <td>30.0</td>\n",
" <td>98.5</td>\n",
" <td>25.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>162.0</td>\n",
" <td>48.0</td>\n",
" <td>3493097.22</td>\n",
" <td>3493097.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1317</th>\n",
" <td>86.0</td>\n",
" <td>3779.10</td>\n",
" <td>72.9</td>\n",
" <td>50013.00</td>\n",
" <td>361760.03</td>\n",
" <td>12538.98</td>\n",
" <td>5595064.42</td>\n",
" <td>35481.32</td>\n",
" <td>286227.23</td>\n",
" <td>689321.45</td>\n",
" <td>...</td>\n",
" <td>93.5</td>\n",
" <td>287.0</td>\n",
" <td>73.5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>249.0</td>\n",
" <td>79.0</td>\n",
" <td>1860350.76</td>\n",
" <td>1860350.76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3289</th>\n",
" <td>110.0</td>\n",
" <td>4195.22</td>\n",
" <td>76.0</td>\n",
" <td>14385.61</td>\n",
" <td>196259.08</td>\n",
" <td>3860.54</td>\n",
" <td>1036514.37</td>\n",
" <td>20938.34</td>\n",
" <td>148191.22</td>\n",
" <td>858593.83</td>\n",
" <td>...</td>\n",
" <td>38.5</td>\n",
" <td>87.5</td>\n",
" <td>23.5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>197.0</td>\n",
" <td>126.0</td>\n",
" <td>183762.63</td>\n",
" <td>183762.63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2771</th>\n",
" <td>131.0</td>\n",
" <td>4515.28</td>\n",
" <td>99.7</td>\n",
" <td>188729.01</td>\n",
" <td>245472.61</td>\n",
" <td>14341.25</td>\n",
" <td>3335288.19</td>\n",
" <td>57052.58</td>\n",
" <td>267934.40</td>\n",
" <td>48568.21</td>\n",
" <td>...</td>\n",
" <td>135.0</td>\n",
" <td>326.5</td>\n",
" <td>75.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>338.0</td>\n",
" <td>83.0</td>\n",
" <td>913353.54</td>\n",
" <td>913353.54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1269</th>\n",
" <td>32.0</td>\n",
" <td>3400.75</td>\n",
" <td>75.1</td>\n",
" <td>37752.65</td>\n",
" <td>125139.00</td>\n",
" <td>0.00</td>\n",
" <td>990652.07</td>\n",
" <td>9266.00</td>\n",
" <td>202607.60</td>\n",
" <td>86466.96</td>\n",
" <td>...</td>\n",
" <td>67.5</td>\n",
" <td>139.0</td>\n",
" <td>32.5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>309.0</td>\n",
" <td>55.0</td>\n",
" <td>108165.60</td>\n",
" <td>108165.60</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1246</th>\n",
" <td>126.0</td>\n",
" <td>3681.50</td>\n",
" <td>76.1</td>\n",
" <td>238373.86</td>\n",
" <td>319604.16</td>\n",
" <td>1755.85</td>\n",
" <td>3169234.07</td>\n",
" <td>55944.22</td>\n",
" <td>233611.73</td>\n",
" <td>41946.11</td>\n",
" <td>...</td>\n",
" <td>119.0</td>\n",
" <td>350.5</td>\n",
" <td>91.5</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>312.0</td>\n",
" <td>86.0</td>\n",
" <td>26439.77</td>\n",
" <td>26439.77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>519</th>\n",
" <td>159.0</td>\n",
" <td>3691.03</td>\n",
" <td>76.3</td>\n",
" <td>240844.33</td>\n",
" <td>138434.00</td>\n",
" <td>5983.54</td>\n",
" <td>1326617.29</td>\n",
" <td>19542.30</td>\n",
" <td>63370.83</td>\n",
" <td>542170.27</td>\n",
" <td>...</td>\n",
" <td>186.0</td>\n",
" <td>387.5</td>\n",
" <td>73.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>166.0</td>\n",
" <td>55.0</td>\n",
" <td>2563499.46</td>\n",
" <td>2563499.46</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>214 rows × 102 columns</p>\n",
"</div>"
],
"text/plain": [
" Powierzchnia Wynagrodzenie_ogolem Wynagrodzenie_w_relacji_do_sredniej \n",
"1319 100.0 3779.10 72.9 \\\n",
"2619 102.0 3463.10 71.6 \n",
"898 125.0 4786.11 86.7 \n",
"615 172.0 3764.14 77.9 \n",
"1317 86.0 3779.10 72.9 \n",
"... ... ... ... \n",
"3289 110.0 4195.22 76.0 \n",
"2771 131.0 4515.28 99.7 \n",
"1269 32.0 3400.75 75.1 \n",
"1246 126.0 3681.50 76.1 \n",
"519 159.0 3691.03 76.3 \n",
"\n",
" Dochody_podatek_lesny Dochody_podatek_PCC \n",
"1319 93981.92 365408.23 \\\n",
"2619 99953.98 104129.00 \n",
"898 3707.00 187923.00 \n",
"615 256320.29 81812.00 \n",
"1317 50013.00 361760.03 \n",
"... ... ... \n",
"3289 14385.61 196259.08 \n",
"2771 188729.01 245472.61 \n",
"1269 37752.65 125139.00 \n",
"1246 238373.86 319604.16 \n",
"519 240844.33 138434.00 \n",
"\n",
" Dochody_podatek_od_dzialalnosci_gospodarczej \n",
"1319 5990.13 \\\n",
"2619 12044.14 \n",
"898 2366.98 \n",
"615 0.00 \n",
"1317 12538.98 \n",
"... ... \n",
"3289 3860.54 \n",
"2771 14341.25 \n",
"1269 0.00 \n",
"1246 1755.85 \n",
"519 5983.54 \n",
"\n",
" Dochody_podatek_od_nieruchomosci Dochody_podatek_od_spadkow \n",
"1319 3065899.23 44610.00 \\\n",
"2619 2395129.05 14909.00 \n",
"898 1150221.59 55974.84 \n",
"615 1006100.88 34139.00 \n",
"1317 5595064.42 35481.32 \n",
"... ... ... \n",
"3289 1036514.37 20938.34 \n",
"2771 3335288.19 57052.58 \n",
"1269 990652.07 9266.00 \n",
"1246 3169234.07 55944.22 \n",
"519 1326617.29 19542.30 \n",
"\n",
" Dochody_podatek_od_srodkow_transportowych Dochody_podatek_rolny ... \n",
"1319 189528.36 910440.39 ... \\\n",
"2619 307592.55 79375.79 ... \n",
"898 197984.69 981887.41 ... \n",
"615 35029.00 241699.70 ... \n",
"1317 286227.23 689321.45 ... \n",
"... ... ... ... \n",
"3289 148191.22 858593.83 ... \n",
"2771 267934.40 48568.21 ... \n",
"1269 202607.60 86466.96 ... \n",
"1246 233611.73 41946.11 ... \n",
"519 63370.83 542170.27 ... \n",
"\n",
" Bezrobotni_mezczyzni Bezrobotni_ogolem \n",
"1319 142.0 367.0 \\\n",
"2619 57.0 130.0 \n",
"898 71.0 135.0 \n",
"615 30.0 98.5 \n",
"1317 93.5 287.0 \n",
"... ... ... \n",
"3289 38.5 87.5 \n",
"2771 135.0 326.5 \n",
"1269 67.5 139.0 \n",
"1246 119.0 350.5 \n",
"519 186.0 387.5 \n",
"\n",
" Bezrobotni_powyzej_50_roku_zycia Gmina_miejska Gmina_miejsko_wiejska \n",
"1319 81.5 0 1 \\\n",
"2619 32.5 0 0 \n",
"898 33.5 0 0 \n",
"615 25.0 0 0 \n",
"1317 73.5 0 0 \n",
"... ... ... ... \n",
"3289 23.5 0 0 \n",
"2771 75.0 0 0 \n",
"1269 32.5 0 0 \n",
"1246 91.5 0 1 \n",
"519 73.0 0 0 \n",
"\n",
" Gmina_wiejska Odleglosc_Warszawa Odleglosc_od_centrum_decyzyjnego \n",
"1319 0 261.0 77.0 \\\n",
"2619 1 198.0 91.0 \n",
"898 1 100.0 46.0 \n",
"615 1 162.0 48.0 \n",
"1317 1 249.0 79.0 \n",
"... ... ... ... \n",
"3289 1 197.0 126.0 \n",
"2771 1 338.0 83.0 \n",
"1269 1 309.0 55.0 \n",
"1246 0 312.0 86.0 \n",
"519 1 166.0 55.0 \n",
"\n",
" Przewidziana Suma Suma \n",
"1319 1860350.76 1860350.76 \n",
"2619 1498567.97 1498567.97 \n",
"898 486083.96 486083.96 \n",
"615 3493097.22 3493097.22 \n",
"1317 1860350.76 1860350.76 \n",
"... ... ... \n",
"3289 183762.63 183762.63 \n",
"2771 913353.54 913353.54 \n",
"1269 108165.60 108165.60 \n",
"1246 26439.77 26439.77 \n",
"519 2563499.46 2563499.46 \n",
"\n",
"[214 rows x 102 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"array([1860350.76, 1498567.97, 486083.96, 3493097.22, 571715.87,\n",
" 1028236.29, 4252230.31, 3304757.1 , 89784. , 905697.19,\n",
" 1035082.51, 43758.91, 1754503.74, 1181942.75, 29737.5 ,\n",
" 113582.96, 115037.1 , 122196.66, 3052544.65, 29928. ,\n",
" 590971.38, 36054.23, 2602729.18, 701604.12, 144384.92,\n",
" 4688787.43, 3483121.16, 501415.56, 62322.53, 4711234.74,\n",
" 100580.97, 571292.08, 884181.41, 2322080.77, 36297.77,\n",
" 3435512.15, 350776.06, 1790128.78, 2563499.46, 276609. ,\n",
" 1187850.12, 54795.97, 2516963.67, 26439.77, 1780651.04,\n",
" 1900488.13, 596683.9 , 45304.07, 39659.65, 77247.93,\n",
" 456071.25, 688016.43, 614119.12, 53798.55, 31818.33,\n",
" 518641.88, 77554.07, 370119.47, 1995853.47, 766096.1 ,\n",
" 672921.52, 14382.12, 40444.31, 36465.76, 2874827.68,\n",
" 75721.97, 665436.59, 1572715.87, 2360742.42, 317790.93,\n",
" 252263.98, 53527.5 , 354722.08, 634649.02, 1741560.58,\n",
" 1032024.64, 69323.36, 269989.33, 1196987.75, 1174307.6 ,\n",
" 65352.18, 39132.19, 444907.31, 68280.63, 5982228.28,\n",
" 631640.54, 1237415. , 1365031.79, 843874.12, 532083.12,\n",
" 698800.88, 561422.18, 597143.38, 3845249.2 , 1899389.61,\n",
" 4913480.94, 46025.06, 1241526.23, 215991.46, 1238429.57,\n",
" 876422.69, 2108206.97, 1212092.02, 404277.79, 627054.31,\n",
" 1930677.18, 1207596.26, 6009755.61, 37393.52, 2826740.85,\n",
" 32875.04, 54712.8 , 1096763.79, 444887.4 , 701447.65,\n",
" 2438554.01, 3469490.2 , 545834.72, 4386259.35, 716524. ,\n",
" 239990.51, 24409.58, 29277.49, 41269.91, 110053.1 ,\n",
" 1119850.85, 521292.01, 78713.16, 27896.66, 1016449.03,\n",
" 266041.56, 30507.42, 51702.71, 115062.65, 183762.63,\n",
" 913353.54, 108165.6 ])"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"X_test['Przewidziana Suma'] = y_pred_test\n",
"X_test['Suma'] = y_test\n",
"display(X_test[np.abs(y_test - y_pred_test) < 1])\n",
"display(X_test['Suma'][np.abs(y_test - y_pred_test) < 1].drop_duplicates().values)"
]
2024-05-22 13:55:56 +02:00
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.3391 — Dochody_podatek_od_srodkow_transportowych\n",
"0.1816 — Dochody_podatek_od_spadkow\n",
"0.1590 — Powierzchnia\n",
"0.0714 — Wynagrodzenie_ogolem\n",
"0.0675 — Dochody_podatek_lesny\n",
"0.0377 — Dochody_razem\n",
"0.0312 — Dochody_podatek_rolny\n",
"0.0258 — Dochody_z_majatku\n",
"0.0220 — Dochody_podatek_PCC\n",
"0.0172 — Wynagrodzenie_w_relacji_do_sredniej\n",
"0.0172 — Dochody_podatek_od_dzialalnosci_gospodarczej\n",
"0.0158 — Dochody_podatek_od_nieruchomosci\n",
"0.0144 — Dochody_podatek_odrebne_ustawy\n"
]
}
],
"source": [
"selector = RFE(estimator=DecisionTreeRegressor(random_state=0, max_depth=best_params[0]), n_features_to_select=best_params[1])\n",
"selector.fit(X, y)\n",
"X_selected = selector.transform(X)\n",
"\n",
"model = DecisionTreeRegressor(random_state=0, max_depth=best_params[0])\n",
"model.fit(X_selected, y)\n",
"\n",
"feature_importance = dict(zip(feature_names, model.feature_importances_))\n",
"for feature, importance in sorted(feature_importance.items(), key=lambda x: x[1], reverse=True):\n",
" print(f'{importance:.4f} \\u2014 {feature}')"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [],
"source": [
"# plot_tree(model, fontsize=5)"
]
2024-05-06 22:55:21 +02:00
}
],
"metadata": {
"language_info": {
2024-05-22 13:01:06 +02:00
"name": "python"
2024-05-06 22:55:21 +02:00
}
},
"nbformat": 4,
"nbformat_minor": 2
}