WUT_Computer_Science/main.ipynb

1975 lines
110 KiB
Plaintext
Raw Normal View History

2024-05-06 22:55:21 +02:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 124,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 125,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"pd.options.display.float_format = '{:.2f}'.format"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 126,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\micha\\AppData\\Local\\Temp\\ipykernel_26160\\3760256257.py:1: DtypeWarning: Columns (25) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df_dofinansowanie = pd.read_csv(\n"
]
}
],
"source": [
"df_dofinansowanie = pd.read_csv(\n",
" 'umowy_pelna_lista_krajowe.csv',\n",
" encoding='ISO-8859-2',\n",
" converters={'TERYT pe?ny': str},\n",
" thousands=',')\n",
"\n",
"df_dofinansowanie = df_dofinansowanie.loc[df_dofinansowanie['TERYT pe?ny'] != ''].reset_index(drop=True)\n",
"\n",
"df_dofinansowanie['Dofinansowanie UE (PLN)'] = \\\n",
" df_dofinansowanie['Dofinansowanie UE (PLN)'].apply(pd.to_numeric)\n",
"\n",
"df_dofinansowanie['Data rozpocz?cia realizacji'] = pd.to_datetime(df_dofinansowanie['Data rozpocz?cia realizacji'])\n",
"df_dofinansowanie['Rok rozpocz?cia realizacji'] = df_dofinansowanie['Data rozpocz?cia realizacji'].dt.year\n",
"\n",
"df_dofinansowanie['Data podpisania umowy pierwotnej'] = pd.to_datetime(df_dofinansowanie['Data podpisania umowy pierwotnej'])\n",
"df_dofinansowanie['Rok podpisania umowy pierwotnej'] = df_dofinansowanie['Data podpisania umowy pierwotnej'].dt.year"
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 127,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"df_dofinansowanie_agg = df_dofinansowanie \\\n",
" .groupby(['TERYT pe?ny', 'Rok rozpocz?cia realizacji'])['Dofinansowanie UE (PLN)'].sum().reset_index()\n",
"df_dofinansowanie_agg = df_dofinansowanie_agg \\\n",
" .rename(columns={'TERYT pe?ny': 'Kod', 'Rok rozpocz?cia realizacji': 'Rok', 'Dofinansowanie UE (PLN)': 'Suma'})\n",
"df_dofinansowanie_agg = df_dofinansowanie_agg \\\n",
" .loc[df_dofinansowanie_agg['Kod'].str.len() == 7].reset_index(drop=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 128,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"df_podz = pd.read_csv(\n",
" 'PODZ_1410_CREL.csv',\n",
" sep=';',\n",
" converters={'Kod': str})\n",
"df_podz = df_podz[['Kod', 'Rok', 'Wartosc']]\n",
"df_podz = df_podz.loc[df_podz['Kod'].str.endswith(('1', '2', '3'))]\n",
"df_podz = df_podz.dropna()\n",
"df_podz = df_podz.rename(columns={\n",
" 'Wartosc': 'Powierzchnia'})"
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 129,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"df_wyna = pd.read_csv(\n",
" 'WYNA_2497_CREL.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_wyna = df_wyna[['Kod', 'Wyszczególnienie', 'Rok', 'Wartosc']]\n",
"df_wyna = df_wyna.dropna()\n",
"df_wyna = df_wyna.pivot_table(index=['Kod', 'Rok'], columns='Wyszczególnienie', values='Wartosc').reset_index()\n",
"df_wyna = df_wyna.rename(columns={\n",
" 'ogółem': 'Wynagrodzenie_ogolem',\n",
" 'przeciętne miesięczne wynagrodzenia brutto w relacji do średniej krajowej (Polska=100)': 'Wynagrodzenie_w_relacji_do_sredniej'})"
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 130,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-05-06 23:50:22 +02:00
"C:\\Users\\micha\\AppData\\Local\\Temp\\ipykernel_26160\\1671418303.py:1: DtypeWarning: Columns (7) have mixed types. Specify dtype option on import or set low_memory=False.\n",
2024-05-06 22:55:21 +02:00
" df_fina_1 = pd.read_csv(\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Rodzaje dochodów</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Dochody_podatek_lesny</th>\n",
" <th>Dochody_podatek_PCC</th>\n",
2024-05-06 23:50:22 +02:00
" <th>Dochody_podatek_od_dzialalnosci_gospodarczej</th>\n",
2024-05-06 22:55:21 +02:00
" <th>Dochody_podatek_od_nieruchomosci</th>\n",
" <th>Dochody_podatek_od_spadkow</th>\n",
" <th>Dochody_podatek_od_srodkow_transportowych</th>\n",
" <th>Dochody_podatek_rolny</th>\n",
" <th>Dochody_podatek_odrebne_ustawy</th>\n",
" <th>Dochody_razem</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2004</td>\n",
" <td>NaN</td>\n",
" <td>549608.00</td>\n",
" <td>NaN</td>\n",
" <td>13532989.00</td>\n",
" <td>NaN</td>\n",
" <td>625159.00</td>\n",
" <td>23687.00</td>\n",
" <td>NaN</td>\n",
" <td>41378568.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2005</td>\n",
" <td>NaN</td>\n",
" <td>609855.00</td>\n",
" <td>NaN</td>\n",
" <td>13667398.00</td>\n",
" <td>NaN</td>\n",
" <td>700134.00</td>\n",
" <td>26634.00</td>\n",
" <td>15438121.00</td>\n",
" <td>43417443.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2006</td>\n",
" <td>NaN</td>\n",
" <td>844223.65</td>\n",
" <td>NaN</td>\n",
" <td>14633962.72</td>\n",
" <td>NaN</td>\n",
" <td>747182.64</td>\n",
" <td>11683.60</td>\n",
" <td>16647124.98</td>\n",
" <td>50319253.08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2007</td>\n",
" <td>NaN</td>\n",
" <td>1344365.01</td>\n",
" <td>NaN</td>\n",
" <td>14944781.74</td>\n",
" <td>NaN</td>\n",
" <td>777345.52</td>\n",
" <td>19377.36</td>\n",
" <td>17436387.93</td>\n",
" <td>62025513.24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2008</td>\n",
" <td>6799.55</td>\n",
" <td>1790135.40</td>\n",
" <td>NaN</td>\n",
" <td>16089534.56</td>\n",
" <td>NaN</td>\n",
" <td>836441.10</td>\n",
" <td>30823.60</td>\n",
" <td>19149551.45</td>\n",
" <td>80755930.93</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47078</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>154462.39</td>\n",
" <td>5361951.37</td>\n",
" <td>572868.36</td>\n",
" <td>108107448.79</td>\n",
" <td>437144.83</td>\n",
" <td>589658.88</td>\n",
" <td>51297.75</td>\n",
" <td>115274832.37</td>\n",
" <td>261780766.79</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47079</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>150329.31</td>\n",
" <td>6088184.20</td>\n",
" <td>468411.51</td>\n",
" <td>38527846.59</td>\n",
" <td>228886.23</td>\n",
" <td>608637.40</td>\n",
" <td>64855.15</td>\n",
" <td>46137150.39</td>\n",
" <td>167638796.15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47080</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>156556.52</td>\n",
" <td>5125090.74</td>\n",
" <td>329522.12</td>\n",
" <td>78767466.83</td>\n",
" <td>552009.16</td>\n",
" <td>558925.68</td>\n",
" <td>48689.09</td>\n",
" <td>85538260.14</td>\n",
" <td>263006955.07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47081</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>163778.36</td>\n",
" <td>9082482.28</td>\n",
" <td>492045.28</td>\n",
" <td>78491368.35</td>\n",
" <td>947992.83</td>\n",
" <td>602586.14</td>\n",
" <td>59824.46</td>\n",
" <td>89840077.70</td>\n",
" <td>252345800.93</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47082</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>174823.49</td>\n",
" <td>7474079.65</td>\n",
" <td>1019054.56</td>\n",
" <td>84996948.99</td>\n",
" <td>593315.54</td>\n",
" <td>627169.86</td>\n",
" <td>50987.00</td>\n",
" <td>94936379.09</td>\n",
" <td>259310641.60</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>47083 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
"Rodzaje dochodów Kod Rok Dochody_podatek_lesny Dochody_podatek_PCC \n",
"0 0201011 2004 NaN 549608.00 \\\n",
"1 0201011 2005 NaN 609855.00 \n",
"2 0201011 2006 NaN 844223.65 \n",
"3 0201011 2007 NaN 1344365.01 \n",
"4 0201011 2008 6799.55 1790135.40 \n",
"... ... ... ... ... \n",
"47078 3263011 2018 154462.39 5361951.37 \n",
"47079 3263011 2019 150329.31 6088184.20 \n",
"47080 3263011 2020 156556.52 5125090.74 \n",
"47081 3263011 2021 163778.36 9082482.28 \n",
"47082 3263011 2022 174823.49 7474079.65 \n",
"\n",
2024-05-06 23:50:22 +02:00
"Rodzaje dochodów Dochody_podatek_od_dzialalnosci_gospodarczej \n",
"0 NaN \\\n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"47078 572868.36 \n",
"47079 468411.51 \n",
"47080 329522.12 \n",
"47081 492045.28 \n",
"47082 1019054.56 \n",
"\n",
"Rodzaje dochodów Dochody_podatek_od_nieruchomosci \n",
"0 13532989.00 \\\n",
"1 13667398.00 \n",
"2 14633962.72 \n",
"3 14944781.74 \n",
"4 16089534.56 \n",
"... ... \n",
"47078 108107448.79 \n",
"47079 38527846.59 \n",
"47080 78767466.83 \n",
"47081 78491368.35 \n",
"47082 84996948.99 \n",
2024-05-06 22:55:21 +02:00
"\n",
"Rodzaje dochodów Dochody_podatek_od_spadkow \n",
"0 NaN \\\n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"47078 437144.83 \n",
"47079 228886.23 \n",
"47080 552009.16 \n",
"47081 947992.83 \n",
"47082 593315.54 \n",
"\n",
"Rodzaje dochodów Dochody_podatek_od_srodkow_transportowych \n",
"0 625159.00 \\\n",
"1 700134.00 \n",
"2 747182.64 \n",
"3 777345.52 \n",
"4 836441.10 \n",
"... ... \n",
"47078 589658.88 \n",
"47079 608637.40 \n",
"47080 558925.68 \n",
"47081 602586.14 \n",
"47082 627169.86 \n",
"\n",
"Rodzaje dochodów Dochody_podatek_rolny Dochody_podatek_odrebne_ustawy \n",
"0 23687.00 NaN \\\n",
"1 26634.00 15438121.00 \n",
"2 11683.60 16647124.98 \n",
"3 19377.36 17436387.93 \n",
"4 30823.60 19149551.45 \n",
"... ... ... \n",
"47078 51297.75 115274832.37 \n",
"47079 64855.15 46137150.39 \n",
"47080 48689.09 85538260.14 \n",
"47081 59824.46 89840077.70 \n",
"47082 50987.00 94936379.09 \n",
"\n",
"Rodzaje dochodów Dochody_razem \n",
"0 41378568.00 \n",
"1 43417443.00 \n",
"2 50319253.08 \n",
"3 62025513.24 \n",
"4 80755930.93 \n",
"... ... \n",
"47078 261780766.79 \n",
"47079 167638796.15 \n",
"47080 263006955.07 \n",
"47081 252345800.93 \n",
"47082 259310641.60 \n",
"\n",
"[47083 rows x 11 columns]"
]
},
2024-05-06 23:50:22 +02:00
"execution_count": 130,
2024-05-06 22:55:21 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_fina_1 = pd.read_csv(\n",
" 'FINA_2622_CREL_1.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_fina_1 = df_fina_1[['Kod', 'Rodzaje dochodów', 'Rok', 'Wartosc']]\n",
"df_fina_1 = df_fina_1.dropna()\n",
"df_fina_1 = df_fina_1.pivot_table(index=['Kod', 'Rok'], columns='Rodzaje dochodów', values='Wartosc').reset_index()\n",
"df_fina_1 = df_fina_1.rename(columns={\n",
" 'dochody podatkowe - podatek leśny': 'Dochody_podatek_lesny',\n",
" 'dochody podatkowe - podatek od czynności cywilnoprawnych': 'Dochody_podatek_PCC',\n",
2024-05-06 23:50:22 +02:00
" 'dochody podatkowe - podatek od działalności gospodarczej osób fizycznych, opłacany w formie karty podatkowej': 'Dochody_podatek_od_dzialalnosci_gospodarczej',\n",
2024-05-06 22:55:21 +02:00
" 'dochody podatkowe - podatek od nieruchomości': 'Dochody_podatek_od_nieruchomosci',\n",
" 'dochody podatkowe - podatek od spadków i darowizn': 'Dochody_podatek_od_spadkow',\n",
" 'dochody podatkowe - podatek od środków transportowych': 'Dochody_podatek_od_srodkow_transportowych',\n",
" 'dochody podatkowe - podatek rolny': 'Dochody_podatek_rolny',\n",
" 'dochody podatkowe - ustalone i pobierane na podstawie odrębnych ustaw': 'Dochody_podatek_odrebne_ustawy',\n",
" 'razem': 'Dochody_razem'})\n",
"\n",
"df_fina_1"
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 131,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\micha\\AppData\\Local\\Temp\\ipykernel_26160\\2161929356.py:1: DtypeWarning: Columns (7) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df_fina_2 = pd.read_csv(\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Rodzaje dochodów</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Dochody_z_majatku</th>\n",
" <th>Dochody_z_najmu_i_dzierzawy</th>\n",
" <th>Dochody_z_uslug</th>\n",
" <th>Dochody_dofinansowanie_inwestycyjne</th>\n",
" <th>Dochody_dofinansowanie_razem</th>\n",
" <th>Udzialy_w_podatkach_dochodowych_od_osob_fizycznych</th>\n",
" <th>Udzialy_w_podatkach_dochodowych_od_osob_prywatnych</th>\n",
" <th>Udzialy_w_podatkach_dochodowych_razem</th>\n",
" <th>Wplywy_z_innych_lokalnych_oplat</th>\n",
" <th>Wplywy_z_oplaty_eksploatacyjnej</th>\n",
" <th>Wplywy_z_oplaty_skarbowej</th>\n",
" <th>Wplywy_z_oplaty_targowej</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2004</td>\n",
" <td>5344205.00</td>\n",
" <td>NaN</td>\n",
" <td>184307.00</td>\n",
" <td>NaN</td>\n",
" <td>519209.00</td>\n",
" <td>13285456.00</td>\n",
" <td>1065169.00</td>\n",
" <td>14350625.00</td>\n",
" <td>44200.00</td>\n",
" <td>NaN</td>\n",
" <td>1209998.00</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2005</td>\n",
" <td>4560489.00</td>\n",
" <td>NaN</td>\n",
" <td>96462.00</td>\n",
" <td>NaN</td>\n",
" <td>9024183.00</td>\n",
" <td>15985331.00</td>\n",
" <td>1170863.00</td>\n",
" <td>17156194.00</td>\n",
" <td>42840.00</td>\n",
" <td>NaN</td>\n",
" <td>1282943.00</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2006</td>\n",
" <td>8528727.69</td>\n",
" <td>NaN</td>\n",
" <td>231470.96</td>\n",
" <td>8752288.98</td>\n",
" <td>8864860.57</td>\n",
" <td>18101668.00</td>\n",
" <td>1048115.83</td>\n",
" <td>19149783.83</td>\n",
" <td>37365.00</td>\n",
" <td>NaN</td>\n",
" <td>1203990.73</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2007</td>\n",
" <td>15042480.34</td>\n",
" <td>9219682.12</td>\n",
" <td>339654.15</td>\n",
" <td>18153240.30</td>\n",
" <td>18438743.21</td>\n",
" <td>21785308.00</td>\n",
" <td>1336702.02</td>\n",
" <td>23122010.02</td>\n",
" <td>78798.51</td>\n",
" <td>NaN</td>\n",
" <td>1228704.53</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2008</td>\n",
" <td>22797881.07</td>\n",
" <td>9546379.31</td>\n",
" <td>787256.69</td>\n",
" <td>5046691.69</td>\n",
" <td>5182137.79</td>\n",
" <td>23974587.00</td>\n",
" <td>1532633.44</td>\n",
" <td>25507220.44</td>\n",
" <td>83882.94</td>\n",
" <td>NaN</td>\n",
" <td>1364245.93</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47078</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>16419859.31</td>\n",
" <td>4261374.83</td>\n",
" <td>1996824.80</td>\n",
" <td>25285.92</td>\n",
" <td>237485.34</td>\n",
" <td>52799183.00</td>\n",
" <td>2690098.17</td>\n",
" <td>55489281.17</td>\n",
" <td>10458871.30</td>\n",
" <td>4684.54</td>\n",
" <td>434077.88</td>\n",
" <td>608625.90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47079</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>8844350.07</td>\n",
" <td>4324758.68</td>\n",
" <td>2187576.47</td>\n",
" <td>0.00</td>\n",
" <td>225831.84</td>\n",
" <td>55319040.00</td>\n",
" <td>2770684.17</td>\n",
" <td>58089724.17</td>\n",
" <td>11369287.11</td>\n",
" <td>3456.95</td>\n",
" <td>415686.53</td>\n",
" <td>610059.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47080</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>13485033.97</td>\n",
" <td>6159923.01</td>\n",
" <td>1917372.55</td>\n",
" <td>21002107.00</td>\n",
" <td>21192313.05</td>\n",
" <td>53739656.00</td>\n",
" <td>3144444.38</td>\n",
" <td>56884100.38</td>\n",
" <td>12281916.71</td>\n",
" <td>5157.50</td>\n",
" <td>355201.29</td>\n",
" <td>507341.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47081</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>16928500.75</td>\n",
" <td>7582499.62</td>\n",
" <td>4110105.72</td>\n",
" <td>888293.63</td>\n",
" <td>1072910.83</td>\n",
" <td>63936763.00</td>\n",
" <td>3975531.95</td>\n",
" <td>67912294.95</td>\n",
" <td>17127683.55</td>\n",
" <td>27746.70</td>\n",
" <td>416473.03</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47082</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>30415536.99</td>\n",
" <td>8651170.05</td>\n",
" <td>4117086.30</td>\n",
" <td>207597.50</td>\n",
" <td>800347.63</td>\n",
" <td>64657287.40</td>\n",
" <td>4082611.64</td>\n",
" <td>68739899.04</td>\n",
" <td>19150342.25</td>\n",
" <td>5035.87</td>\n",
" <td>421424.91</td>\n",
" <td>1233266.30</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>47083 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
"Rodzaje dochodów Kod Rok Dochody_z_majatku \n",
"0 0201011 2004 5344205.00 \\\n",
"1 0201011 2005 4560489.00 \n",
"2 0201011 2006 8528727.69 \n",
"3 0201011 2007 15042480.34 \n",
"4 0201011 2008 22797881.07 \n",
"... ... ... ... \n",
"47078 3263011 2018 16419859.31 \n",
"47079 3263011 2019 8844350.07 \n",
"47080 3263011 2020 13485033.97 \n",
"47081 3263011 2021 16928500.75 \n",
"47082 3263011 2022 30415536.99 \n",
"\n",
"Rodzaje dochodów Dochody_z_najmu_i_dzierzawy Dochody_z_uslug \n",
"0 NaN 184307.00 \\\n",
"1 NaN 96462.00 \n",
"2 NaN 231470.96 \n",
"3 9219682.12 339654.15 \n",
"4 9546379.31 787256.69 \n",
"... ... ... \n",
"47078 4261374.83 1996824.80 \n",
"47079 4324758.68 2187576.47 \n",
"47080 6159923.01 1917372.55 \n",
"47081 7582499.62 4110105.72 \n",
"47082 8651170.05 4117086.30 \n",
"\n",
"Rodzaje dochodów Dochody_dofinansowanie_inwestycyjne \n",
"0 NaN \\\n",
"1 NaN \n",
"2 8752288.98 \n",
"3 18153240.30 \n",
"4 5046691.69 \n",
"... ... \n",
"47078 25285.92 \n",
"47079 0.00 \n",
"47080 21002107.00 \n",
"47081 888293.63 \n",
"47082 207597.50 \n",
"\n",
"Rodzaje dochodów Dochody_dofinansowanie_razem \n",
"0 519209.00 \\\n",
"1 9024183.00 \n",
"2 8864860.57 \n",
"3 18438743.21 \n",
"4 5182137.79 \n",
"... ... \n",
"47078 237485.34 \n",
"47079 225831.84 \n",
"47080 21192313.05 \n",
"47081 1072910.83 \n",
"47082 800347.63 \n",
"\n",
"Rodzaje dochodów Udzialy_w_podatkach_dochodowych_od_osob_fizycznych \n",
"0 13285456.00 \\\n",
"1 15985331.00 \n",
"2 18101668.00 \n",
"3 21785308.00 \n",
"4 23974587.00 \n",
"... ... \n",
"47078 52799183.00 \n",
"47079 55319040.00 \n",
"47080 53739656.00 \n",
"47081 63936763.00 \n",
"47082 64657287.40 \n",
"\n",
"Rodzaje dochodów Udzialy_w_podatkach_dochodowych_od_osob_prywatnych \n",
"0 1065169.00 \\\n",
"1 1170863.00 \n",
"2 1048115.83 \n",
"3 1336702.02 \n",
"4 1532633.44 \n",
"... ... \n",
"47078 2690098.17 \n",
"47079 2770684.17 \n",
"47080 3144444.38 \n",
"47081 3975531.95 \n",
"47082 4082611.64 \n",
"\n",
"Rodzaje dochodów Udzialy_w_podatkach_dochodowych_razem \n",
"0 14350625.00 \\\n",
"1 17156194.00 \n",
"2 19149783.83 \n",
"3 23122010.02 \n",
"4 25507220.44 \n",
"... ... \n",
"47078 55489281.17 \n",
"47079 58089724.17 \n",
"47080 56884100.38 \n",
"47081 67912294.95 \n",
"47082 68739899.04 \n",
"\n",
"Rodzaje dochodów Wplywy_z_innych_lokalnych_oplat \n",
"0 44200.00 \\\n",
"1 42840.00 \n",
"2 37365.00 \n",
"3 78798.51 \n",
"4 83882.94 \n",
"... ... \n",
"47078 10458871.30 \n",
"47079 11369287.11 \n",
"47080 12281916.71 \n",
"47081 17127683.55 \n",
"47082 19150342.25 \n",
"\n",
"Rodzaje dochodów Wplywy_z_oplaty_eksploatacyjnej Wplywy_z_oplaty_skarbowej \n",
"0 NaN 1209998.00 \\\n",
"1 NaN 1282943.00 \n",
"2 NaN 1203990.73 \n",
"3 NaN 1228704.53 \n",
"4 NaN 1364245.93 \n",
"... ... ... \n",
"47078 4684.54 434077.88 \n",
"47079 3456.95 415686.53 \n",
"47080 5157.50 355201.29 \n",
"47081 27746.70 416473.03 \n",
"47082 5035.87 421424.91 \n",
"\n",
"Rodzaje dochodów Wplywy_z_oplaty_targowej \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"47078 608625.90 \n",
"47079 610059.50 \n",
"47080 507341.00 \n",
"47081 0.00 \n",
"47082 1233266.30 \n",
"\n",
"[47083 rows x 14 columns]"
]
},
2024-05-06 23:50:22 +02:00
"execution_count": 131,
2024-05-06 22:55:21 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_fina_2 = pd.read_csv(\n",
" 'FINA_2622_CREL_2.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_fina_2 = df_fina_2[['Kod', 'Rodzaje dochodów', 'Rok', 'Wartosc']]\n",
"df_fina_2 = df_fina_2.dropna()\n",
"df_fina_2 = df_fina_2.pivot_table(index=['Kod', 'Rok'], columns='Rodzaje dochodów', values='Wartosc').reset_index()\n",
"df_fina_2 = df_fina_2.rename(columns={\n",
" 'dochody z majątku': 'Dochody_z_majatku',\n",
" 'dochody z majątku - dochody z najmu i dzierżawy składników majątkowych JST oraz innych umów o podobnym charakterze': 'Dochody_z_najmu_i_dzierzawy',\n",
" 'pozostałe dochody - wpływy z usług': 'Dochody_z_uslug',\n",
" 'pozostałe dochody - środki na dofinansowanie własnych zadań pozyskane z innych źródeł - inwestycyjne': 'Dochody_dofinansowanie_inwestycyjne',\n",
" 'pozostałe dochody - środki na dofinansowanie własnych zadań pozyskane z innych źródeł - razem': 'Dochody_dofinansowanie_razem',\n",
" 'udziały w podatkach stanowiących dochody budżetu państwa podatek dochodowy od osób fizycznych': 'Udzialy_w_podatkach_dochodowych_od_osob_fizycznych',\n",
" 'udziały w podatkach stanowiących dochody budżetu państwa podatek dochodowy od osób prawnych': 'Udzialy_w_podatkach_dochodowych_od_osob_prywatnych',\n",
" 'udziały w podatkach stanowiących dochody budżetu państwa razem': 'Udzialy_w_podatkach_dochodowych_razem',\n",
" 'wpływy z innych lokalnych opłat pobieranych przez jednostki samorządu terytorialnego na podstawie odrębnych ustaw': 'Wplywy_z_innych_lokalnych_oplat',\n",
" 'wpływy z opłaty eksploatacyjnej': 'Wplywy_z_oplaty_eksploatacyjnej',\n",
" 'wpływy z opłaty skarbowej': 'Wplywy_z_oplaty_skarbowej',\n",
" 'wpływy z opłaty targowej': 'Wplywy_z_oplaty_targowej'})\n",
"\n",
"df_fina_2"
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 132,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Wiek</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Ludnosc_ogolem</th>\n",
" <th>Ludnosc_w_wieku_poprodukcyjnym</th>\n",
" <th>Ludnosc_w_wieku_produkcyjnym</th>\n",
" <th>Ludnosc_w_wieku_produkcyjnym_mobilnym</th>\n",
" <th>Ludnosc_w_wieku_produkcyjnym_niemobilnym</th>\n",
" <th>Ludnosc_w_wieku_przedprodukcyjnym</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2010</td>\n",
" <td>40309.00</td>\n",
" <td>7683.00</td>\n",
" <td>26085.00</td>\n",
" <td>15183.00</td>\n",
" <td>10902.00</td>\n",
" <td>6541.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2011</td>\n",
" <td>40119.00</td>\n",
" <td>8020.00</td>\n",
" <td>25647.00</td>\n",
" <td>15047.00</td>\n",
" <td>10600.00</td>\n",
" <td>6452.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2012</td>\n",
" <td>39851.00</td>\n",
" <td>8392.00</td>\n",
" <td>25160.00</td>\n",
" <td>14932.00</td>\n",
" <td>10228.00</td>\n",
" <td>6299.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2013</td>\n",
" <td>39603.00</td>\n",
" <td>8678.00</td>\n",
" <td>24720.00</td>\n",
" <td>14784.00</td>\n",
" <td>9936.00</td>\n",
" <td>6205.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2014</td>\n",
" <td>39464.00</td>\n",
" <td>8971.00</td>\n",
" <td>24307.00</td>\n",
" <td>14645.00</td>\n",
" <td>9662.00</td>\n",
" <td>6186.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48606</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>40910.00</td>\n",
" <td>10472.00</td>\n",
" <td>24549.00</td>\n",
" <td>14683.00</td>\n",
" <td>9866.00</td>\n",
" <td>5889.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48607</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>40888.00</td>\n",
" <td>10788.00</td>\n",
" <td>24209.00</td>\n",
" <td>14429.00</td>\n",
" <td>9780.00</td>\n",
" <td>5891.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48608</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>40326.00</td>\n",
" <td>10962.00</td>\n",
" <td>23544.00</td>\n",
" <td>13798.00</td>\n",
" <td>9746.00</td>\n",
" <td>5820.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48609</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>39834.00</td>\n",
" <td>11050.00</td>\n",
" <td>22976.00</td>\n",
" <td>13277.00</td>\n",
" <td>9699.00</td>\n",
" <td>5808.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48610</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>39368.00</td>\n",
" <td>11157.00</td>\n",
" <td>22486.00</td>\n",
" <td>12802.00</td>\n",
" <td>9684.00</td>\n",
" <td>5725.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>48611 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
"Wiek Kod Rok Ludnosc_ogolem Ludnosc_w_wieku_poprodukcyjnym \n",
"0 0201011 2010 40309.00 7683.00 \\\n",
"1 0201011 2011 40119.00 8020.00 \n",
"2 0201011 2012 39851.00 8392.00 \n",
"3 0201011 2013 39603.00 8678.00 \n",
"4 0201011 2014 39464.00 8971.00 \n",
"... ... ... ... ... \n",
"48606 3263011 2018 40910.00 10472.00 \n",
"48607 3263011 2019 40888.00 10788.00 \n",
"48608 3263011 2020 40326.00 10962.00 \n",
"48609 3263011 2021 39834.00 11050.00 \n",
"48610 3263011 2022 39368.00 11157.00 \n",
"\n",
"Wiek Ludnosc_w_wieku_produkcyjnym Ludnosc_w_wieku_produkcyjnym_mobilnym \n",
"0 26085.00 15183.00 \\\n",
"1 25647.00 15047.00 \n",
"2 25160.00 14932.00 \n",
"3 24720.00 14784.00 \n",
"4 24307.00 14645.00 \n",
"... ... ... \n",
"48606 24549.00 14683.00 \n",
"48607 24209.00 14429.00 \n",
"48608 23544.00 13798.00 \n",
"48609 22976.00 13277.00 \n",
"48610 22486.00 12802.00 \n",
"\n",
"Wiek Ludnosc_w_wieku_produkcyjnym_niemobilnym \n",
"0 10902.00 \\\n",
"1 10600.00 \n",
"2 10228.00 \n",
"3 9936.00 \n",
"4 9662.00 \n",
"... ... \n",
"48606 9866.00 \n",
"48607 9780.00 \n",
"48608 9746.00 \n",
"48609 9699.00 \n",
"48610 9684.00 \n",
"\n",
"Wiek Ludnosc_w_wieku_przedprodukcyjnym \n",
"0 6541.00 \n",
"1 6452.00 \n",
"2 6299.00 \n",
"3 6205.00 \n",
"4 6186.00 \n",
"... ... \n",
"48606 5889.00 \n",
"48607 5891.00 \n",
"48608 5820.00 \n",
"48609 5808.00 \n",
"48610 5725.00 \n",
"\n",
"[48611 rows x 8 columns]"
]
},
2024-05-06 23:50:22 +02:00
"execution_count": 132,
2024-05-06 22:55:21 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_ludn_1 = pd.read_csv( # ogolem\n",
" 'LUDN_1342_CREL_1.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_ludn_1 = df_ludn_1[['Kod', 'Wiek', 'Rok', 'Wartosc']]\n",
"df_ludn_1 = df_ludn_1.dropna()\n",
"df_ludn_1 = df_ludn_1.pivot_table(index=['Kod', 'Rok'], columns='Wiek', values='Wartosc').reset_index()\n",
"df_ludn_1 = df_ludn_1.rename(columns={\n",
" 'ogółem': 'Ludnosc_ogolem',\n",
" 'w wieku poprodukcyjnym': 'Ludnosc_w_wieku_poprodukcyjnym',\n",
" 'w wieku produkcyjnym': 'Ludnosc_w_wieku_produkcyjnym',\n",
" 'w wieku produkcyjnym mobilnym': 'Ludnosc_w_wieku_produkcyjnym_mobilnym',\n",
" 'w wieku produkcyjnym niemobilnym': 'Ludnosc_w_wieku_produkcyjnym_niemobilnym',\n",
" 'w wieku przedprodukcyjnym': 'Ludnosc_w_wieku_przedprodukcyjnym'})\n",
"\n",
"df_ludn_1"
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 133,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Wiek</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Ludnosc_mezczyzni</th>\n",
" <th>Ludnosc_mezczyzni_w_wieku_poprodukcyjnym</th>\n",
" <th>Ludnosc_mezczyzni_w_wieku_produkcyjnym</th>\n",
" <th>Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym</th>\n",
" <th>Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym</th>\n",
" <th>Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2010</td>\n",
" <td>19085.00</td>\n",
" <td>2153.00</td>\n",
" <td>13535.00</td>\n",
" <td>7720.00</td>\n",
" <td>5815.00</td>\n",
" <td>3397.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2011</td>\n",
" <td>18985.00</td>\n",
" <td>2222.00</td>\n",
" <td>13398.00</td>\n",
" <td>7647.00</td>\n",
" <td>5751.00</td>\n",
" <td>3365.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2012</td>\n",
" <td>18859.00</td>\n",
" <td>2370.00</td>\n",
" <td>13238.00</td>\n",
" <td>7611.00</td>\n",
" <td>5627.00</td>\n",
" <td>3251.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2013</td>\n",
" <td>18737.00</td>\n",
" <td>2477.00</td>\n",
" <td>13028.00</td>\n",
" <td>7501.00</td>\n",
" <td>5527.00</td>\n",
" <td>3232.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2014</td>\n",
" <td>18640.00</td>\n",
" <td>2620.00</td>\n",
" <td>12832.00</td>\n",
" <td>7442.00</td>\n",
" <td>5390.00</td>\n",
" <td>3188.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48606</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>19690.00</td>\n",
" <td>3501.00</td>\n",
" <td>13202.00</td>\n",
" <td>7547.00</td>\n",
" <td>5655.00</td>\n",
" <td>2987.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48607</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>19683.00</td>\n",
" <td>3644.00</td>\n",
" <td>13044.00</td>\n",
" <td>7417.00</td>\n",
" <td>5627.00</td>\n",
" <td>2995.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48608</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>19356.00</td>\n",
" <td>3749.00</td>\n",
" <td>12617.00</td>\n",
" <td>6986.00</td>\n",
" <td>5631.00</td>\n",
" <td>2990.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48609</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>19096.00</td>\n",
" <td>3852.00</td>\n",
" <td>12267.00</td>\n",
" <td>6747.00</td>\n",
" <td>5520.00</td>\n",
" <td>2977.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48610</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>18869.00</td>\n",
" <td>3901.00</td>\n",
" <td>12009.00</td>\n",
" <td>6485.00</td>\n",
" <td>5524.00</td>\n",
" <td>2959.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>48611 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
"Wiek Kod Rok Ludnosc_mezczyzni \n",
"0 0201011 2010 19085.00 \\\n",
"1 0201011 2011 18985.00 \n",
"2 0201011 2012 18859.00 \n",
"3 0201011 2013 18737.00 \n",
"4 0201011 2014 18640.00 \n",
"... ... ... ... \n",
"48606 3263011 2018 19690.00 \n",
"48607 3263011 2019 19683.00 \n",
"48608 3263011 2020 19356.00 \n",
"48609 3263011 2021 19096.00 \n",
"48610 3263011 2022 18869.00 \n",
"\n",
"Wiek Ludnosc_mezczyzni_w_wieku_poprodukcyjnym \n",
"0 2153.00 \\\n",
"1 2222.00 \n",
"2 2370.00 \n",
"3 2477.00 \n",
"4 2620.00 \n",
"... ... \n",
"48606 3501.00 \n",
"48607 3644.00 \n",
"48608 3749.00 \n",
"48609 3852.00 \n",
"48610 3901.00 \n",
"\n",
"Wiek Ludnosc_mezczyzni_w_wieku_produkcyjnym \n",
"0 13535.00 \\\n",
"1 13398.00 \n",
"2 13238.00 \n",
"3 13028.00 \n",
"4 12832.00 \n",
"... ... \n",
"48606 13202.00 \n",
"48607 13044.00 \n",
"48608 12617.00 \n",
"48609 12267.00 \n",
"48610 12009.00 \n",
"\n",
"Wiek Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym \n",
"0 7720.00 \\\n",
"1 7647.00 \n",
"2 7611.00 \n",
"3 7501.00 \n",
"4 7442.00 \n",
"... ... \n",
"48606 7547.00 \n",
"48607 7417.00 \n",
"48608 6986.00 \n",
"48609 6747.00 \n",
"48610 6485.00 \n",
"\n",
"Wiek Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym \n",
"0 5815.00 \\\n",
"1 5751.00 \n",
"2 5627.00 \n",
"3 5527.00 \n",
"4 5390.00 \n",
"... ... \n",
"48606 5655.00 \n",
"48607 5627.00 \n",
"48608 5631.00 \n",
"48609 5520.00 \n",
"48610 5524.00 \n",
"\n",
"Wiek Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym \n",
"0 3397.00 \n",
"1 3365.00 \n",
"2 3251.00 \n",
"3 3232.00 \n",
"4 3188.00 \n",
"... ... \n",
"48606 2987.00 \n",
"48607 2995.00 \n",
"48608 2990.00 \n",
"48609 2977.00 \n",
"48610 2959.00 \n",
"\n",
"[48611 rows x 8 columns]"
]
},
2024-05-06 23:50:22 +02:00
"execution_count": 133,
2024-05-06 22:55:21 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_ludn_2 = pd.read_csv( # mezczyzni\n",
" 'LUDN_1342_CREL_2.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_ludn_2 = df_ludn_2[['Kod', 'Wiek', 'Rok', 'Wartosc']]\n",
"df_ludn_2 = df_ludn_2.dropna()\n",
"df_ludn_2 = df_ludn_2.pivot_table(index=['Kod', 'Rok'], columns='Wiek', values='Wartosc').reset_index()\n",
"df_ludn_2 = df_ludn_2.rename(columns={\n",
" 'ogółem': 'Ludnosc_mezczyzni',\n",
" 'w wieku poprodukcyjnym': 'Ludnosc_mezczyzni_w_wieku_poprodukcyjnym',\n",
" 'w wieku produkcyjnym': 'Ludnosc_mezczyzni_w_wieku_produkcyjnym',\n",
" 'w wieku produkcyjnym mobilnym': 'Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym',\n",
" 'w wieku produkcyjnym niemobilnym': 'Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym',\n",
" 'w wieku przedprodukcyjnym': 'Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym'})\n",
"\n",
"df_ludn_2"
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 134,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Wiek</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Ludnosc_kobiety</th>\n",
" <th>Ludnosc_kobiety_w_wieku_poprodukcyjnym</th>\n",
" <th>Ludnosc_kobiety_w_wieku_produkcyjnym</th>\n",
" <th>Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym</th>\n",
" <th>Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym</th>\n",
" <th>Ludnosc_kobiety_w_wieku_przedprodukcyjnym</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2010</td>\n",
" <td>21224.00</td>\n",
" <td>5530.00</td>\n",
" <td>12550.00</td>\n",
" <td>7463.00</td>\n",
" <td>5087.00</td>\n",
" <td>3144.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2011</td>\n",
" <td>21134.00</td>\n",
" <td>5798.00</td>\n",
" <td>12249.00</td>\n",
" <td>7400.00</td>\n",
" <td>4849.00</td>\n",
" <td>3087.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2012</td>\n",
" <td>20992.00</td>\n",
" <td>6022.00</td>\n",
" <td>11922.00</td>\n",
" <td>7321.00</td>\n",
" <td>4601.00</td>\n",
" <td>3048.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2013</td>\n",
" <td>20866.00</td>\n",
" <td>6201.00</td>\n",
" <td>11692.00</td>\n",
" <td>7283.00</td>\n",
" <td>4409.00</td>\n",
" <td>2973.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2014</td>\n",
" <td>20824.00</td>\n",
" <td>6351.00</td>\n",
" <td>11475.00</td>\n",
" <td>7203.00</td>\n",
" <td>4272.00</td>\n",
" <td>2998.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48606</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>21220.00</td>\n",
" <td>6971.00</td>\n",
" <td>11347.00</td>\n",
" <td>7136.00</td>\n",
" <td>4211.00</td>\n",
" <td>2902.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48607</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>21205.00</td>\n",
" <td>7144.00</td>\n",
" <td>11165.00</td>\n",
" <td>7012.00</td>\n",
" <td>4153.00</td>\n",
" <td>2896.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48608</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>20970.00</td>\n",
" <td>7213.00</td>\n",
" <td>10927.00</td>\n",
" <td>6812.00</td>\n",
" <td>4115.00</td>\n",
" <td>2830.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48609</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>20738.00</td>\n",
" <td>7198.00</td>\n",
" <td>10709.00</td>\n",
" <td>6530.00</td>\n",
" <td>4179.00</td>\n",
" <td>2831.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48610</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>20499.00</td>\n",
" <td>7256.00</td>\n",
" <td>10477.00</td>\n",
" <td>6317.00</td>\n",
" <td>4160.00</td>\n",
" <td>2766.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>48611 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
"Wiek Kod Rok Ludnosc_kobiety Ludnosc_kobiety_w_wieku_poprodukcyjnym \n",
"0 0201011 2010 21224.00 5530.00 \\\n",
"1 0201011 2011 21134.00 5798.00 \n",
"2 0201011 2012 20992.00 6022.00 \n",
"3 0201011 2013 20866.00 6201.00 \n",
"4 0201011 2014 20824.00 6351.00 \n",
"... ... ... ... ... \n",
"48606 3263011 2018 21220.00 6971.00 \n",
"48607 3263011 2019 21205.00 7144.00 \n",
"48608 3263011 2020 20970.00 7213.00 \n",
"48609 3263011 2021 20738.00 7198.00 \n",
"48610 3263011 2022 20499.00 7256.00 \n",
"\n",
"Wiek Ludnosc_kobiety_w_wieku_produkcyjnym \n",
"0 12550.00 \\\n",
"1 12249.00 \n",
"2 11922.00 \n",
"3 11692.00 \n",
"4 11475.00 \n",
"... ... \n",
"48606 11347.00 \n",
"48607 11165.00 \n",
"48608 10927.00 \n",
"48609 10709.00 \n",
"48610 10477.00 \n",
"\n",
"Wiek Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym \n",
"0 7463.00 \\\n",
"1 7400.00 \n",
"2 7321.00 \n",
"3 7283.00 \n",
"4 7203.00 \n",
"... ... \n",
"48606 7136.00 \n",
"48607 7012.00 \n",
"48608 6812.00 \n",
"48609 6530.00 \n",
"48610 6317.00 \n",
"\n",
"Wiek Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym \n",
"0 5087.00 \\\n",
"1 4849.00 \n",
"2 4601.00 \n",
"3 4409.00 \n",
"4 4272.00 \n",
"... ... \n",
"48606 4211.00 \n",
"48607 4153.00 \n",
"48608 4115.00 \n",
"48609 4179.00 \n",
"48610 4160.00 \n",
"\n",
"Wiek Ludnosc_kobiety_w_wieku_przedprodukcyjnym \n",
"0 3144.00 \n",
"1 3087.00 \n",
"2 3048.00 \n",
"3 2973.00 \n",
"4 2998.00 \n",
"... ... \n",
"48606 2902.00 \n",
"48607 2896.00 \n",
"48608 2830.00 \n",
"48609 2831.00 \n",
"48610 2766.00 \n",
"\n",
"[48611 rows x 8 columns]"
]
},
2024-05-06 23:50:22 +02:00
"execution_count": 134,
2024-05-06 22:55:21 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_ludn_3 = pd.read_csv( # kobiety\n",
" 'LUDN_1342_CREL_3.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_ludn_3 = df_ludn_3[['Kod', 'Wiek', 'Rok', 'Wartosc']]\n",
"df_ludn_3 = df_ludn_3.dropna()\n",
"df_ludn_3 = df_ludn_3.pivot_table(index=['Kod', 'Rok'], columns='Wiek', values='Wartosc').reset_index()\n",
"df_ludn_3 = df_ludn_3.rename(columns={\n",
" 'ogółem': 'Ludnosc_kobiety',\n",
" 'w wieku poprodukcyjnym': 'Ludnosc_kobiety_w_wieku_poprodukcyjnym',\n",
" 'w wieku produkcyjnym': 'Ludnosc_kobiety_w_wieku_produkcyjnym',\n",
" 'w wieku produkcyjnym mobilnym': 'Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym',\n",
" 'w wieku produkcyjnym niemobilnym': 'Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym',\n",
" 'w wieku przedprodukcyjnym': 'Ludnosc_kobiety_w_wieku_przedprodukcyjnym'})\n",
"\n",
"df_ludn_3"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 135,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"df_data = df_dofinansowanie_agg.copy()"
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 136,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"df_data = df_data.merge(df_podz, left_on=[df_data['Kod'].str.slice(stop=-1), 'Rok'], right_on=[df_podz['Kod'].str.slice(stop=-1), 'Rok'], how='left', suffixes=(None, '_podz'))\n",
"df_data = df_data.drop(['key_0', 'Kod_podz'], axis=1)"
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 137,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"df_data = df_data.merge(df_wyna, left_on=[df_data['Kod'].str.slice(stop=-3), 'Rok'], right_on=[df_wyna['Kod'].str.slice(stop=-3), 'Rok'], how='left', suffixes=(None, '_wyna'))\n",
"df_data = df_data.drop(['key_0', 'Kod_wyna'], axis=1)"
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 138,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"df_data = df_data.merge(df_fina_1, left_on=[df_data['Kod'].str.slice(stop=-1), 'Rok'], right_on=[df_fina_1['Kod'].str.slice(stop=-1), 'Rok'], how='left', suffixes=(None, '_fina_1'))\n",
2024-05-06 23:50:22 +02:00
"df_data = df_data.drop(['key_0', 'Kod_fina_1'], axis=1)\n",
"\n",
"df_data = df_data.merge(df_fina_2, left_on=[df_data['Kod'].str.slice(stop=-1), 'Rok'], right_on=[df_fina_2['Kod'].str.slice(stop=-1), 'Rok'], how='left', suffixes=(None, '_fina_2'))\n",
"df_data = df_data.drop(['key_0', 'Kod_fina_2'], axis=1)\n"
2024-05-06 22:55:21 +02:00
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 139,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
2024-05-06 23:50:22 +02:00
"df_data = df_data.merge(df_ludn_1, left_on=[df_data['Kod'].str.slice(stop=-1), 'Rok'], right_on=[df_ludn_1['Kod'].str.slice(stop=-1), 'Rok'], how='left', suffixes=(None, '_ludn_1'))\n",
"df_data = df_data.drop(['key_0', 'Kod_ludn_1'], axis=1)\n",
"\n",
"df_data = df_data.merge(df_ludn_2, left_on=[df_data['Kod'].str.slice(stop=-1), 'Rok'], right_on=[df_ludn_2['Kod'].str.slice(stop=-1), 'Rok'], how='left', suffixes=(None, '_ludn_2'))\n",
"df_data = df_data.drop(['key_0', 'Kod_ludn_2'], axis=1)\n",
"\n",
"df_data = df_data.merge(df_ludn_3, left_on=[df_data['Kod'].str.slice(stop=-1), 'Rok'], right_on=[df_ludn_3['Kod'].str.slice(stop=-1), 'Rok'], how='left', suffixes=(None, '_ludn_3'))\n",
"df_data = df_data.drop(['key_0', 'Kod_ludn_3'], axis=1)"
2024-05-06 22:55:21 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 140,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-05-06 23:50:22 +02:00
"Index(['Kod', 'Rok', 'Suma', 'Powierzchnia', 'Wynagrodzenie_ogolem',\n",
" 'Wynagrodzenie_w_relacji_do_sredniej', 'Dochody_podatek_lesny',\n",
" 'Dochody_podatek_PCC', 'Dochody_podatek_od_dzialalnosci_gospodarczej',\n",
" 'Dochody_podatek_od_nieruchomosci', 'Dochody_podatek_od_spadkow',\n",
" 'Dochody_podatek_od_srodkow_transportowych', 'Dochody_podatek_rolny',\n",
" 'Dochody_podatek_odrebne_ustawy', 'Dochody_razem', 'Dochody_z_majatku',\n",
" 'Dochody_z_najmu_i_dzierzawy', 'Dochody_z_uslug',\n",
" 'Dochody_dofinansowanie_inwestycyjne', 'Dochody_dofinansowanie_razem',\n",
" 'Udzialy_w_podatkach_dochodowych_od_osob_fizycznych',\n",
" 'Udzialy_w_podatkach_dochodowych_od_osob_prywatnych',\n",
" 'Udzialy_w_podatkach_dochodowych_razem',\n",
" 'Wplywy_z_innych_lokalnych_oplat', 'Wplywy_z_oplaty_eksploatacyjnej',\n",
" 'Wplywy_z_oplaty_skarbowej', 'Wplywy_z_oplaty_targowej',\n",
" 'Ludnosc_ogolem', 'Ludnosc_w_wieku_poprodukcyjnym',\n",
" 'Ludnosc_w_wieku_produkcyjnym', 'Ludnosc_w_wieku_produkcyjnym_mobilnym',\n",
" 'Ludnosc_w_wieku_produkcyjnym_niemobilnym',\n",
" 'Ludnosc_w_wieku_przedprodukcyjnym', 'Ludnosc_mezczyzni',\n",
" 'Ludnosc_mezczyzni_w_wieku_poprodukcyjnym',\n",
" 'Ludnosc_mezczyzni_w_wieku_produkcyjnym',\n",
" 'Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym',\n",
" 'Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym',\n",
" 'Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym', 'Ludnosc_kobiety',\n",
" 'Ludnosc_kobiety_w_wieku_poprodukcyjnym',\n",
" 'Ludnosc_kobiety_w_wieku_produkcyjnym',\n",
" 'Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym',\n",
" 'Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym',\n",
" 'Ludnosc_kobiety_w_wieku_przedprodukcyjnym'],\n",
" dtype='object')"
2024-05-06 22:55:21 +02:00
]
},
2024-05-06 23:50:22 +02:00
"execution_count": 140,
2024-05-06 22:55:21 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-05-06 23:50:22 +02:00
"df_data.columns"
2024-05-06 22:55:21 +02:00
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 141,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"# df_data[df_data.isna().any(axis=1)] # ['Rok'].drop_duplicates().reset_index(drop=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2024-05-06 23:50:22 +02:00
"..."
]
},
{
"cell_type": "code",
"execution_count": 142,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mean Squared Error: 314617008704682.4\n"
]
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"# from sklearn.preprocessing import StandardScaler\n",
"# from sklearn.preprocessing import MinMaxScaler\n",
"from sklearn.tree import DecisionTreeRegressor, plot_tree\n",
"from sklearn.metrics import mean_squared_error\n",
"import matplotlib.pyplot as plt\n",
"\n",
"df_data.dropna(inplace=True)\n",
"\n",
"feature_names = [\n",
" 'Powierzchnia',\n",
" 'Wynagrodzenie_ogolem',\n",
" 'Wynagrodzenie_w_relacji_do_sredniej',\n",
" 'Dochody_podatek_lesny',\n",
" 'Dochody_podatek_PCC',\n",
" 'Dochody_podatek_od_dzialalnosci_gospodarczej',\n",
" 'Dochody_podatek_od_nieruchomosci',\n",
" 'Dochody_podatek_od_spadkow',\n",
" 'Dochody_podatek_od_srodkow_transportowych',\n",
" 'Dochody_podatek_rolny',\n",
" 'Dochody_podatek_odrebne_ustawy',\n",
" 'Dochody_razem',\n",
" 'Dochody_z_majatku',\n",
" 'Dochody_z_najmu_i_dzierzawy', \n",
" 'Dochody_z_uslug',\n",
" 'Dochody_dofinansowanie_inwestycyjne',\n",
" 'Dochody_dofinansowanie_razem',\n",
" 'Udzialy_w_podatkach_dochodowych_od_osob_fizycznych',\n",
" 'Udzialy_w_podatkach_dochodowych_od_osob_prywatnych',\n",
" 'Udzialy_w_podatkach_dochodowych_razem',\n",
" 'Wplywy_z_innych_lokalnych_oplat',\n",
" 'Wplywy_z_oplaty_eksploatacyjnej',\n",
" 'Wplywy_z_oplaty_skarbowej',\n",
" 'Wplywy_z_oplaty_targowej',\n",
" 'Ludnosc_ogolem',\n",
" 'Ludnosc_w_wieku_poprodukcyjnym',\n",
" 'Ludnosc_w_wieku_produkcyjnym',\n",
" 'Ludnosc_w_wieku_produkcyjnym_mobilnym',\n",
" 'Ludnosc_w_wieku_produkcyjnym_niemobilnym',\n",
" 'Ludnosc_w_wieku_przedprodukcyjnym',\n",
" 'Ludnosc_mezczyzni',\n",
" 'Ludnosc_mezczyzni_w_wieku_poprodukcyjnym',\n",
" 'Ludnosc_mezczyzni_w_wieku_produkcyjnym',\n",
" 'Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym',\n",
" 'Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym',\n",
" 'Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym',\n",
" 'Ludnosc_kobiety',\n",
" 'Ludnosc_kobiety_w_wieku_poprodukcyjnym',\n",
" 'Ludnosc_kobiety_w_wieku_produkcyjnym',\n",
" 'Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym',\n",
" 'Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym',\n",
" 'Ludnosc_kobiety_w_wieku_przedprodukcyjnym']\n",
"\n",
"X = df_data[feature_names]\n",
"y = df_data['Suma']\n",
"\n",
"# scaler = StandardScaler()\n",
"# scaler = MinMaxScaler()\n",
"# X = scaler.fit_transform(X)\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)\n",
"\n",
"model = DecisionTreeRegressor(random_state=1)\n",
"model.fit(X_train, y_train)\n",
"\n",
"y_pred = model.predict(X_test)\n",
"mse = mean_squared_error(y_test, y_pred)\n",
"print(\"Mean Squared Error:\", mse)\n",
"\n",
"# Dodatkowe dane:\n",
"# Wojewodztwo\n",
"# ..."
2024-05-06 22:55:21 +02:00
]
},
{
"cell_type": "code",
2024-05-06 23:50:22 +02:00
"execution_count": 143,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
2024-05-06 23:50:22 +02:00
"source": [
"# plt.figure(figsize=(15, 10))\n",
"# plot_tree(model, feature_names=feature_names, filled=True, rounded=True)\n",
"# plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 144,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAHHCAYAAACRAnNyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABYyElEQVR4nO3deXwTdf4/8NfM5GyTtpS2HKW0cosgHhUFBFRUFJb1WnUVFXDV1cVjdXUX1q/iBYjr9VsP8AJxZQEP0F3xwEURRRQQEBARyg2FtlCaqzlnPr8/2kZ6QZOmyaR9PR+PPh5kMknemZb2lZnP5/2RhBACRERERDokJ7oAIiIiosYwqBAREZFuMagQERGRbjGoEBERkW4xqBAREZFuMagQERGRbjGoEBERkW4xqBAREZFuMagQERGRbjGoENEJSZKERx55JNFlJNx5552H8847L3x79+7dkCQJb775ZsJqqqtujUTJjkGFKM5efvllSJKEs88+O+rnKC4uxiOPPIINGzbErjCdW758OSRJCn8ZjUZ069YNN910E3bu3Jno8iLy7bff4pFHHkFFRUWiSyHSPUOiCyBqa+bNm4eCggKsXr0aRUVF6NGjR8TPUVxcjEcffRQFBQU47bTTYl+kjt19990466yzEAwGsW7dOrz66qtYsmQJNm3ahM6dO8e1lvz8fHi9XhiNxoge9+233+LRRx/F+PHjkZGR0TLFEbUSPKNCFEe7du3Ct99+i2effRbZ2dmYN29eoktKOkOHDsUNN9yACRMm4IUXXsDTTz+N8vJyzJ07t9HHeDyeFqlFkiRYLBYoitIiz09EDCpEcTVv3jy0a9cOo0ePxu9+97tGg0pFRQXuvfdeFBQUwGw2o0uXLrjppptw+PBhLF++HGeddRYAYMKECeFLITXjJAoKCjB+/Ph6z1l37EIgEMDDDz+MM888E+np6UhNTcXQoUPx5ZdfRvy+SkpKYDAY8Oijj9a775dffoEkSXjxxRcBAMFgEI8++ih69uwJi8WC9u3b49xzz8Xnn38e8esCwAUXXACgKgQCwCOPPAJJkrBlyxZcf/31aNeuHc4999zw/m+//TbOPPNMWK1WZGZm4ve//z327dtX73lfffVVdO/eHVarFQMHDsTXX39db5/Gxqhs3boV11xzDbKzs2G1WtG7d288+OCD4foeeOABAMBJJ50U/v7t3r27RWokSna89EMUR/PmzcOVV14Jk8mE6667DjNnzsSaNWvCwQMA3G43hg4dip9//hk333wzzjjjDBw+fBj/+c9/sH//fpx88sl47LHH8PDDD+O2227D0KFDAQCDBw+OqBan04nXX38d1113HW699Va4XC688cYbGDlyJFavXh3RJaUOHTpg+PDheOeddzBlypRa9y1cuBCKouDqq68GUPWHevr06bjlllswcOBAOJ1OrF27FuvWrcNFF10U0XsAgB07dgAA2rdvX2v71VdfjZ49e2LatGkQQgAApk6dioceegjXXHMNbrnlFpSVleGFF17AsGHDsH79+vBlmDfeeAN//OMfMXjwYPz5z3/Gzp078dvf/haZmZnIy8s7bj0bN27E0KFDYTQacdttt6GgoAA7duzAf//7X0ydOhVXXnkltm3bhvnz5+O5555DVlYWACA7OztuNRIlFUFEcbF27VoBQHz++edCCCE0TRNdunQR99xzT639Hn74YQFALFq0qN5zaJomhBBizZo1AoCYM2dOvX3y8/PFuHHj6m0fPny4GD58ePh2KBQSfr+/1j5Hjx4VHTp0EDfffHOt7QDElClTjvv+XnnlFQFAbNq0qdb2vn37igsuuCB8e8CAAWL06NHHfa6GfPnllwKAmD17tigrKxPFxcViyZIloqCgQEiSJNasWSOEEGLKlCkCgLjuuutqPX737t1CURQxderUWts3bdokDAZDeHsgEBA5OTnitNNOq3V8Xn31VQGg1jHctWtXve/DsGHDhN1uF3v27Kn1OjXfOyGE+Mc//iEAiF27drV4jUTJjpd+iOJk3rx56NChA84//3wAVeMbrr32WixYsACqqob3e//99zFgwABcccUV9Z5DkqSY1aMoCkwmEwBA0zSUl5cjFAqhsLAQ69ati/j5rrzyShgMBixcuDC8bfPmzdiyZQuuvfba8LaMjAz89NNP2L59e1R133zzzcjOzkbnzp0xevRoeDwezJ07F4WFhbX2u/3222vdXrRoETRNwzXXXIPDhw+Hvzp27IiePXuGL3mtXbsWpaWluP3228PHBwDGjx+P9PT049ZWVlaGFStW4Oabb0bXrl1r3deU7108aiRKNq0mqKxYsQJjxoxB586dIUkSPvjgg4if47PPPsM555wDu92O7OxsXHXVVbWuGxNFS1VVLFiwAOeffz527dqFoqIiFBUV4eyzz0ZJSQmWLVsW3nfHjh3o169fXOqaO3cuTj311PBYkezsbCxZsgQOhyPi58rKysKIESPwzjvvhLctXLgQBoMBV155ZXjbY489hoqKCvTq1Qv9+/fHAw88gI0bNzb5dR5++GF8/vnn+OKLL7Bx40YUFxfjxhtvrLffSSedVOv29u3bIYRAz549kZ2dXevr559/RmlpKQBgz549AICePXvWenzNdOjjqZkmHe33Lx41EiWbVjNGxePxYMCAAbj55ptr/VJsql27duGyyy7Dfffdh3nz5sHhcODee+/FlVdeGdWnS6JjffHFFzh48CAWLFiABQsW1Lt/3rx5uPjii2PyWo19cldVtdbslLfffhvjx4/H5ZdfjgceeAA5OTlQFAXTp08Pj/uI1O9//3tMmDABGzZswGmnnYZ33nkHI0aMCI/DAIBhw4Zhx44d+PDDD7F06VK8/vrreO655zBr1izccsstJ3yN/v3748ILLzzhflartdZtTdMgSRI++eSTBmfp2Gy2JrzDlpUMNRLFW6sJKpdeeikuvfTSRu/3+/148MEHMX/+fFRUVKBfv36YMWNGeBbEDz/8AFVV8cQTT0CWq0403X///bjssssQDAYj7pNAdKx58+YhJycHL730Ur37Fi1ahMWLF2PWrFmwWq3o3r07Nm/efNznO95lhHbt2jXYSGzPnj21Pm2/99576NatGxYtWlTr+eoOho3E5Zdfjj/+8Y/hyz/btm3D5MmT6+2XmZmJCRMmYMKECXC73Rg2bBgeeeSRJgWVaHXv3h1CCJx00kno1atXo/vl5+cDqDq7UTOjCKiarbRr1y4MGDCg0cfWHN9ov3/xqJEo2bSaSz8ncuedd2LVqlVYsGABNm7ciKuvvhqXXHJJ+Dr5mWeeCVmWMWfOHKiqCofDgX/961+48MILGVKoWbxeLxYtWoTf/OY3+N3vflfv684774TL5cJ//vMfAMBVV12FH3/8EYsXL673XKJ69kpqaioANBhIunfvju+++w6BQCC87aOPPqo3vbXmE3vNcwLA999/j1WrVkX9XjMyMjBy5Ei88847WLBgAUwmEy6//PJa+xw5cqTWbZvNhh49esDv90f9uk1x5ZVXQlEUPProo7XeM1B1DGrqKiwsRHZ2NmbNmlXrGL755psn7CSbnZ2NYcOGYfbs2di7d2+916jR2PcvHjUSJZ0EDeJtUQDE4sWLw7f37NkjFEURBw4cqLXfiBEjxOTJk8O3ly9fLnJycoSiKAKAGDRokDh69GicqqbWasGCBQKA+OCDDxq8X1VVkZ2dLcaMGSOEEMLlcom+ffsKRVHErbfeKmbNmiWmTZsmzjnnHLFhwwYhRNWsj4yMDNG7d2/x+uuvi/nz54udO3cKIYT49NNPBQBx/vnni5kzZ4r7779fdOzYUXTv3r3WbJDZs2cLAOK3v/2teOWVV8SkSZNERkaGOOWUU0R+fn6tGtGEWT813n77bQFA2O328Hs6Vk5OjrjmmmvEjBkzxGuvvSb++Mc/CkmSxF133XXc562Z9fPuu+8ed7+aWT9lZWX17ps+fboAIAYPHiyeeuopMXPmTPHXv/5V9OzZU/zjH/8I71czg2nIkCHin//8p7j33ntFRkaG6Nat2wln/WzYsEHYbDbRvn17MXn
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.scatter(y_test, y_pred, alpha=0.5)\n",
"plt.xlabel('Actual')\n",
"plt.ylabel('Predicted')\n",
"plt.title('Actual vs Predicted')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 145,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.39331 — Dochody_z_majatku\n",
"0.09103 — Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym\n",
"0.06877 — Udzialy_w_podatkach_dochodowych_od_osob_prywatnych\n",
"0.06441 — Wplywy_z_oplaty_skarbowej\n",
"0.05557 — Wynagrodzenie_w_relacji_do_sredniej\n",
"0.04363 — Wynagrodzenie_ogolem\n",
"0.03375 — Dochody_podatek_od_nieruchomosci\n",
"0.02552 — Dochody_z_uslug\n",
"0.02151 — Wplywy_z_oplaty_targowej\n",
"0.02078 — Udzialy_w_podatkach_dochodowych_od_osob_fizycznych\n",
"0.01974 — Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym\n",
"0.01962 — Wplywy_z_oplaty_eksploatacyjnej\n",
"0.01867 — Dochody_z_najmu_i_dzierzawy\n",
"0.01638 — Dochody_dofinansowanie_inwestycyjne\n",
"0.01573 — Dochody_podatek_rolny\n",
"0.01439 — Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym\n",
"0.01180 — Dochody_podatek_od_srodkow_transportowych\n",
"0.00875 — Dochody_podatek_od_dzialalnosci_gospodarczej\n",
"0.00809 — Dochody_podatek_PCC\n",
"0.00663 — Dochody_podatek_odrebne_ustawy\n",
"0.00634 — Dochody_podatek_lesny\n",
"0.00547 — Dochody_dofinansowanie_razem\n",
"0.00444 — Wplywy_z_innych_lokalnych_oplat\n",
"0.00427 — Dochody_razem\n",
"0.00398 — Ludnosc_mezczyzni_w_wieku_poprodukcyjnym\n",
"0.00367 — Powierzchnia\n",
"0.00329 — Dochody_podatek_od_spadkow\n",
"0.00248 — Udzialy_w_podatkach_dochodowych_razem\n",
"0.00160 — Ludnosc_kobiety_w_wieku_przedprodukcyjnym\n",
"0.00137 — Ludnosc_kobiety\n",
"0.00128 — Ludnosc_mezczyzni_w_wieku_produkcyjnym\n",
"0.00115 — Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym\n",
"0.00095 — Ludnosc_w_wieku_przedprodukcyjnym\n",
"0.00072 — Ludnosc_mezczyzni\n",
"0.00030 — Ludnosc_w_wieku_poprodukcyjnym\n",
"0.00013 — Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym\n",
"0.00012 — Ludnosc_kobiety_w_wieku_produkcyjnym\n",
"0.00012 — Ludnosc_kobiety_w_wieku_poprodukcyjnym\n",
"0.00009 — Ludnosc_ogolem\n",
"0.00008 — Ludnosc_w_wieku_produkcyjnym\n",
"0.00005 — Ludnosc_w_wieku_produkcyjnym_niemobilnym\n",
"0.00001 — Ludnosc_w_wieku_produkcyjnym_mobilnym\n"
]
}
],
"source": [
"feature_importance = dict(zip(feature_names, model.feature_importances_))\n",
"for feature, importance in sorted(feature_importance.items(), key=lambda x: x[1], reverse=True):\n",
" print(f\"{importance:.5f} \\u2014 {feature}\")"
]
2024-05-06 22:55:21 +02:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}