2024-05-06 22:55:21 +02:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 411,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 412,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Render floats with two decimal places in DataFrame output.\n",
"pd.set_option('display.float_format', '{:.2f}'.format)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 413,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-05-13 15:20:33 +02:00
"C:\\Users\\micha\\AppData\\Local\\Temp\\ipykernel_7800\\3760256257.py:1: DtypeWarning: Columns (25) have mixed types. Specify dtype option on import or set low_memory=False.\n",
2024-05-06 22:55:21 +02:00
" df_dofinansowanie = pd.read_csv(\n"
]
}
],
"source": [
"# Load the agreements list ('umowy_pelna_lista_krajowe.csv').\n",
"# low_memory=False lets pandas infer every column's dtype from the whole file,\n",
"# which avoids the mixed-type DtypeWarning raised by chunk-wise inference.\n",
"df_dofinansowanie = pd.read_csv(\n",
"    'umowy_pelna_lista_krajowe.csv',\n",
"    encoding='ISO-8859-2',\n",
"    converters={'TERYT pe?ny': str},  # keep the code as text; an integer parse would drop a leading zero\n",
"    thousands=',',\n",
"    low_memory=False)\n",
"\n",
"# Drop rows whose TERYT field is empty.\n",
"df_dofinansowanie = df_dofinansowanie.loc[df_dofinansowanie['TERYT pe?ny'] != ''].reset_index(drop=True)\n",
"\n",
"# Vectorised conversion of the whole column (instead of element-wise apply);\n",
"# still raises if a value cannot be parsed as a number.\n",
"df_dofinansowanie['Dofinansowanie UE (PLN)'] = \\\n",
"    pd.to_numeric(df_dofinansowanie['Dofinansowanie UE (PLN)'])\n",
"\n",
"# Parse the implementation start date and derive its calendar year.\n",
"df_dofinansowanie['Data rozpocz?cia realizacji'] = pd.to_datetime(df_dofinansowanie['Data rozpocz?cia realizacji'])\n",
"df_dofinansowanie['Rok rozpocz?cia realizacji'] = df_dofinansowanie['Data rozpocz?cia realizacji'].dt.year\n",
"\n",
"# Parse the original agreement signing date and derive its calendar year.\n",
"df_dofinansowanie['Data podpisania umowy pierwotnej'] = pd.to_datetime(df_dofinansowanie['Data podpisania umowy pierwotnej'])\n",
"df_dofinansowanie['Rok podpisania umowy pierwotnej'] = df_dofinansowanie['Data podpisania umowy pierwotnej'].dt.year"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 414,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Program Operacyjny Inteligentny Rozwój'\n",
" 'Program Operacyjny Infrastruktura i ?rodowisko 2014-2020'\n",
" 'Program Operacyjny Polska Cyfrowa'\n",
" 'Program Operacyjny Pomoc Techniczna 2014-2020'\n",
" 'Program Operacyjny Polska Wschodnia'\n",
" 'Program Operacyjny Wiedza Edukacja Rozwój']\n"
]
}
],
"source": [
"# List the distinct operational programmes, in order of first appearance.\n",
"print(df_dofinansowanie['Program operacyjny'].unique())"
]
},
{
"cell_type": "code",
"execution_count": 415,
"metadata": {},
"outputs": [],
"source": [
"# Select the operational programme to analyse and renumber the remaining rows.\n",
"df_dofinansowanie = (\n",
"    df_dofinansowanie[df_dofinansowanie['Program operacyjny'] == 'Program Operacyjny Infrastruktura i ?rodowisko 2014-2020']\n",
"    .reset_index(drop=True)\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 416,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Total EU co-financing per (TERYT code, implementation start year),\n",
"# with columns renamed to Kod / Rok / Suma.\n",
"df_dofinansowanie_agg = (\n",
"    df_dofinansowanie\n",
"    .groupby(['TERYT pe?ny', 'Rok rozpocz?cia realizacji'])['Dofinansowanie UE (PLN)']\n",
"    .sum()\n",
"    .reset_index()\n",
"    .rename(columns={\n",
"        'TERYT pe?ny': 'Kod',\n",
"        'Rok rozpocz?cia realizacji': 'Rok',\n",
"        'Dofinansowanie UE (PLN)': 'Suma'})\n",
"    # Keep only 7-character codes (presumably gmina-level TERYT identifiers - verify).\n",
"    .loc[lambda agg: agg['Kod'].str.len() == 7]\n",
"    .reset_index(drop=True)\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 417,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Load 'PODZ_1410_CREL.csv' and keep Kod / Rok / value, with the value column\n",
"# renamed to 'Powierzchnia'.\n",
"# NOTE(review): unlike the other ';'-separated loaders in this notebook, this one\n",
"# does not pass decimal=',' - confirm 'Wartosc' holds no decimal commas.\n",
"df_podz = (\n",
"    pd.read_csv('PODZ_1410_CREL.csv', sep=';', converters={'Kod': str})\n",
"    .loc[:, ['Kod', 'Rok', 'Wartosc']]\n",
"    # Keep only codes whose last character is 1, 2 or 3.\n",
"    .loc[lambda podz: podz['Kod'].str.endswith(('1', '2', '3'))]\n",
"    .dropna()\n",
"    .rename(columns={'Wartosc': 'Powierzchnia'})\n",
")"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 418,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Load 'WYNA_2497_CREL.csv' and reshape it from long to wide:\n",
"# one row per (Kod, Rok), one column per 'Wyszczególnienie' category.\n",
"df_wyna = (\n",
"    pd.read_csv('WYNA_2497_CREL.csv', sep=';', converters={'Kod': str}, decimal=',')\n",
"    .loc[:, ['Kod', 'Wyszczególnienie', 'Rok', 'Wartosc']]\n",
"    .dropna()\n",
"    .pivot_table(index=['Kod', 'Rok'], columns='Wyszczególnienie', values='Wartosc')\n",
"    .reset_index()\n",
"    # Replace the descriptive Polish labels with ASCII identifiers.\n",
"    .rename(columns={\n",
"        'ogółem': 'Wynagrodzenie_ogolem',\n",
"        'przeciętne miesięczne wynagrodzenia brutto w relacji do średniej krajowej (Polska=100)': 'Wynagrodzenie_w_relacji_do_sredniej'})\n",
")"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 419,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-05-13 15:20:33 +02:00
"C:\\Users\\micha\\AppData\\Local\\Temp\\ipykernel_7800\\1671418303.py:1: DtypeWarning: Columns (7) have mixed types. Specify dtype option on import or set low_memory=False.\n",
2024-05-06 22:55:21 +02:00
" df_fina_1 = pd.read_csv(\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Rodzaje dochodów</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Dochody_podatek_lesny</th>\n",
" <th>Dochody_podatek_PCC</th>\n",
2024-05-06 23:50:22 +02:00
" <th>Dochody_podatek_od_dzialalnosci_gospodarczej</th>\n",
2024-05-06 22:55:21 +02:00
" <th>Dochody_podatek_od_nieruchomosci</th>\n",
" <th>Dochody_podatek_od_spadkow</th>\n",
" <th>Dochody_podatek_od_srodkow_transportowych</th>\n",
" <th>Dochody_podatek_rolny</th>\n",
" <th>Dochody_podatek_odrebne_ustawy</th>\n",
" <th>Dochody_razem</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2004</td>\n",
" <td>NaN</td>\n",
" <td>549608.00</td>\n",
" <td>NaN</td>\n",
" <td>13532989.00</td>\n",
" <td>NaN</td>\n",
" <td>625159.00</td>\n",
" <td>23687.00</td>\n",
" <td>NaN</td>\n",
" <td>41378568.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2005</td>\n",
" <td>NaN</td>\n",
" <td>609855.00</td>\n",
" <td>NaN</td>\n",
" <td>13667398.00</td>\n",
" <td>NaN</td>\n",
" <td>700134.00</td>\n",
" <td>26634.00</td>\n",
" <td>15438121.00</td>\n",
" <td>43417443.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2006</td>\n",
" <td>NaN</td>\n",
" <td>844223.65</td>\n",
" <td>NaN</td>\n",
" <td>14633962.72</td>\n",
" <td>NaN</td>\n",
" <td>747182.64</td>\n",
" <td>11683.60</td>\n",
" <td>16647124.98</td>\n",
" <td>50319253.08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2007</td>\n",
" <td>NaN</td>\n",
" <td>1344365.01</td>\n",
" <td>NaN</td>\n",
" <td>14944781.74</td>\n",
" <td>NaN</td>\n",
" <td>777345.52</td>\n",
" <td>19377.36</td>\n",
" <td>17436387.93</td>\n",
" <td>62025513.24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2008</td>\n",
" <td>6799.55</td>\n",
" <td>1790135.40</td>\n",
" <td>NaN</td>\n",
" <td>16089534.56</td>\n",
" <td>NaN</td>\n",
" <td>836441.10</td>\n",
" <td>30823.60</td>\n",
" <td>19149551.45</td>\n",
" <td>80755930.93</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47078</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>154462.39</td>\n",
" <td>5361951.37</td>\n",
" <td>572868.36</td>\n",
" <td>108107448.79</td>\n",
" <td>437144.83</td>\n",
" <td>589658.88</td>\n",
" <td>51297.75</td>\n",
" <td>115274832.37</td>\n",
" <td>261780766.79</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47079</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>150329.31</td>\n",
" <td>6088184.20</td>\n",
" <td>468411.51</td>\n",
" <td>38527846.59</td>\n",
" <td>228886.23</td>\n",
" <td>608637.40</td>\n",
" <td>64855.15</td>\n",
" <td>46137150.39</td>\n",
" <td>167638796.15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47080</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>156556.52</td>\n",
" <td>5125090.74</td>\n",
" <td>329522.12</td>\n",
" <td>78767466.83</td>\n",
" <td>552009.16</td>\n",
" <td>558925.68</td>\n",
" <td>48689.09</td>\n",
" <td>85538260.14</td>\n",
" <td>263006955.07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47081</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>163778.36</td>\n",
" <td>9082482.28</td>\n",
" <td>492045.28</td>\n",
" <td>78491368.35</td>\n",
" <td>947992.83</td>\n",
" <td>602586.14</td>\n",
" <td>59824.46</td>\n",
" <td>89840077.70</td>\n",
" <td>252345800.93</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47082</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>174823.49</td>\n",
" <td>7474079.65</td>\n",
" <td>1019054.56</td>\n",
" <td>84996948.99</td>\n",
" <td>593315.54</td>\n",
" <td>627169.86</td>\n",
" <td>50987.00</td>\n",
" <td>94936379.09</td>\n",
" <td>259310641.60</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>47083 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
"Rodzaje dochodów Kod Rok Dochody_podatek_lesny Dochody_podatek_PCC \n",
"0 0201011 2004 NaN 549608.00 \\\n",
"1 0201011 2005 NaN 609855.00 \n",
"2 0201011 2006 NaN 844223.65 \n",
"3 0201011 2007 NaN 1344365.01 \n",
"4 0201011 2008 6799.55 1790135.40 \n",
"... ... ... ... ... \n",
"47078 3263011 2018 154462.39 5361951.37 \n",
"47079 3263011 2019 150329.31 6088184.20 \n",
"47080 3263011 2020 156556.52 5125090.74 \n",
"47081 3263011 2021 163778.36 9082482.28 \n",
"47082 3263011 2022 174823.49 7474079.65 \n",
"\n",
2024-05-06 23:50:22 +02:00
"Rodzaje dochodów Dochody_podatek_od_dzialalnosci_gospodarczej \n",
"0 NaN \\\n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"47078 572868.36 \n",
"47079 468411.51 \n",
"47080 329522.12 \n",
"47081 492045.28 \n",
"47082 1019054.56 \n",
"\n",
"Rodzaje dochodów Dochody_podatek_od_nieruchomosci \n",
"0 13532989.00 \\\n",
"1 13667398.00 \n",
"2 14633962.72 \n",
"3 14944781.74 \n",
"4 16089534.56 \n",
"... ... \n",
"47078 108107448.79 \n",
"47079 38527846.59 \n",
"47080 78767466.83 \n",
"47081 78491368.35 \n",
"47082 84996948.99 \n",
2024-05-06 22:55:21 +02:00
"\n",
"Rodzaje dochodów Dochody_podatek_od_spadkow \n",
"0 NaN \\\n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"47078 437144.83 \n",
"47079 228886.23 \n",
"47080 552009.16 \n",
"47081 947992.83 \n",
"47082 593315.54 \n",
"\n",
"Rodzaje dochodów Dochody_podatek_od_srodkow_transportowych \n",
"0 625159.00 \\\n",
"1 700134.00 \n",
"2 747182.64 \n",
"3 777345.52 \n",
"4 836441.10 \n",
"... ... \n",
"47078 589658.88 \n",
"47079 608637.40 \n",
"47080 558925.68 \n",
"47081 602586.14 \n",
"47082 627169.86 \n",
"\n",
"Rodzaje dochodów Dochody_podatek_rolny Dochody_podatek_odrebne_ustawy \n",
"0 23687.00 NaN \\\n",
"1 26634.00 15438121.00 \n",
"2 11683.60 16647124.98 \n",
"3 19377.36 17436387.93 \n",
"4 30823.60 19149551.45 \n",
"... ... ... \n",
"47078 51297.75 115274832.37 \n",
"47079 64855.15 46137150.39 \n",
"47080 48689.09 85538260.14 \n",
"47081 59824.46 89840077.70 \n",
"47082 50987.00 94936379.09 \n",
"\n",
"Rodzaje dochodów Dochody_razem \n",
"0 41378568.00 \n",
"1 43417443.00 \n",
"2 50319253.08 \n",
"3 62025513.24 \n",
"4 80755930.93 \n",
"... ... \n",
"47078 261780766.79 \n",
"47079 167638796.15 \n",
"47080 263006955.07 \n",
"47081 252345800.93 \n",
"47082 259310641.60 \n",
"\n",
"[47083 rows x 11 columns]"
]
},
2024-05-13 15:20:33 +02:00
"execution_count": 419,
2024-05-06 22:55:21 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load 'FINA_2622_CREL_1.csv'.\n",
"# low_memory=False lets pandas infer each column's dtype from the whole file,\n",
"# which avoids the mixed-type DtypeWarning raised by chunk-wise inference.\n",
"df_fina_1 = pd.read_csv(\n",
"    'FINA_2622_CREL_1.csv',\n",
"    sep=';',\n",
"    converters={'Kod': str},\n",
"    decimal=',',\n",
"    low_memory=False)\n",
"df_fina_1 = df_fina_1[['Kod', 'Rodzaje dochodów', 'Rok', 'Wartosc']]\n",
"df_fina_1 = df_fina_1.dropna()\n",
"# Long -> wide: one row per (Kod, Rok), one column per 'Rodzaje dochodów' category.\n",
"# pivot_table aggregates duplicate keys with the mean by default.\n",
"df_fina_1 = df_fina_1.pivot_table(index=['Kod', 'Rok'], columns='Rodzaje dochodów', values='Wartosc').reset_index()\n",
"# Replace the descriptive Polish labels with ASCII identifiers.\n",
"df_fina_1 = df_fina_1.rename(columns={\n",
"    'dochody podatkowe - podatek leśny': 'Dochody_podatek_lesny',\n",
"    'dochody podatkowe - podatek od czynności cywilnoprawnych': 'Dochody_podatek_PCC',\n",
"    'dochody podatkowe - podatek od działalności gospodarczej osób fizycznych, opłacany w formie karty podatkowej': 'Dochody_podatek_od_dzialalnosci_gospodarczej',\n",
"    'dochody podatkowe - podatek od nieruchomości': 'Dochody_podatek_od_nieruchomosci',\n",
"    'dochody podatkowe - podatek od spadków i darowizn': 'Dochody_podatek_od_spadkow',\n",
"    'dochody podatkowe - podatek od środków transportowych': 'Dochody_podatek_od_srodkow_transportowych',\n",
"    'dochody podatkowe - podatek rolny': 'Dochody_podatek_rolny',\n",
"    'dochody podatkowe - ustalone i pobierane na podstawie odrębnych ustaw': 'Dochody_podatek_odrebne_ustawy',\n",
"    'razem': 'Dochody_razem'})\n",
"\n",
"df_fina_1"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 420,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-05-13 15:20:33 +02:00
"C:\\Users\\micha\\AppData\\Local\\Temp\\ipykernel_7800\\2161929356.py:1: DtypeWarning: Columns (7) have mixed types. Specify dtype option on import or set low_memory=False.\n",
2024-05-06 22:55:21 +02:00
" df_fina_2 = pd.read_csv(\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Rodzaje dochodów</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Dochody_z_majatku</th>\n",
" <th>Dochody_z_najmu_i_dzierzawy</th>\n",
" <th>Dochody_z_uslug</th>\n",
" <th>Dochody_dofinansowanie_inwestycyjne</th>\n",
" <th>Dochody_dofinansowanie_razem</th>\n",
" <th>Udzialy_w_podatkach_dochodowych_od_osob_fizycznych</th>\n",
" <th>Udzialy_w_podatkach_dochodowych_od_osob_prywatnych</th>\n",
" <th>Udzialy_w_podatkach_dochodowych_razem</th>\n",
" <th>Wplywy_z_innych_lokalnych_oplat</th>\n",
" <th>Wplywy_z_oplaty_eksploatacyjnej</th>\n",
" <th>Wplywy_z_oplaty_skarbowej</th>\n",
" <th>Wplywy_z_oplaty_targowej</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2004</td>\n",
" <td>5344205.00</td>\n",
" <td>NaN</td>\n",
" <td>184307.00</td>\n",
" <td>NaN</td>\n",
" <td>519209.00</td>\n",
" <td>13285456.00</td>\n",
" <td>1065169.00</td>\n",
" <td>14350625.00</td>\n",
" <td>44200.00</td>\n",
" <td>NaN</td>\n",
" <td>1209998.00</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2005</td>\n",
" <td>4560489.00</td>\n",
" <td>NaN</td>\n",
" <td>96462.00</td>\n",
" <td>NaN</td>\n",
" <td>9024183.00</td>\n",
" <td>15985331.00</td>\n",
" <td>1170863.00</td>\n",
" <td>17156194.00</td>\n",
" <td>42840.00</td>\n",
" <td>NaN</td>\n",
" <td>1282943.00</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2006</td>\n",
" <td>8528727.69</td>\n",
" <td>NaN</td>\n",
" <td>231470.96</td>\n",
" <td>8752288.98</td>\n",
" <td>8864860.57</td>\n",
" <td>18101668.00</td>\n",
" <td>1048115.83</td>\n",
" <td>19149783.83</td>\n",
" <td>37365.00</td>\n",
" <td>NaN</td>\n",
" <td>1203990.73</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2007</td>\n",
" <td>15042480.34</td>\n",
" <td>9219682.12</td>\n",
" <td>339654.15</td>\n",
" <td>18153240.30</td>\n",
" <td>18438743.21</td>\n",
" <td>21785308.00</td>\n",
" <td>1336702.02</td>\n",
" <td>23122010.02</td>\n",
" <td>78798.51</td>\n",
" <td>NaN</td>\n",
" <td>1228704.53</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2008</td>\n",
" <td>22797881.07</td>\n",
" <td>9546379.31</td>\n",
" <td>787256.69</td>\n",
" <td>5046691.69</td>\n",
" <td>5182137.79</td>\n",
" <td>23974587.00</td>\n",
" <td>1532633.44</td>\n",
" <td>25507220.44</td>\n",
" <td>83882.94</td>\n",
" <td>NaN</td>\n",
" <td>1364245.93</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47078</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>16419859.31</td>\n",
" <td>4261374.83</td>\n",
" <td>1996824.80</td>\n",
" <td>25285.92</td>\n",
" <td>237485.34</td>\n",
" <td>52799183.00</td>\n",
" <td>2690098.17</td>\n",
" <td>55489281.17</td>\n",
" <td>10458871.30</td>\n",
" <td>4684.54</td>\n",
" <td>434077.88</td>\n",
" <td>608625.90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47079</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>8844350.07</td>\n",
" <td>4324758.68</td>\n",
" <td>2187576.47</td>\n",
" <td>0.00</td>\n",
" <td>225831.84</td>\n",
" <td>55319040.00</td>\n",
" <td>2770684.17</td>\n",
" <td>58089724.17</td>\n",
" <td>11369287.11</td>\n",
" <td>3456.95</td>\n",
" <td>415686.53</td>\n",
" <td>610059.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47080</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>13485033.97</td>\n",
" <td>6159923.01</td>\n",
" <td>1917372.55</td>\n",
" <td>21002107.00</td>\n",
" <td>21192313.05</td>\n",
" <td>53739656.00</td>\n",
" <td>3144444.38</td>\n",
" <td>56884100.38</td>\n",
" <td>12281916.71</td>\n",
" <td>5157.50</td>\n",
" <td>355201.29</td>\n",
" <td>507341.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47081</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>16928500.75</td>\n",
" <td>7582499.62</td>\n",
" <td>4110105.72</td>\n",
" <td>888293.63</td>\n",
" <td>1072910.83</td>\n",
" <td>63936763.00</td>\n",
" <td>3975531.95</td>\n",
" <td>67912294.95</td>\n",
" <td>17127683.55</td>\n",
" <td>27746.70</td>\n",
" <td>416473.03</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47082</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>30415536.99</td>\n",
" <td>8651170.05</td>\n",
" <td>4117086.30</td>\n",
" <td>207597.50</td>\n",
" <td>800347.63</td>\n",
" <td>64657287.40</td>\n",
" <td>4082611.64</td>\n",
" <td>68739899.04</td>\n",
" <td>19150342.25</td>\n",
" <td>5035.87</td>\n",
" <td>421424.91</td>\n",
" <td>1233266.30</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>47083 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
"Rodzaje dochodów Kod Rok Dochody_z_majatku \n",
"0 0201011 2004 5344205.00 \\\n",
"1 0201011 2005 4560489.00 \n",
"2 0201011 2006 8528727.69 \n",
"3 0201011 2007 15042480.34 \n",
"4 0201011 2008 22797881.07 \n",
"... ... ... ... \n",
"47078 3263011 2018 16419859.31 \n",
"47079 3263011 2019 8844350.07 \n",
"47080 3263011 2020 13485033.97 \n",
"47081 3263011 2021 16928500.75 \n",
"47082 3263011 2022 30415536.99 \n",
"\n",
"Rodzaje dochodów Dochody_z_najmu_i_dzierzawy Dochody_z_uslug \n",
"0 NaN 184307.00 \\\n",
"1 NaN 96462.00 \n",
"2 NaN 231470.96 \n",
"3 9219682.12 339654.15 \n",
"4 9546379.31 787256.69 \n",
"... ... ... \n",
"47078 4261374.83 1996824.80 \n",
"47079 4324758.68 2187576.47 \n",
"47080 6159923.01 1917372.55 \n",
"47081 7582499.62 4110105.72 \n",
"47082 8651170.05 4117086.30 \n",
"\n",
"Rodzaje dochodów Dochody_dofinansowanie_inwestycyjne \n",
"0 NaN \\\n",
"1 NaN \n",
"2 8752288.98 \n",
"3 18153240.30 \n",
"4 5046691.69 \n",
"... ... \n",
"47078 25285.92 \n",
"47079 0.00 \n",
"47080 21002107.00 \n",
"47081 888293.63 \n",
"47082 207597.50 \n",
"\n",
"Rodzaje dochodów Dochody_dofinansowanie_razem \n",
"0 519209.00 \\\n",
"1 9024183.00 \n",
"2 8864860.57 \n",
"3 18438743.21 \n",
"4 5182137.79 \n",
"... ... \n",
"47078 237485.34 \n",
"47079 225831.84 \n",
"47080 21192313.05 \n",
"47081 1072910.83 \n",
"47082 800347.63 \n",
"\n",
"Rodzaje dochodów Udzialy_w_podatkach_dochodowych_od_osob_fizycznych \n",
"0 13285456.00 \\\n",
"1 15985331.00 \n",
"2 18101668.00 \n",
"3 21785308.00 \n",
"4 23974587.00 \n",
"... ... \n",
"47078 52799183.00 \n",
"47079 55319040.00 \n",
"47080 53739656.00 \n",
"47081 63936763.00 \n",
"47082 64657287.40 \n",
"\n",
"Rodzaje dochodów Udzialy_w_podatkach_dochodowych_od_osob_prywatnych \n",
"0 1065169.00 \\\n",
"1 1170863.00 \n",
"2 1048115.83 \n",
"3 1336702.02 \n",
"4 1532633.44 \n",
"... ... \n",
"47078 2690098.17 \n",
"47079 2770684.17 \n",
"47080 3144444.38 \n",
"47081 3975531.95 \n",
"47082 4082611.64 \n",
"\n",
"Rodzaje dochodów Udzialy_w_podatkach_dochodowych_razem \n",
"0 14350625.00 \\\n",
"1 17156194.00 \n",
"2 19149783.83 \n",
"3 23122010.02 \n",
"4 25507220.44 \n",
"... ... \n",
"47078 55489281.17 \n",
"47079 58089724.17 \n",
"47080 56884100.38 \n",
"47081 67912294.95 \n",
"47082 68739899.04 \n",
"\n",
"Rodzaje dochodów Wplywy_z_innych_lokalnych_oplat \n",
"0 44200.00 \\\n",
"1 42840.00 \n",
"2 37365.00 \n",
"3 78798.51 \n",
"4 83882.94 \n",
"... ... \n",
"47078 10458871.30 \n",
"47079 11369287.11 \n",
"47080 12281916.71 \n",
"47081 17127683.55 \n",
"47082 19150342.25 \n",
"\n",
"Rodzaje dochodów Wplywy_z_oplaty_eksploatacyjnej Wplywy_z_oplaty_skarbowej \n",
"0 NaN 1209998.00 \\\n",
"1 NaN 1282943.00 \n",
"2 NaN 1203990.73 \n",
"3 NaN 1228704.53 \n",
"4 NaN 1364245.93 \n",
"... ... ... \n",
"47078 4684.54 434077.88 \n",
"47079 3456.95 415686.53 \n",
"47080 5157.50 355201.29 \n",
"47081 27746.70 416473.03 \n",
"47082 5035.87 421424.91 \n",
"\n",
"Rodzaje dochodów Wplywy_z_oplaty_targowej \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"47078 608625.90 \n",
"47079 610059.50 \n",
"47080 507341.00 \n",
"47081 0.00 \n",
"47082 1233266.30 \n",
"\n",
"[47083 rows x 14 columns]"
]
},
2024-05-13 15:20:33 +02:00
"execution_count": 420,
2024-05-06 22:55:21 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load 'FINA_2622_CREL_2.csv'.\n",
"# low_memory=False lets pandas infer each column's dtype from the whole file,\n",
"# which avoids the mixed-type DtypeWarning raised by chunk-wise inference.\n",
"df_fina_2 = pd.read_csv(\n",
"    'FINA_2622_CREL_2.csv',\n",
"    sep=';',\n",
"    converters={'Kod': str},\n",
"    decimal=',',\n",
"    low_memory=False)\n",
"df_fina_2 = df_fina_2[['Kod', 'Rodzaje dochodów', 'Rok', 'Wartosc']]\n",
"df_fina_2 = df_fina_2.dropna()\n",
"# Long -> wide: one row per (Kod, Rok), one column per 'Rodzaje dochodów' category.\n",
"# pivot_table aggregates duplicate keys with the mean by default.\n",
"df_fina_2 = df_fina_2.pivot_table(index=['Kod', 'Rok'], columns='Rodzaje dochodów', values='Wartosc').reset_index()\n",
"# Replace the descriptive Polish labels with ASCII identifiers.\n",
"# NOTE(review): the 'osób prawnych' (legal persons) label is mapped to a name ending in\n",
"# '_od_osob_prywatnych'; the name is kept because other cells may reference it - confirm and rename.\n",
"df_fina_2 = df_fina_2.rename(columns={\n",
"    'dochody z majątku': 'Dochody_z_majatku',\n",
"    'dochody z majątku - dochody z najmu i dzierżawy składników majątkowych JST oraz innych umów o podobnym charakterze': 'Dochody_z_najmu_i_dzierzawy',\n",
"    'pozostałe dochody - wpływy z usług': 'Dochody_z_uslug',\n",
"    'pozostałe dochody - środki na dofinansowanie własnych zadań pozyskane z innych źródeł - inwestycyjne': 'Dochody_dofinansowanie_inwestycyjne',\n",
"    'pozostałe dochody - środki na dofinansowanie własnych zadań pozyskane z innych źródeł - razem': 'Dochody_dofinansowanie_razem',\n",
"    'udziały w podatkach stanowiących dochody budżetu państwa podatek dochodowy od osób fizycznych': 'Udzialy_w_podatkach_dochodowych_od_osob_fizycznych',\n",
"    'udziały w podatkach stanowiących dochody budżetu państwa podatek dochodowy od osób prawnych': 'Udzialy_w_podatkach_dochodowych_od_osob_prywatnych',\n",
"    'udziały w podatkach stanowiących dochody budżetu państwa razem': 'Udzialy_w_podatkach_dochodowych_razem',\n",
"    'wpływy z innych lokalnych opłat pobieranych przez jednostki samorządu terytorialnego na podstawie odrębnych ustaw': 'Wplywy_z_innych_lokalnych_oplat',\n",
"    'wpływy z opłaty eksploatacyjnej': 'Wplywy_z_oplaty_eksploatacyjnej',\n",
"    'wpływy z opłaty skarbowej': 'Wplywy_z_oplaty_skarbowej',\n",
"    'wpływy z opłaty targowej': 'Wplywy_z_oplaty_targowej'})\n",
"\n",
"df_fina_2"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 421,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Wiek</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Ludnosc_ogolem</th>\n",
" <th>Ludnosc_w_wieku_poprodukcyjnym</th>\n",
" <th>Ludnosc_w_wieku_produkcyjnym</th>\n",
" <th>Ludnosc_w_wieku_produkcyjnym_mobilnym</th>\n",
" <th>Ludnosc_w_wieku_produkcyjnym_niemobilnym</th>\n",
" <th>Ludnosc_w_wieku_przedprodukcyjnym</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2010</td>\n",
" <td>40309.00</td>\n",
" <td>7683.00</td>\n",
" <td>26085.00</td>\n",
" <td>15183.00</td>\n",
" <td>10902.00</td>\n",
" <td>6541.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2011</td>\n",
" <td>40119.00</td>\n",
" <td>8020.00</td>\n",
" <td>25647.00</td>\n",
" <td>15047.00</td>\n",
" <td>10600.00</td>\n",
" <td>6452.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2012</td>\n",
" <td>39851.00</td>\n",
" <td>8392.00</td>\n",
" <td>25160.00</td>\n",
" <td>14932.00</td>\n",
" <td>10228.00</td>\n",
" <td>6299.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2013</td>\n",
" <td>39603.00</td>\n",
" <td>8678.00</td>\n",
" <td>24720.00</td>\n",
" <td>14784.00</td>\n",
" <td>9936.00</td>\n",
" <td>6205.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2014</td>\n",
" <td>39464.00</td>\n",
" <td>8971.00</td>\n",
" <td>24307.00</td>\n",
" <td>14645.00</td>\n",
" <td>9662.00</td>\n",
" <td>6186.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48606</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>40910.00</td>\n",
" <td>10472.00</td>\n",
" <td>24549.00</td>\n",
" <td>14683.00</td>\n",
" <td>9866.00</td>\n",
" <td>5889.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48607</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>40888.00</td>\n",
" <td>10788.00</td>\n",
" <td>24209.00</td>\n",
" <td>14429.00</td>\n",
" <td>9780.00</td>\n",
" <td>5891.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48608</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>40326.00</td>\n",
" <td>10962.00</td>\n",
" <td>23544.00</td>\n",
" <td>13798.00</td>\n",
" <td>9746.00</td>\n",
" <td>5820.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48609</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>39834.00</td>\n",
" <td>11050.00</td>\n",
" <td>22976.00</td>\n",
" <td>13277.00</td>\n",
" <td>9699.00</td>\n",
" <td>5808.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48610</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>39368.00</td>\n",
" <td>11157.00</td>\n",
" <td>22486.00</td>\n",
" <td>12802.00</td>\n",
" <td>9684.00</td>\n",
" <td>5725.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>48611 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
"Wiek Kod Rok Ludnosc_ogolem Ludnosc_w_wieku_poprodukcyjnym \n",
"0 0201011 2010 40309.00 7683.00 \\\n",
"1 0201011 2011 40119.00 8020.00 \n",
"2 0201011 2012 39851.00 8392.00 \n",
"3 0201011 2013 39603.00 8678.00 \n",
"4 0201011 2014 39464.00 8971.00 \n",
"... ... ... ... ... \n",
"48606 3263011 2018 40910.00 10472.00 \n",
"48607 3263011 2019 40888.00 10788.00 \n",
"48608 3263011 2020 40326.00 10962.00 \n",
"48609 3263011 2021 39834.00 11050.00 \n",
"48610 3263011 2022 39368.00 11157.00 \n",
"\n",
"Wiek Ludnosc_w_wieku_produkcyjnym Ludnosc_w_wieku_produkcyjnym_mobilnym \n",
"0 26085.00 15183.00 \\\n",
"1 25647.00 15047.00 \n",
"2 25160.00 14932.00 \n",
"3 24720.00 14784.00 \n",
"4 24307.00 14645.00 \n",
"... ... ... \n",
"48606 24549.00 14683.00 \n",
"48607 24209.00 14429.00 \n",
"48608 23544.00 13798.00 \n",
"48609 22976.00 13277.00 \n",
"48610 22486.00 12802.00 \n",
"\n",
"Wiek Ludnosc_w_wieku_produkcyjnym_niemobilnym \n",
"0 10902.00 \\\n",
"1 10600.00 \n",
"2 10228.00 \n",
"3 9936.00 \n",
"4 9662.00 \n",
"... ... \n",
"48606 9866.00 \n",
"48607 9780.00 \n",
"48608 9746.00 \n",
"48609 9699.00 \n",
"48610 9684.00 \n",
"\n",
"Wiek Ludnosc_w_wieku_przedprodukcyjnym \n",
"0 6541.00 \n",
"1 6452.00 \n",
"2 6299.00 \n",
"3 6205.00 \n",
"4 6186.00 \n",
"... ... \n",
"48606 5889.00 \n",
"48607 5891.00 \n",
"48608 5820.00 \n",
"48609 5808.00 \n",
"48610 5725.00 \n",
"\n",
"[48611 rows x 8 columns]"
]
},
2024-05-13 15:20:33 +02:00
"execution_count": 421,
2024-05-06 22:55:21 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Population figures, total ('ogółem') file: load 'LUDN_1342_CREL_1.csv' and reshape\n",
"# from long to wide - one row per (Kod, Rok), one column per 'Wiek' category.\n",
"df_ludn_1 = (\n",
"    pd.read_csv('LUDN_1342_CREL_1.csv', sep=';', converters={'Kod': str}, decimal=',')\n",
"    .loc[:, ['Kod', 'Wiek', 'Rok', 'Wartosc']]\n",
"    .dropna()\n",
"    .pivot_table(index=['Kod', 'Rok'], columns='Wiek', values='Wartosc')\n",
"    .reset_index()\n",
"    # Replace the descriptive Polish labels with ASCII identifiers.\n",
"    .rename(columns={\n",
"        'ogółem': 'Ludnosc_ogolem',\n",
"        'w wieku poprodukcyjnym': 'Ludnosc_w_wieku_poprodukcyjnym',\n",
"        'w wieku produkcyjnym': 'Ludnosc_w_wieku_produkcyjnym',\n",
"        'w wieku produkcyjnym mobilnym': 'Ludnosc_w_wieku_produkcyjnym_mobilnym',\n",
"        'w wieku produkcyjnym niemobilnym': 'Ludnosc_w_wieku_produkcyjnym_niemobilnym',\n",
"        'w wieku przedprodukcyjnym': 'Ludnosc_w_wieku_przedprodukcyjnym'})\n",
")\n",
"\n",
"df_ludn_1"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 422,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Wiek</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Ludnosc_mezczyzni</th>\n",
" <th>Ludnosc_mezczyzni_w_wieku_poprodukcyjnym</th>\n",
" <th>Ludnosc_mezczyzni_w_wieku_produkcyjnym</th>\n",
" <th>Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym</th>\n",
" <th>Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym</th>\n",
" <th>Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2010</td>\n",
" <td>19085.00</td>\n",
" <td>2153.00</td>\n",
" <td>13535.00</td>\n",
" <td>7720.00</td>\n",
" <td>5815.00</td>\n",
" <td>3397.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2011</td>\n",
" <td>18985.00</td>\n",
" <td>2222.00</td>\n",
" <td>13398.00</td>\n",
" <td>7647.00</td>\n",
" <td>5751.00</td>\n",
" <td>3365.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2012</td>\n",
" <td>18859.00</td>\n",
" <td>2370.00</td>\n",
" <td>13238.00</td>\n",
" <td>7611.00</td>\n",
" <td>5627.00</td>\n",
" <td>3251.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2013</td>\n",
" <td>18737.00</td>\n",
" <td>2477.00</td>\n",
" <td>13028.00</td>\n",
" <td>7501.00</td>\n",
" <td>5527.00</td>\n",
" <td>3232.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2014</td>\n",
" <td>18640.00</td>\n",
" <td>2620.00</td>\n",
" <td>12832.00</td>\n",
" <td>7442.00</td>\n",
" <td>5390.00</td>\n",
" <td>3188.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48606</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>19690.00</td>\n",
" <td>3501.00</td>\n",
" <td>13202.00</td>\n",
" <td>7547.00</td>\n",
" <td>5655.00</td>\n",
" <td>2987.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48607</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>19683.00</td>\n",
" <td>3644.00</td>\n",
" <td>13044.00</td>\n",
" <td>7417.00</td>\n",
" <td>5627.00</td>\n",
" <td>2995.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48608</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>19356.00</td>\n",
" <td>3749.00</td>\n",
" <td>12617.00</td>\n",
" <td>6986.00</td>\n",
" <td>5631.00</td>\n",
" <td>2990.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48609</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>19096.00</td>\n",
" <td>3852.00</td>\n",
" <td>12267.00</td>\n",
" <td>6747.00</td>\n",
" <td>5520.00</td>\n",
" <td>2977.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48610</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>18869.00</td>\n",
" <td>3901.00</td>\n",
" <td>12009.00</td>\n",
" <td>6485.00</td>\n",
" <td>5524.00</td>\n",
" <td>2959.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>48611 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
"Wiek Kod Rok Ludnosc_mezczyzni \n",
"0 0201011 2010 19085.00 \\\n",
"1 0201011 2011 18985.00 \n",
"2 0201011 2012 18859.00 \n",
"3 0201011 2013 18737.00 \n",
"4 0201011 2014 18640.00 \n",
"... ... ... ... \n",
"48606 3263011 2018 19690.00 \n",
"48607 3263011 2019 19683.00 \n",
"48608 3263011 2020 19356.00 \n",
"48609 3263011 2021 19096.00 \n",
"48610 3263011 2022 18869.00 \n",
"\n",
"Wiek Ludnosc_mezczyzni_w_wieku_poprodukcyjnym \n",
"0 2153.00 \\\n",
"1 2222.00 \n",
"2 2370.00 \n",
"3 2477.00 \n",
"4 2620.00 \n",
"... ... \n",
"48606 3501.00 \n",
"48607 3644.00 \n",
"48608 3749.00 \n",
"48609 3852.00 \n",
"48610 3901.00 \n",
"\n",
"Wiek Ludnosc_mezczyzni_w_wieku_produkcyjnym \n",
"0 13535.00 \\\n",
"1 13398.00 \n",
"2 13238.00 \n",
"3 13028.00 \n",
"4 12832.00 \n",
"... ... \n",
"48606 13202.00 \n",
"48607 13044.00 \n",
"48608 12617.00 \n",
"48609 12267.00 \n",
"48610 12009.00 \n",
"\n",
"Wiek Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym \n",
"0 7720.00 \\\n",
"1 7647.00 \n",
"2 7611.00 \n",
"3 7501.00 \n",
"4 7442.00 \n",
"... ... \n",
"48606 7547.00 \n",
"48607 7417.00 \n",
"48608 6986.00 \n",
"48609 6747.00 \n",
"48610 6485.00 \n",
"\n",
"Wiek Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym \n",
"0 5815.00 \\\n",
"1 5751.00 \n",
"2 5627.00 \n",
"3 5527.00 \n",
"4 5390.00 \n",
"... ... \n",
"48606 5655.00 \n",
"48607 5627.00 \n",
"48608 5631.00 \n",
"48609 5520.00 \n",
"48610 5524.00 \n",
"\n",
"Wiek Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym \n",
"0 3397.00 \n",
"1 3365.00 \n",
"2 3251.00 \n",
"3 3232.00 \n",
"4 3188.00 \n",
"... ... \n",
"48606 2987.00 \n",
"48607 2995.00 \n",
"48608 2990.00 \n",
"48609 2977.00 \n",
"48610 2959.00 \n",
"\n",
"[48611 rows x 8 columns]"
]
},
2024-05-13 15:20:33 +02:00
"execution_count": 422,
2024-05-06 22:55:21 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_ludn_2 = pd.read_csv( # mezczyzni\n",
" 'LUDN_1342_CREL_2.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_ludn_2 = df_ludn_2[['Kod', 'Wiek', 'Rok', 'Wartosc']]\n",
"df_ludn_2 = df_ludn_2.dropna()\n",
"df_ludn_2 = df_ludn_2.pivot_table(index=['Kod', 'Rok'], columns='Wiek', values='Wartosc').reset_index()\n",
"df_ludn_2 = df_ludn_2.rename(columns={\n",
" 'ogółem': 'Ludnosc_mezczyzni',\n",
" 'w wieku poprodukcyjnym': 'Ludnosc_mezczyzni_w_wieku_poprodukcyjnym',\n",
" 'w wieku produkcyjnym': 'Ludnosc_mezczyzni_w_wieku_produkcyjnym',\n",
" 'w wieku produkcyjnym mobilnym': 'Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym',\n",
" 'w wieku produkcyjnym niemobilnym': 'Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym',\n",
" 'w wieku przedprodukcyjnym': 'Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym'})\n",
"\n",
"df_ludn_2"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 423,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Wiek</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Ludnosc_kobiety</th>\n",
" <th>Ludnosc_kobiety_w_wieku_poprodukcyjnym</th>\n",
" <th>Ludnosc_kobiety_w_wieku_produkcyjnym</th>\n",
" <th>Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym</th>\n",
" <th>Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym</th>\n",
" <th>Ludnosc_kobiety_w_wieku_przedprodukcyjnym</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2010</td>\n",
" <td>21224.00</td>\n",
" <td>5530.00</td>\n",
" <td>12550.00</td>\n",
" <td>7463.00</td>\n",
" <td>5087.00</td>\n",
" <td>3144.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2011</td>\n",
" <td>21134.00</td>\n",
" <td>5798.00</td>\n",
" <td>12249.00</td>\n",
" <td>7400.00</td>\n",
" <td>4849.00</td>\n",
" <td>3087.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2012</td>\n",
" <td>20992.00</td>\n",
" <td>6022.00</td>\n",
" <td>11922.00</td>\n",
" <td>7321.00</td>\n",
" <td>4601.00</td>\n",
" <td>3048.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2013</td>\n",
" <td>20866.00</td>\n",
" <td>6201.00</td>\n",
" <td>11692.00</td>\n",
" <td>7283.00</td>\n",
" <td>4409.00</td>\n",
" <td>2973.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2014</td>\n",
" <td>20824.00</td>\n",
" <td>6351.00</td>\n",
" <td>11475.00</td>\n",
" <td>7203.00</td>\n",
" <td>4272.00</td>\n",
" <td>2998.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48606</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>21220.00</td>\n",
" <td>6971.00</td>\n",
" <td>11347.00</td>\n",
" <td>7136.00</td>\n",
" <td>4211.00</td>\n",
" <td>2902.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48607</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>21205.00</td>\n",
" <td>7144.00</td>\n",
" <td>11165.00</td>\n",
" <td>7012.00</td>\n",
" <td>4153.00</td>\n",
" <td>2896.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48608</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>20970.00</td>\n",
" <td>7213.00</td>\n",
" <td>10927.00</td>\n",
" <td>6812.00</td>\n",
" <td>4115.00</td>\n",
" <td>2830.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48609</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>20738.00</td>\n",
" <td>7198.00</td>\n",
" <td>10709.00</td>\n",
" <td>6530.00</td>\n",
" <td>4179.00</td>\n",
" <td>2831.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48610</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>20499.00</td>\n",
" <td>7256.00</td>\n",
" <td>10477.00</td>\n",
" <td>6317.00</td>\n",
" <td>4160.00</td>\n",
" <td>2766.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>48611 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
"Wiek Kod Rok Ludnosc_kobiety Ludnosc_kobiety_w_wieku_poprodukcyjnym \n",
"0 0201011 2010 21224.00 5530.00 \\\n",
"1 0201011 2011 21134.00 5798.00 \n",
"2 0201011 2012 20992.00 6022.00 \n",
"3 0201011 2013 20866.00 6201.00 \n",
"4 0201011 2014 20824.00 6351.00 \n",
"... ... ... ... ... \n",
"48606 3263011 2018 21220.00 6971.00 \n",
"48607 3263011 2019 21205.00 7144.00 \n",
"48608 3263011 2020 20970.00 7213.00 \n",
"48609 3263011 2021 20738.00 7198.00 \n",
"48610 3263011 2022 20499.00 7256.00 \n",
"\n",
"Wiek Ludnosc_kobiety_w_wieku_produkcyjnym \n",
"0 12550.00 \\\n",
"1 12249.00 \n",
"2 11922.00 \n",
"3 11692.00 \n",
"4 11475.00 \n",
"... ... \n",
"48606 11347.00 \n",
"48607 11165.00 \n",
"48608 10927.00 \n",
"48609 10709.00 \n",
"48610 10477.00 \n",
"\n",
"Wiek Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym \n",
"0 7463.00 \\\n",
"1 7400.00 \n",
"2 7321.00 \n",
"3 7283.00 \n",
"4 7203.00 \n",
"... ... \n",
"48606 7136.00 \n",
"48607 7012.00 \n",
"48608 6812.00 \n",
"48609 6530.00 \n",
"48610 6317.00 \n",
"\n",
"Wiek Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym \n",
"0 5087.00 \\\n",
"1 4849.00 \n",
"2 4601.00 \n",
"3 4409.00 \n",
"4 4272.00 \n",
"... ... \n",
"48606 4211.00 \n",
"48607 4153.00 \n",
"48608 4115.00 \n",
"48609 4179.00 \n",
"48610 4160.00 \n",
"\n",
"Wiek Ludnosc_kobiety_w_wieku_przedprodukcyjnym \n",
"0 3144.00 \n",
"1 3087.00 \n",
"2 3048.00 \n",
"3 2973.00 \n",
"4 2998.00 \n",
"... ... \n",
"48606 2902.00 \n",
"48607 2896.00 \n",
"48608 2830.00 \n",
"48609 2831.00 \n",
"48610 2766.00 \n",
"\n",
"[48611 rows x 8 columns]"
]
},
2024-05-13 15:20:33 +02:00
"execution_count": 423,
2024-05-06 22:55:21 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_ludn_3 = pd.read_csv( # kobiety\n",
" 'LUDN_1342_CREL_3.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_ludn_3 = df_ludn_3[['Kod', 'Wiek', 'Rok', 'Wartosc']]\n",
"df_ludn_3 = df_ludn_3.dropna()\n",
"df_ludn_3 = df_ludn_3.pivot_table(index=['Kod', 'Rok'], columns='Wiek', values='Wartosc').reset_index()\n",
"df_ludn_3 = df_ludn_3.rename(columns={\n",
" 'ogółem': 'Ludnosc_kobiety',\n",
" 'w wieku poprodukcyjnym': 'Ludnosc_kobiety_w_wieku_poprodukcyjnym',\n",
" 'w wieku produkcyjnym': 'Ludnosc_kobiety_w_wieku_produkcyjnym',\n",
" 'w wieku produkcyjnym mobilnym': 'Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym',\n",
" 'w wieku produkcyjnym niemobilnym': 'Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym',\n",
" 'w wieku przedprodukcyjnym': 'Ludnosc_kobiety_w_wieku_przedprodukcyjnym'})\n",
"\n",
"df_ludn_3"
]
},
2024-05-10 19:17:54 +02:00
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 424,
2024-05-10 19:17:54 +02:00
"metadata": {},
"outputs": [],
"source": [
"df_ludn_4 = pd.read_csv(\n",
" 'LUDN_2425_CREL.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_ludn_4 = df_ludn_4[['Kod', 'Wskaźniki', 'Rok', 'Wartosc']]\n",
"df_ludn_4 = df_ludn_4.dropna()\n",
"df_ludn_4 = df_ludn_4.pivot_table(index=['Kod', 'Rok'], columns='Wskaźniki', values='Wartosc').reset_index()\n",
"df_ludn_4 = df_ludn_4.rename(columns={\n",
" 'gęstość zaludnienia powierzchni zabudowanej i zurbanizowanej (osoby/km2)': 'Gestosc_zaludnienia',\n",
" 'ludność na 1 km2': 'Ludnosc_na_1_km2',\n",
" 'ludność w tysiącach': 'Ludnosc',\n",
" 'ludność w tysiącach kobiety': 'Ludnosc_kobiety',\n",
" 'ludność w tysiącach mężczyźni': 'Ludnosc_mezczyzni',\n",
" 'wskaźnik urbanizacji': 'Wskaznik_urbanizacji',\n",
" 'zmiana liczby ludności na 1000 mieszkańców': 'Zmiana_liczby_ludnosci'})\n",
"\n",
"df_ludn_4 = df_ludn_4[[\n",
" 'Kod',\n",
" 'Rok',\n",
" # 'Gestosc_zaludnienia',\n",
" 'Ludnosc_na_1_km2',\n",
" 'Ludnosc',\n",
" 'Ludnosc_kobiety',\n",
" 'Ludnosc_mezczyzni',\n",
" 'Wskaznik_urbanizacji',\n",
" 'Zmiana_liczby_ludnosci']]"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 425,
2024-05-10 19:17:54 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Kierunki migracji</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Saldo_migracji_na_1000_ludnosci</th>\n",
" <th>Saldo_migracji</th>\n",
" <th>Wymeldowania_do_miast_kobiety</th>\n",
" <th>Wymeldowania_do_miast_mezczyzni</th>\n",
" <th>Wymeldowania_do_miast_ogolem</th>\n",
" <th>Wymeldowania_na_wies_kobiety</th>\n",
" <th>Wymeldowania_na_wies_mezczyzni</th>\n",
" <th>Wymeldowania_na_wies_ogolem</th>\n",
" <th>...</th>\n",
" <th>Wymeldowania_za_granice_ogolem</th>\n",
" <th>Zameldowania_kobiety</th>\n",
" <th>Zameldowania_mezczyzni</th>\n",
" <th>Zameldowania_ogolem</th>\n",
" <th>Zameldowania_z_miast_kobiety</th>\n",
" <th>Zameldowania_z_miast_mezczyzni</th>\n",
" <th>Zameldowania_z_miast_ogolem</th>\n",
" <th>Zameldowania_ze_wsi_kobiety</th>\n",
" <th>Zameldowania_ze_wsi_mezczyzni</th>\n",
" <th>Zameldowania_ze_wsi_ogolem</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2010</td>\n",
" <td>-3.70</td>\n",
" <td>-151.00</td>\n",
" <td>108.00</td>\n",
" <td>96.00</td>\n",
" <td>204.00</td>\n",
" <td>170.00</td>\n",
" <td>177.00</td>\n",
" <td>347.00</td>\n",
" <td>...</td>\n",
" <td>0.00</td>\n",
" <td>223.00</td>\n",
" <td>177.00</td>\n",
" <td>400.00</td>\n",
" <td>70.00</td>\n",
" <td>52.00</td>\n",
" <td>122.00</td>\n",
" <td>147.00</td>\n",
" <td>118.00</td>\n",
" <td>265.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2011</td>\n",
" <td>-4.60</td>\n",
" <td>-186.00</td>\n",
" <td>111.00</td>\n",
" <td>99.00</td>\n",
" <td>210.00</td>\n",
" <td>170.00</td>\n",
" <td>157.00</td>\n",
" <td>327.00</td>\n",
" <td>...</td>\n",
" <td>1.00</td>\n",
" <td>196.00</td>\n",
" <td>156.00</td>\n",
" <td>352.00</td>\n",
" <td>67.00</td>\n",
" <td>59.00</td>\n",
" <td>126.00</td>\n",
" <td>125.00</td>\n",
" <td>94.00</td>\n",
" <td>219.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2012</td>\n",
" <td>-3.70</td>\n",
" <td>-149.00</td>\n",
" <td>100.00</td>\n",
" <td>92.00</td>\n",
" <td>192.00</td>\n",
" <td>147.00</td>\n",
" <td>153.00</td>\n",
" <td>300.00</td>\n",
" <td>...</td>\n",
" <td>9.00</td>\n",
" <td>197.00</td>\n",
" <td>155.00</td>\n",
" <td>352.00</td>\n",
" <td>78.00</td>\n",
" <td>61.00</td>\n",
" <td>139.00</td>\n",
" <td>116.00</td>\n",
" <td>92.00</td>\n",
" <td>208.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2013</td>\n",
" <td>-4.80</td>\n",
" <td>-191.00</td>\n",
" <td>115.00</td>\n",
" <td>88.00</td>\n",
" <td>203.00</td>\n",
" <td>182.00</td>\n",
" <td>158.00</td>\n",
" <td>340.00</td>\n",
" <td>...</td>\n",
" <td>24.00</td>\n",
" <td>211.00</td>\n",
" <td>165.00</td>\n",
" <td>376.00</td>\n",
" <td>83.00</td>\n",
" <td>58.00</td>\n",
" <td>141.00</td>\n",
" <td>128.00</td>\n",
" <td>101.00</td>\n",
" <td>229.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2014</td>\n",
" <td>-4.20</td>\n",
" <td>-167.00</td>\n",
" <td>100.00</td>\n",
" <td>86.00</td>\n",
" <td>186.00</td>\n",
" <td>168.00</td>\n",
" <td>161.00</td>\n",
" <td>329.00</td>\n",
" <td>...</td>\n",
" <td>41.00</td>\n",
" <td>196.00</td>\n",
" <td>193.00</td>\n",
" <td>389.00</td>\n",
" <td>71.00</td>\n",
" <td>71.00</td>\n",
" <td>142.00</td>\n",
" <td>125.00</td>\n",
" <td>121.00</td>\n",
" <td>246.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48606</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>1.70</td>\n",
" <td>71.00</td>\n",
" <td>125.00</td>\n",
" <td>152.00</td>\n",
" <td>277.00</td>\n",
" <td>40.00</td>\n",
" <td>66.00</td>\n",
" <td>106.00</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>245.00</td>\n",
" <td>240.00</td>\n",
" <td>485.00</td>\n",
" <td>156.00</td>\n",
" <td>138.00</td>\n",
" <td>294.00</td>\n",
" <td>73.00</td>\n",
" <td>79.00</td>\n",
" <td>152.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48607</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>3.40</td>\n",
" <td>141.00</td>\n",
" <td>151.00</td>\n",
" <td>116.00</td>\n",
" <td>267.00</td>\n",
" <td>48.00</td>\n",
" <td>53.00</td>\n",
" <td>101.00</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>273.00</td>\n",
" <td>259.00</td>\n",
" <td>532.00</td>\n",
" <td>179.00</td>\n",
" <td>149.00</td>\n",
" <td>328.00</td>\n",
" <td>71.00</td>\n",
" <td>90.00</td>\n",
" <td>161.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48608</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>3.20</td>\n",
" <td>129.00</td>\n",
" <td>98.00</td>\n",
" <td>99.00</td>\n",
" <td>197.00</td>\n",
" <td>40.00</td>\n",
" <td>44.00</td>\n",
" <td>84.00</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>226.00</td>\n",
" <td>203.00</td>\n",
" <td>429.00</td>\n",
" <td>159.00</td>\n",
" <td>131.00</td>\n",
" <td>290.00</td>\n",
" <td>52.00</td>\n",
" <td>53.00</td>\n",
" <td>105.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48609</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>-1.40</td>\n",
" <td>-55.00</td>\n",
" <td>122.00</td>\n",
" <td>126.00</td>\n",
" <td>248.00</td>\n",
" <td>63.00</td>\n",
" <td>50.00</td>\n",
" <td>113.00</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>171.00</td>\n",
" <td>168.00</td>\n",
" <td>339.00</td>\n",
" <td>109.00</td>\n",
" <td>95.00</td>\n",
" <td>204.00</td>\n",
" <td>49.00</td>\n",
" <td>46.00</td>\n",
" <td>95.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48610</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>-3.50</td>\n",
" <td>-138.00</td>\n",
" <td>116.00</td>\n",
" <td>105.00</td>\n",
" <td>221.00</td>\n",
" <td>73.00</td>\n",
" <td>69.00</td>\n",
" <td>142.00</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>141.00</td>\n",
" <td>138.00</td>\n",
" <td>279.00</td>\n",
" <td>85.00</td>\n",
" <td>71.00</td>\n",
" <td>156.00</td>\n",
" <td>38.00</td>\n",
" <td>39.00</td>\n",
" <td>77.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>48611 rows × 25 columns</p>\n",
"</div>"
],
"text/plain": [
"Kierunki migracji Kod Rok Saldo_migracji_na_1000_ludnosci \n",
"0 0201011 2010 -3.70 \\\n",
"1 0201011 2011 -4.60 \n",
"2 0201011 2012 -3.70 \n",
"3 0201011 2013 -4.80 \n",
"4 0201011 2014 -4.20 \n",
"... ... ... ... \n",
"48606 3263011 2018 1.70 \n",
"48607 3263011 2019 3.40 \n",
"48608 3263011 2020 3.20 \n",
"48609 3263011 2021 -1.40 \n",
"48610 3263011 2022 -3.50 \n",
"\n",
"Kierunki migracji Saldo_migracji Wymeldowania_do_miast_kobiety \n",
"0 -151.00 108.00 \\\n",
"1 -186.00 111.00 \n",
"2 -149.00 100.00 \n",
"3 -191.00 115.00 \n",
"4 -167.00 100.00 \n",
"... ... ... \n",
"48606 71.00 125.00 \n",
"48607 141.00 151.00 \n",
"48608 129.00 98.00 \n",
"48609 -55.00 122.00 \n",
"48610 -138.00 116.00 \n",
"\n",
"Kierunki migracji Wymeldowania_do_miast_mezczyzni \n",
"0 96.00 \\\n",
"1 99.00 \n",
"2 92.00 \n",
"3 88.00 \n",
"4 86.00 \n",
"... ... \n",
"48606 152.00 \n",
"48607 116.00 \n",
"48608 99.00 \n",
"48609 126.00 \n",
"48610 105.00 \n",
"\n",
"Kierunki migracji Wymeldowania_do_miast_ogolem Wymeldowania_na_wies_kobiety \n",
"0 204.00 170.00 \\\n",
"1 210.00 170.00 \n",
"2 192.00 147.00 \n",
"3 203.00 182.00 \n",
"4 186.00 168.00 \n",
"... ... ... \n",
"48606 277.00 40.00 \n",
"48607 267.00 48.00 \n",
"48608 197.00 40.00 \n",
"48609 248.00 63.00 \n",
"48610 221.00 73.00 \n",
"\n",
"Kierunki migracji Wymeldowania_na_wies_mezczyzni \n",
"0 177.00 \\\n",
"1 157.00 \n",
"2 153.00 \n",
"3 158.00 \n",
"4 161.00 \n",
"... ... \n",
"48606 66.00 \n",
"48607 53.00 \n",
"48608 44.00 \n",
"48609 50.00 \n",
"48610 69.00 \n",
"\n",
"Kierunki migracji Wymeldowania_na_wies_ogolem ... \n",
"0 347.00 ... \\\n",
"1 327.00 ... \n",
"2 300.00 ... \n",
"3 340.00 ... \n",
"4 329.00 ... \n",
"... ... ... \n",
"48606 106.00 ... \n",
"48607 101.00 ... \n",
"48608 84.00 ... \n",
"48609 113.00 ... \n",
"48610 142.00 ... \n",
"\n",
"Kierunki migracji Wymeldowania_za_granice_ogolem Zameldowania_kobiety \n",
"0 0.00 223.00 \\\n",
"1 1.00 196.00 \n",
"2 9.00 197.00 \n",
"3 24.00 211.00 \n",
"4 41.00 196.00 \n",
"... ... ... \n",
"48606 NaN 245.00 \n",
"48607 NaN 273.00 \n",
"48608 NaN 226.00 \n",
"48609 NaN 171.00 \n",
"48610 NaN 141.00 \n",
"\n",
"Kierunki migracji Zameldowania_mezczyzni Zameldowania_ogolem \n",
"0 177.00 400.00 \\\n",
"1 156.00 352.00 \n",
"2 155.00 352.00 \n",
"3 165.00 376.00 \n",
"4 193.00 389.00 \n",
"... ... ... \n",
"48606 240.00 485.00 \n",
"48607 259.00 532.00 \n",
"48608 203.00 429.00 \n",
"48609 168.00 339.00 \n",
"48610 138.00 279.00 \n",
"\n",
"Kierunki migracji Zameldowania_z_miast_kobiety \n",
"0 70.00 \\\n",
"1 67.00 \n",
"2 78.00 \n",
"3 83.00 \n",
"4 71.00 \n",
"... ... \n",
"48606 156.00 \n",
"48607 179.00 \n",
"48608 159.00 \n",
"48609 109.00 \n",
"48610 85.00 \n",
"\n",
"Kierunki migracji Zameldowania_z_miast_mezczyzni \n",
"0 52.00 \\\n",
"1 59.00 \n",
"2 61.00 \n",
"3 58.00 \n",
"4 71.00 \n",
"... ... \n",
"48606 138.00 \n",
"48607 149.00 \n",
"48608 131.00 \n",
"48609 95.00 \n",
"48610 71.00 \n",
"\n",
"Kierunki migracji Zameldowania_z_miast_ogolem Zameldowania_ze_wsi_kobiety \n",
"0 122.00 147.00 \\\n",
"1 126.00 125.00 \n",
"2 139.00 116.00 \n",
"3 141.00 128.00 \n",
"4 142.00 125.00 \n",
"... ... ... \n",
"48606 294.00 73.00 \n",
"48607 328.00 71.00 \n",
"48608 290.00 52.00 \n",
"48609 204.00 49.00 \n",
"48610 156.00 38.00 \n",
"\n",
"Kierunki migracji Zameldowania_ze_wsi_mezczyzni Zameldowania_ze_wsi_ogolem \n",
"0 118.00 265.00 \n",
"1 94.00 219.00 \n",
"2 92.00 208.00 \n",
"3 101.00 229.00 \n",
"4 121.00 246.00 \n",
"... ... ... \n",
"48606 79.00 152.00 \n",
"48607 90.00 161.00 \n",
"48608 53.00 105.00 \n",
"48609 46.00 95.00 \n",
"48610 39.00 77.00 \n",
"\n",
"[48611 rows x 25 columns]"
]
},
2024-05-13 15:20:33 +02:00
"execution_count": 425,
2024-05-10 19:17:54 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_ludn_5 = pd.read_csv(\n",
" 'LUDN_1355_CREL.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_ludn_5['Kierunki migracji'] = df_ludn_5['Kierunki migracji'] + df_ludn_5['Płeć']\n",
"df_ludn_5 = df_ludn_5[['Kod', 'Kierunki migracji', 'Rok', 'Wartosc']]\n",
"df_ludn_5 = df_ludn_5.dropna()\n",
"df_ludn_5 = df_ludn_5.pivot_table(index=['Kod', 'Rok'], columns='Kierunki migracji', values='Wartosc').reset_index()\n",
"df_ludn_5 = df_ludn_5.rename(columns={\n",
" 'saldo migracji na 1000 ludnościogółem': 'Saldo_migracji_na_1000_ludnosci',\n",
" 'saldo migracjiogółem': 'Saldo_migracji',\n",
" 'wymeldowania do miastkobiety': 'Wymeldowania_do_miast_kobiety',\n",
" 'wymeldowania do miastmężczyźni': 'Wymeldowania_do_miast_mezczyzni',\n",
" 'wymeldowania do miastogółem': 'Wymeldowania_do_miast_ogolem',\n",
" 'wymeldowania na wieśkobiety': 'Wymeldowania_na_wies_kobiety',\n",
" 'wymeldowania na wieśmężczyźni': 'Wymeldowania_na_wies_mezczyzni',\n",
" 'wymeldowania na wieśogółem': 'Wymeldowania_na_wies_ogolem',\n",
" 'wymeldowania ogółemkobiety': 'Wymeldowania_kobiety',\n",
" 'wymeldowania ogółemmężczyźni': 'Wymeldowania_mezczyzni',\n",
" 'wymeldowania ogółemogółem': 'Wymeldowania_ogolem',\n",
" 'wymeldowania za granicękobiety': 'Wymeldowania_za_granice_kobiety',\n",
" 'wymeldowania za granicęmężczyźni': 'Wymeldowania_za_granice_mezczyzni',\n",
" 'wymeldowania za granicęogółem': 'Wymeldowania_za_granice_ogolem',\n",
" 'zameldowania ogółemkobiety': 'Zameldowania_kobiety',\n",
" 'zameldowania ogółemmężczyźni': 'Zameldowania_mezczyzni',\n",
" 'zameldowania ogółemogółem': 'Zameldowania_ogolem',\n",
" 'zameldowania z miastkobiety': 'Zameldowania_z_miast_kobiety',\n",
" 'zameldowania z miastmężczyźni': 'Zameldowania_z_miast_mezczyzni',\n",
" 'zameldowania z miastogółem': 'Zameldowania_z_miast_ogolem',\n",
" 'zameldowania ze wsikobiety': 'Zameldowania_ze_wsi_kobiety',\n",
" 'zameldowania ze wsimężczyźni': 'Zameldowania_ze_wsi_mezczyzni',\n",
" 'zameldowania ze wsiogółem': 'Zameldowania_ze_wsi_ogolem'})\n",
"\n",
"df_ludn_5"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 426,
2024-05-10 19:17:54 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Turystyczne obiekty noclegowe</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Miejsca_noclegowe_caloroczne</th>\n",
" <th>Miejsca_noclegowe_ogolem</th>\n",
" <th>Obiekty_caloroczne</th>\n",
" <th>Obiekty_ogolem</th>\n",
" <th>Turysci_ogolem</th>\n",
" <th>Turysci_zagraniczni</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2010</td>\n",
" <td>265.00</td>\n",
" <td>265.00</td>\n",
" <td>7.00</td>\n",
" <td>7.00</td>\n",
" <td>16427.00</td>\n",
" <td>5173.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2011</td>\n",
" <td>267.00</td>\n",
" <td>267.00</td>\n",
" <td>7.00</td>\n",
" <td>7.00</td>\n",
" <td>13134.00</td>\n",
" <td>4486.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2012</td>\n",
" <td>295.00</td>\n",
" <td>295.00</td>\n",
" <td>8.00</td>\n",
" <td>8.00</td>\n",
" <td>13159.00</td>\n",
" <td>4856.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2013</td>\n",
" <td>293.00</td>\n",
" <td>293.00</td>\n",
" <td>8.00</td>\n",
" <td>8.00</td>\n",
" <td>11914.00</td>\n",
" <td>4701.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2014</td>\n",
" <td>292.00</td>\n",
" <td>292.00</td>\n",
" <td>8.00</td>\n",
" <td>8.00</td>\n",
" <td>12398.00</td>\n",
" <td>3919.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34697</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>9757.00</td>\n",
" <td>11717.00</td>\n",
" <td>76.00</td>\n",
" <td>107.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34698</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>9963.00</td>\n",
" <td>11805.00</td>\n",
" <td>74.00</td>\n",
" <td>103.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34699</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>9673.00</td>\n",
" <td>11557.00</td>\n",
" <td>68.00</td>\n",
" <td>97.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34700</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>8731.00</td>\n",
" <td>10551.00</td>\n",
" <td>66.00</td>\n",
" <td>92.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34701</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>8893.00</td>\n",
" <td>10738.00</td>\n",
" <td>68.00</td>\n",
" <td>92.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>34702 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
"Turystyczne obiekty noclegowe Kod Rok Miejsca_noclegowe_caloroczne \n",
"0 0201011 2010 265.00 \\\n",
"1 0201011 2011 267.00 \n",
"2 0201011 2012 295.00 \n",
"3 0201011 2013 293.00 \n",
"4 0201011 2014 292.00 \n",
"... ... ... ... \n",
"34697 3263011 2018 9757.00 \n",
"34698 3263011 2019 9963.00 \n",
"34699 3263011 2020 9673.00 \n",
"34700 3263011 2021 8731.00 \n",
"34701 3263011 2022 8893.00 \n",
"\n",
"Turystyczne obiekty noclegowe Miejsca_noclegowe_ogolem Obiekty_caloroczne \n",
"0 265.00 7.00 \\\n",
"1 267.00 7.00 \n",
"2 295.00 8.00 \n",
"3 293.00 8.00 \n",
"4 292.00 8.00 \n",
"... ... ... \n",
"34697 11717.00 76.00 \n",
"34698 11805.00 74.00 \n",
"34699 11557.00 68.00 \n",
"34700 10551.00 66.00 \n",
"34701 10738.00 68.00 \n",
"\n",
"Turystyczne obiekty noclegowe Obiekty_ogolem Turysci_ogolem \n",
"0 7.00 16427.00 \\\n",
"1 7.00 13134.00 \n",
"2 8.00 13159.00 \n",
"3 8.00 11914.00 \n",
"4 8.00 12398.00 \n",
"... ... ... \n",
"34697 107.00 NaN \n",
"34698 103.00 NaN \n",
"34699 97.00 NaN \n",
"34700 92.00 NaN \n",
"34701 92.00 NaN \n",
"\n",
"Turystyczne obiekty noclegowe Turysci_zagraniczni \n",
"0 5173.00 \n",
"1 4486.00 \n",
"2 4856.00 \n",
"3 4701.00 \n",
"4 3919.00 \n",
"... ... \n",
"34697 NaN \n",
"34698 NaN \n",
"34699 NaN \n",
"34700 NaN \n",
"34701 NaN \n",
"\n",
"[34702 rows x 8 columns]"
]
},
2024-05-13 15:20:33 +02:00
"execution_count": 426,
2024-05-10 19:17:54 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_tury = pd.read_csv(\n",
" 'TURY_2017_CREL.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_tury = df_tury[['Kod', 'Turystyczne obiekty noclegowe', 'Rok', 'Wartosc']]\n",
"df_tury = df_tury.dropna()\n",
"df_tury = df_tury.pivot_table(index=['Kod', 'Rok'], columns='Turystyczne obiekty noclegowe', values='Wartosc').reset_index()\n",
"df_tury = df_tury.rename(columns={\n",
" 'miejsca noclegowe całoroczne lipiec': 'Miejsca_noclegowe_caloroczne',\n",
" 'miejsca noclegowe ogółem lipiec': 'Miejsca_noclegowe_ogolem',\n",
" 'obiekty całoroczne lipiec': 'Obiekty_caloroczne',\n",
" 'obiekty ogółem lipiec': 'Obiekty_ogolem',\n",
" 'turyści (korzystający) ogółem styczeń-grudzień': 'Turysci_ogolem',\n",
" 'turyści zagraniczni (korzystający) - nierezydenci styczeń-grudzień': 'Turysci_zagraniczni'})\n",
"\n",
"df_tury"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 427,
2024-05-10 19:17:54 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Bezrobotni</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Bezrobotni_do_25_roku_zycia</th>\n",
" <th>Bezrobotni_do_30_roku_zycia</th>\n",
" <th>Dlugotrwale_bezrobotni</th>\n",
" <th>Bezrobotne_kobiety</th>\n",
" <th>Bezrobotni_mezczyzni</th>\n",
" <th>Bezrobotni_ogolem</th>\n",
" <th>Bezrobotni_powyzej_50_roku_zycia</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2011</td>\n",
" <td>284.00</td>\n",
" <td>NaN</td>\n",
" <td>819.50</td>\n",
" <td>900.50</td>\n",
" <td>818.00</td>\n",
" <td>1718.50</td>\n",
" <td>486.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2012</td>\n",
" <td>293.00</td>\n",
" <td>NaN</td>\n",
" <td>756.50</td>\n",
" <td>894.50</td>\n",
" <td>888.00</td>\n",
" <td>1782.50</td>\n",
" <td>498.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2013</td>\n",
" <td>253.50</td>\n",
" <td>NaN</td>\n",
" <td>788.00</td>\n",
" <td>869.50</td>\n",
" <td>874.00</td>\n",
" <td>1743.50</td>\n",
" <td>521.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2014</td>\n",
" <td>172.50</td>\n",
" <td>NaN</td>\n",
" <td>651.50</td>\n",
" <td>648.50</td>\n",
" <td>667.50</td>\n",
" <td>1316.00</td>\n",
" <td>402.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2015</td>\n",
" <td>107.50</td>\n",
" <td>238.00</td>\n",
" <td>434.50</td>\n",
" <td>504.00</td>\n",
" <td>518.50</td>\n",
" <td>1022.50</td>\n",
" <td>359.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48530</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>27.50</td>\n",
" <td>66.00</td>\n",
" <td>226.50</td>\n",
" <td>272.50</td>\n",
" <td>221.00</td>\n",
" <td>493.50</td>\n",
" <td>181.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48531</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>56.00</td>\n",
" <td>142.00</td>\n",
" <td>239.50</td>\n",
" <td>390.00</td>\n",
" <td>361.50</td>\n",
" <td>751.50</td>\n",
" <td>250.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48532</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>34.50</td>\n",
" <td>88.00</td>\n",
" <td>260.50</td>\n",
" <td>295.00</td>\n",
" <td>341.00</td>\n",
" <td>636.00</td>\n",
" <td>239.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48533</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>31.50</td>\n",
" <td>72.00</td>\n",
" <td>199.00</td>\n",
" <td>211.50</td>\n",
" <td>270.50</td>\n",
" <td>482.00</td>\n",
" <td>182.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48534</th>\n",
" <td>3263011</td>\n",
" <td>2023</td>\n",
" <td>33.50</td>\n",
" <td>81.00</td>\n",
" <td>200.00</td>\n",
" <td>241.00</td>\n",
" <td>287.50</td>\n",
" <td>528.50</td>\n",
" <td>189.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>48535 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
"Bezrobotni Kod Rok Bezrobotni_do_25_roku_zycia \n",
"0 0201011 2011 284.00 \\\n",
"1 0201011 2012 293.00 \n",
"2 0201011 2013 253.50 \n",
"3 0201011 2014 172.50 \n",
"4 0201011 2015 107.50 \n",
"... ... ... ... \n",
"48530 3263011 2019 27.50 \n",
"48531 3263011 2020 56.00 \n",
"48532 3263011 2021 34.50 \n",
"48533 3263011 2022 31.50 \n",
"48534 3263011 2023 33.50 \n",
"\n",
"Bezrobotni Bezrobotni_do_30_roku_zycia Dlugotrwale_bezrobotni \n",
"0 NaN 819.50 \\\n",
"1 NaN 756.50 \n",
"2 NaN 788.00 \n",
"3 NaN 651.50 \n",
"4 238.00 434.50 \n",
"... ... ... \n",
"48530 66.00 226.50 \n",
"48531 142.00 239.50 \n",
"48532 88.00 260.50 \n",
"48533 72.00 199.00 \n",
"48534 81.00 200.00 \n",
"\n",
"Bezrobotni Bezrobotne_kobiety Bezrobotni_mezczyzni Bezrobotni_ogolem \n",
"0 900.50 818.00 1718.50 \\\n",
"1 894.50 888.00 1782.50 \n",
"2 869.50 874.00 1743.50 \n",
"3 648.50 667.50 1316.00 \n",
"4 504.00 518.50 1022.50 \n",
"... ... ... ... \n",
"48530 272.50 221.00 493.50 \n",
"48531 390.00 361.50 751.50 \n",
"48532 295.00 341.00 636.00 \n",
"48533 211.50 270.50 482.00 \n",
"48534 241.00 287.50 528.50 \n",
"\n",
"Bezrobotni Bezrobotni_powyzej_50_roku_zycia \n",
"0 486.50 \n",
"1 498.50 \n",
"2 521.00 \n",
"3 402.00 \n",
"4 359.00 \n",
"... ... \n",
"48530 181.00 \n",
"48531 250.00 \n",
"48532 239.50 \n",
"48533 182.50 \n",
"48534 189.00 \n",
"\n",
"[48535 rows x 9 columns]"
]
},
2024-05-13 15:20:33 +02:00
"execution_count": 427,
2024-05-10 19:17:54 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the RYNE_3733_CREL.csv export and reshape it to one row per (Kod, Rok)\n",
"# with one column per value of 'Bezrobotni'.\n",
"df_ryne = (\n",
"    pd.read_csv('RYNE_3733_CREL.csv', sep=';', decimal=',', converters={'Kod': str})\n",
"    .loc[:, ['Kod', 'Bezrobotni', 'Rok', 'Wartosc']]\n",
"    .dropna()\n",
"    # pivot_table uses its default aggregation when a (Kod, Rok, Bezrobotni) combination repeats\n",
"    .pivot_table(index=['Kod', 'Rok'], columns='Bezrobotni', values='Wartosc')\n",
"    .reset_index()\n",
"    .rename(columns={\n",
"        'do 25 roku życia': 'Bezrobotni_do_25_roku_zycia',\n",
"        'do 30 roku życia': 'Bezrobotni_do_30_roku_zycia',\n",
"        'długotrwale bezrobotni': 'Dlugotrwale_bezrobotni',\n",
"        'kobiety': 'Bezrobotne_kobiety',\n",
"        'mężczyźni': 'Bezrobotni_mezczyzni',\n",
"        'ogółem': 'Bezrobotni_ogolem',\n",
"        'powyżej 50 roku życia': 'Bezrobotni_powyzej_50_roku_zycia',\n",
"    })\n",
")\n",
"\n",
"df_ryne"
]
},
2024-05-06 22:55:21 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 428,
2024-05-06 22:55:21 +02:00
"metadata": {},
2024-05-10 19:17:54 +02:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-05-13 15:20:33 +02:00
"2273\n"
2024-05-10 19:17:54 +02:00
]
}
],
2024-05-06 22:55:21 +02:00
"source": [
2024-05-10 19:17:54 +02:00
"df_data = df_dofinansowanie_agg.copy()\n",
"print(len(df_data))"
2024-05-06 22:55:21 +02:00
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 429,
2024-05-09 08:21:38 +02:00
"metadata": {},
"outputs": [],
"source": [
"# The first two characters of 'Kod' select the voivodeship.\n",
"wojewodztwo_dictionary = {\n",
"    '02': 'Dolnoslaskie',\n",
"    '04': 'Kujawsko_Pomorskie',\n",
"    '06': 'Lubelskie',\n",
"    '08': 'Lubuskie',\n",
"    '10': 'Lodzkie',\n",
"    '12': 'Malopolskie',\n",
"    '14': 'Mazowieckie',\n",
"    '16': 'Opolskie',\n",
"    '18': 'Podkarpackie',\n",
"    '20': 'Podlaskie',\n",
"    '22': 'Pomorskie',\n",
"    '24': 'Slaskie',\n",
"    '26': 'Swietokrzyskie',\n",
"    '28': 'Warminsko_Mazurskie',\n",
"    '30': 'Wielkopolskie',\n",
"    '32': 'Zachodniopomorskie',\n",
"}\n",
"\n",
"# One 0/1 column per voivodeship; prefixes missing from the dictionary map to None.\n",
"wojewodztwo_dummies = pd.get_dummies(\n",
"    df_data['Kod'].apply(lambda x: wojewodztwo_dictionary.get(x[:2], None)),\n",
"    prefix='Wojewodztwo').astype(int)\n",
"\n",
"# Drop dummy columns left by an earlier run of this cell, so that re-executing it\n",
"# does not leave df_data with duplicated 'Wojewodztwo_*' columns.\n",
"df_data = pd.concat(\n",
"    [df_data.drop(columns=wojewodztwo_dummies.columns, errors='ignore'), wojewodztwo_dummies],\n",
"    axis=1)"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 430,
"metadata": {},
"outputs": [],
"source": [
"# The last character of 'Kod' selects the type of gmina.\n",
"rodzaj_gminy_dictionary = {\n",
"    '1': 'Gmina miejska',\n",
"    '2': 'Gmina wiejska',\n",
"    '3': 'Gmina miejsko-wiejska',\n",
"}\n",
"\n",
"# One 0/1 column per gmina type; other trailing characters map to None.\n",
"rodzaj_gminy_dummies = pd.get_dummies(\n",
"    df_data['Kod'].apply(lambda x: rodzaj_gminy_dictionary.get(x[-1], None))).astype(int)\n",
"\n",
"# Drop dummy columns left by an earlier run of this cell, so that re-executing it\n",
"# does not leave df_data with duplicated gmina-type columns.\n",
"df_data = pd.concat(\n",
"    [df_data.drop(columns=rodzaj_gminy_dummies.columns, errors='ignore'), rodzaj_gminy_dummies],\n",
"    axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 431,
2024-05-06 22:55:21 +02:00
"metadata": {},
2024-05-10 19:17:54 +02:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-05-13 15:20:33 +02:00
"2273\n"
2024-05-10 19:17:54 +02:00
]
}
],
"source": [
"# Left-join df_podz on 'Kod' without its last character, plus 'Rok'.\n",
"podz_key_left = df_data['Kod'].str.slice(stop=-1)\n",
"podz_key_right = df_podz['Kod'].str.slice(stop=-1)\n",
"df_data = df_data.merge(\n",
"    df_podz,\n",
"    how='left',\n",
"    left_on=[podz_key_left, 'Rok'],\n",
"    right_on=[podz_key_right, 'Rok'],\n",
"    suffixes=(None, '_podz'),\n",
").drop(columns=['key_0', 'Kod_podz'])\n",
"print(len(df_data))"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 432,
2024-05-10 19:17:54 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-05-13 15:20:33 +02:00
"2273\n"
2024-05-10 19:17:54 +02:00
]
}
],
"source": [
"# Left-join df_wyna on 'Kod' without its last three characters, plus 'Rok'.\n",
"wyna_key_left = df_data['Kod'].str.slice(stop=-3)\n",
"wyna_key_right = df_wyna['Kod'].str.slice(stop=-3)\n",
"df_data = df_data.merge(\n",
"    df_wyna,\n",
"    how='left',\n",
"    left_on=[wyna_key_left, 'Rok'],\n",
"    right_on=[wyna_key_right, 'Rok'],\n",
"    suffixes=(None, '_wyna'),\n",
").drop(columns=['key_0', 'Kod_wyna'])\n",
"print(len(df_data))"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 433,
2024-05-10 19:17:54 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-05-13 15:20:33 +02:00
"2273\n",
"2273\n"
2024-05-10 19:17:54 +02:00
]
}
],
"source": [
"# Left-join both df_fina_* tables on 'Kod' without its last character, plus 'Rok';\n",
"# the row count is printed after each join.\n",
"for fina_table, fina_suffix in ((df_fina_1, '_fina_1'), (df_fina_2, '_fina_2')):\n",
"    df_data = df_data.merge(\n",
"        fina_table,\n",
"        how='left',\n",
"        left_on=[df_data['Kod'].str.slice(stop=-1), 'Rok'],\n",
"        right_on=[fina_table['Kod'].str.slice(stop=-1), 'Rok'],\n",
"        suffixes=(None, fina_suffix),\n",
"    ).drop(columns=['key_0', 'Kod' + fina_suffix])\n",
"    print(len(df_data))"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 434,
2024-05-10 19:17:54 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-05-13 15:20:33 +02:00
"2273\n",
"2273\n",
"2273\n",
"2273\n",
"2273\n"
2024-05-10 19:17:54 +02:00
]
}
],
2024-05-06 22:55:21 +02:00
"source": [
2024-05-10 19:17:54 +02:00
"# The five df_ludn_* tables are joined on the plain ('Kod', 'Rok') key, one after another;\n",
"# the row count is printed after each join.\n",
"ludn_tables = {\n",
"    '_ludn_1': df_ludn_1,\n",
"    '_ludn_2': df_ludn_2,\n",
"    '_ludn_3': df_ludn_3,\n",
"    '_ludn_4': df_ludn_4,\n",
"    '_ludn_5': df_ludn_5,\n",
"}\n",
"for ludn_suffix, ludn_table in ludn_tables.items():\n",
"    df_data = df_data.merge(\n",
"        ludn_table,\n",
"        left_on=['Kod', 'Rok'],\n",
"        right_on=['Kod', 'Rok'],\n",
"        how='left',\n",
"        suffixes=(None, ludn_suffix))\n",
"    print(len(df_data))"
2024-05-06 22:55:21 +02:00
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 435,
2024-05-06 22:55:21 +02:00
"metadata": {},
2024-05-10 19:17:54 +02:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-05-13 15:20:33 +02:00
"2273\n"
2024-05-10 19:17:54 +02:00
]
}
],
2024-05-06 22:55:21 +02:00
"source": [
2024-05-10 19:17:54 +02:00
"# Left-join df_tury on ('Kod', 'Rok') and print the resulting row count.\n",
"df_data = df_data.merge(\n",
"    df_tury,\n",
"    how='left',\n",
"    left_on=['Kod', 'Rok'],\n",
"    right_on=['Kod', 'Rok'],\n",
"    suffixes=(None, '_tury'),\n",
")\n",
"print(len(df_data))"
2024-05-06 22:55:21 +02:00
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 436,
2024-05-06 22:55:21 +02:00
"metadata": {},
2024-05-10 19:17:54 +02:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-05-13 15:20:33 +02:00
"2273\n"
2024-05-10 19:17:54 +02:00
]
}
],
2024-05-06 22:55:21 +02:00
"source": [
2024-05-10 19:17:54 +02:00
"# Left-join df_ryne on ('Kod', 'Rok') and print the resulting row count.\n",
"df_data = df_data.merge(\n",
"    df_ryne,\n",
"    how='left',\n",
"    left_on=['Kod', 'Rok'],\n",
"    right_on=['Kod', 'Rok'],\n",
"    suffixes=(None, '_ryne'),\n",
")\n",
"print(len(df_data))"
2024-05-06 22:55:21 +02:00
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 437,
2024-05-06 22:55:21 +02:00
"metadata": {},
2024-05-10 19:17:54 +02:00
"outputs": [
{
"data": {
"text/plain": [
2024-05-13 15:20:33 +02:00
"0.6733698870134954"
2024-05-10 19:17:54 +02:00
]
},
2024-05-13 15:20:33 +02:00
"execution_count": 437,
2024-05-10 19:17:54 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-05-06 22:55:21 +02:00
"source": [
2024-05-10 19:17:54 +02:00
"df_data['Gestosc_zaludnienia'] = df_data['Ludnosc'] / df_data['Powierzchnia']\n",
2024-05-06 23:50:22 +02:00
"\n",
2024-05-10 19:17:54 +02:00
"df_data['Gestosc_zaludnienia'].mean()"
2024-05-06 22:55:21 +02:00
]
},
2024-05-13 15:20:33 +02:00
{
"cell_type": "code",
"execution_count": 438,
"metadata": {},
"outputs": [],
"source": [
"df_data['Suma'] = df_data['Suma'] / df_data['Ludnosc']"
]
},
2024-05-06 22:55:21 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 439,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-05-10 19:17:54 +02:00
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Suma</th>\n",
2024-05-13 15:20:33 +02:00
" <th>Ludnosc</th>\n",
2024-05-10 19:17:54 +02:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2024-05-13 15:20:33 +02:00
" <td>62917.04</td>\n",
" <td>39.46</td>\n",
2024-05-10 19:17:54 +02:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-05-13 15:20:33 +02:00
" <td>3520.19</td>\n",
" <td>39.08</td>\n",
2024-05-10 19:17:54 +02:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-05-13 15:20:33 +02:00
" <td>224163.57</td>\n",
" <td>37.66</td>\n",
2024-05-10 19:17:54 +02:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-05-13 15:20:33 +02:00
" <td>32473.38</td>\n",
" <td>14.09</td>\n",
2024-05-10 19:17:54 +02:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-05-13 15:20:33 +02:00
" <td>25713.81</td>\n",
" <td>5.35</td>\n",
2024-05-10 19:17:54 +02:00
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
2024-05-13 15:20:33 +02:00
" <th>2268</th>\n",
2024-05-10 19:17:54 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
2024-05-13 15:20:33 +02:00
" <th>2269</th>\n",
" <td>196539.43</td>\n",
" <td>41.28</td>\n",
2024-05-10 19:17:54 +02:00
" </tr>\n",
" <tr>\n",
2024-05-13 15:20:33 +02:00
" <th>2270</th>\n",
" <td>7900.24</td>\n",
" <td>41.15</td>\n",
2024-05-10 19:17:54 +02:00
" </tr>\n",
" <tr>\n",
2024-05-13 15:20:33 +02:00
" <th>2271</th>\n",
" <td>11386418.96</td>\n",
" <td>41.03</td>\n",
2024-05-10 19:17:54 +02:00
" </tr>\n",
" <tr>\n",
2024-05-13 15:20:33 +02:00
" <th>2272</th>\n",
" <td>188601.28</td>\n",
" <td>40.33</td>\n",
2024-05-10 19:17:54 +02:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2024-05-13 15:20:33 +02:00
"<p>2273 rows × 2 columns</p>\n",
2024-05-10 19:17:54 +02:00
"</div>"
],
2024-05-06 22:55:21 +02:00
"text/plain": [
2024-05-13 15:20:33 +02:00
" Suma Ludnosc\n",
"0 62917.04 39.46\n",
"1 3520.19 39.08\n",
"2 224163.57 37.66\n",
"3 32473.38 14.09\n",
"4 25713.81 5.35\n",
"... ... ...\n",
"2268 NaN NaN\n",
"2269 196539.43 41.28\n",
"2270 7900.24 41.15\n",
"2271 11386418.96 41.03\n",
"2272 188601.28 40.33\n",
"\n",
"[2273 rows x 2 columns]"
2024-05-06 22:55:21 +02:00
]
},
2024-05-13 15:20:33 +02:00
"execution_count": 439,
2024-05-06 22:55:21 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-05-13 15:20:33 +02:00
"df_data[['Suma', 'Ludnosc']]"
2024-05-06 22:55:21 +02:00
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 440,
2024-05-06 22:55:21 +02:00
"metadata": {},
"outputs": [],
"source": [
"# df_data[df_data.isna().any(axis=1)] # ['Rok'].drop_duplicates().reset_index(drop=True)"
]
},
2024-05-10 19:17:54 +02:00
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 441,
2024-05-10 19:17:54 +02:00
"metadata": {},
"outputs": [],
"source": [
"s = df_data.isna().sum()"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 442,
2024-05-10 19:17:54 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-05-13 15:20:33 +02:00
"Wymeldowania_za_granice_kobiety 775\n",
"Wymeldowania_za_granice_mezczyzni 775\n",
"Wymeldowania_za_granice_ogolem 775\n",
"Turysci_ogolem 1724\n",
"Turysci_zagraniczni 1724\n",
"Bezrobotni_do_30_roku_zycia 657\n",
2024-05-10 19:17:54 +02:00
"dtype: int64"
]
},
2024-05-13 15:20:33 +02:00
"execution_count": 442,
2024-05-10 19:17:54 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s[s > 330]"
]
},
2024-05-06 22:55:21 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
2024-05-06 23:50:22 +02:00
"..."
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 443,
2024-05-06 23:50:22 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-05-13 15:20:33 +02:00
"2273\n",
"2191\n",
"Mean Squared Error: 20582414771111.633\n"
2024-05-06 23:50:22 +02:00
]
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
2024-05-10 19:17:54 +02:00
"from sklearn.tree import DecisionTreeRegressor, plot_tree, export_text\n",
2024-05-06 23:50:22 +02:00
"from sklearn.metrics import mean_squared_error\n",
"import matplotlib.pyplot as plt\n",
"\n",
2024-05-10 19:17:54 +02:00
"# Missing values in these columns are replaced with 0.\n",
"tourism_columns = [\n",
"    'Miejsca_noclegowe_caloroczne',\n",
"    'Miejsca_noclegowe_ogolem',\n",
"    'Obiekty_caloroczne',\n",
"    'Obiekty_ogolem',\n",
"    'Turysci_ogolem',\n",
"    'Turysci_zagraniczni']\n",
"df_data[tourism_columns] = df_data[tourism_columns].fillna(0)\n",
2024-05-06 23:50:22 +02:00
"\n",
"feature_names = [\n",
2024-05-09 08:21:38 +02:00
" 'Powierzchnia', # 1\n",
" 'Wynagrodzenie_ogolem', # 2\n",
" 'Wynagrodzenie_w_relacji_do_sredniej', # 3\n",
" 'Dochody_podatek_lesny', # 4\n",
" 'Dochody_podatek_PCC', # 5\n",
" 'Dochody_podatek_od_dzialalnosci_gospodarczej', # 6\n",
" 'Dochody_podatek_od_nieruchomosci', # 7\n",
" 'Dochody_podatek_od_spadkow', # 8\n",
" 'Dochody_podatek_od_srodkow_transportowych', # 9\n",
" 'Dochody_podatek_rolny', # 10\n",
" 'Dochody_podatek_odrebne_ustawy', # 11\n",
" 'Dochody_razem', # 12\n",
" 'Dochody_z_majatku', # 13\n",
" 'Dochody_z_najmu_i_dzierzawy', # 14\n",
" 'Dochody_z_uslug', # 15\n",
" 'Dochody_dofinansowanie_inwestycyjne', # 16\n",
" 'Dochody_dofinansowanie_razem', # 17\n",
" 'Udzialy_w_podatkach_dochodowych_od_osob_fizycznych', # 18\n",
" 'Udzialy_w_podatkach_dochodowych_od_osob_prywatnych', # 19\n",
" 'Udzialy_w_podatkach_dochodowych_razem', # 20\n",
" 'Wplywy_z_innych_lokalnych_oplat', # 21\n",
" 'Wplywy_z_oplaty_eksploatacyjnej', # 22\n",
" 'Wplywy_z_oplaty_skarbowej', # 23\n",
" 'Wplywy_z_oplaty_targowej', # 24\n",
" 'Ludnosc_ogolem', # 25\n",
" 'Ludnosc_w_wieku_poprodukcyjnym', # 26\n",
" 'Ludnosc_w_wieku_produkcyjnym', # 27\n",
" 'Ludnosc_w_wieku_produkcyjnym_mobilnym', # 28\n",
" 'Ludnosc_w_wieku_produkcyjnym_niemobilnym', # 29\n",
" 'Ludnosc_w_wieku_przedprodukcyjnym', # 30\n",
" 'Ludnosc_mezczyzni', # 31\n",
" 'Ludnosc_mezczyzni_w_wieku_poprodukcyjnym', # 32\n",
" 'Ludnosc_mezczyzni_w_wieku_produkcyjnym', # 33\n",
" 'Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym', # 34\n",
" 'Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym', # 35\n",
" 'Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym', # 36\n",
" 'Ludnosc_kobiety', # 37\n",
" 'Ludnosc_kobiety_w_wieku_poprodukcyjnym', # 38\n",
" 'Ludnosc_kobiety_w_wieku_produkcyjnym', # 39\n",
" 'Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym', # 40\n",
" 'Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym', # 41\n",
" 'Ludnosc_kobiety_w_wieku_przedprodukcyjnym', # 42\n",
" 'Wojewodztwo_Dolnoslaskie', # 43\n",
" 'Wojewodztwo_Kujawsko_Pomorskie', # 44\n",
" 'Wojewodztwo_Lubelskie', # 45\n",
" 'Wojewodztwo_Lubuskie', # 46\n",
" 'Wojewodztwo_Lodzkie', # 47\n",
" 'Wojewodztwo_Malopolskie', # 48\n",
" 'Wojewodztwo_Mazowieckie', # 49\n",
" 'Wojewodztwo_Opolskie', # 50\n",
" 'Wojewodztwo_Podkarpackie', # 51\n",
" 'Wojewodztwo_Podlaskie', # 52\n",
" 'Wojewodztwo_Pomorskie', # 53\n",
" 'Wojewodztwo_Slaskie', # 54\n",
" 'Wojewodztwo_Swietokrzyskie', # 55\n",
" 'Wojewodztwo_Warminsko_Mazurskie', # 56\n",
" 'Wojewodztwo_Wielkopolskie', # 57\n",
2024-05-10 19:17:54 +02:00
" 'Wojewodztwo_Zachodniopomorskie', # 58\n",
" 'Gestosc_zaludnienia', # 59\n",
" 'Ludnosc_na_1_km2', # 60\n",
" 'Ludnosc', # 61\n",
" 'Ludnosc_kobiety', # 62\n",
" 'Ludnosc_mezczyzni', # 63\n",
" 'Wskaznik_urbanizacji', # 64\n",
" 'Zmiana_liczby_ludnosci', # 65\n",
" 'Saldo_migracji_na_1000_ludnosci', # 66\n",
" 'Saldo_migracji', # 67\n",
" 'Wymeldowania_do_miast_kobiety', # 68\n",
" 'Wymeldowania_do_miast_mezczyzni', # 69\n",
" 'Wymeldowania_do_miast_ogolem', # 70\n",
" 'Wymeldowania_na_wies_kobiety', # 71\n",
" 'Wymeldowania_na_wies_mezczyzni', # 72\n",
" 'Wymeldowania_na_wies_ogolem', # 73\n",
" 'Wymeldowania_kobiety', # 74\n",
" 'Wymeldowania_mezczyzni', # 75\n",
" 'Wymeldowania_ogolem', # 76\n",
" 'Zameldowania_kobiety', # 77\n",
" 'Zameldowania_mezczyzni', # 78\n",
" 'Zameldowania_ogolem', # 79\n",
" 'Zameldowania_z_miast_kobiety', # 80\n",
" 'Zameldowania_z_miast_mezczyzni', # 81\n",
" 'Zameldowania_z_miast_ogolem', # 82\n",
" 'Zameldowania_ze_wsi_kobiety', # 83\n",
" 'Zameldowania_ze_wsi_mezczyzni', # 84\n",
" 'Zameldowania_ze_wsi_ogolem', # 85\n",
" 'Miejsca_noclegowe_caloroczne', # 86\n",
" 'Miejsca_noclegowe_ogolem', # 87\n",
" 'Obiekty_caloroczne', # 88\n",
" 'Obiekty_ogolem', # 89\n",
" 'Turysci_ogolem', # 90\n",
" 'Turysci_zagraniczni', # 91\n",
" 'Bezrobotni_do_25_roku_zycia', # 92\n",
" 'Dlugotrwale_bezrobotni', # 93\n",
" 'Bezrobotne_kobiety', # 94\n",
" 'Bezrobotni_mezczyzni', # 95\n",
" 'Bezrobotni_ogolem', # 96\n",
" 'Bezrobotni_powyzej_50_roku_zycia'] # 97\n",
"\n",
"# Remove these columns before dropna(); errors='ignore' lets the cell run again once they are gone.\n",
"sparse_columns = [\n",
"    'Wymeldowania_za_granice_kobiety',\n",
"    'Wymeldowania_za_granice_mezczyzni',\n",
"    'Wymeldowania_za_granice_ogolem',\n",
"    'Bezrobotni_do_30_roku_zycia']\n",
"df_data = df_data.drop(columns=sparse_columns, errors='ignore')\n",
"\n",
"# Row count before and after removing rows with any missing value or a non-positive 'Suma'.\n",
"print(len(df_data))\n",
"df_data = df_data.dropna()\n",
"df_data = df_data.loc[df_data['Suma'] > 0]\n",
"print(len(df_data))\n",
2024-05-06 23:50:22 +02:00
"\n",
"# feature_names lists 'Ludnosc_kobiety' and 'Ludnosc_mezczyzni' twice; remove the repeats\n",
"# (order preserved) so X has no duplicated columns and feature_names stays aligned with X.\n",
"feature_names = list(dict.fromkeys(feature_names))\n",
"\n",
"X = df_data[feature_names]\n",
"y = df_data['Suma']\n",
"\n",
2024-05-10 19:17:54 +02:00
"# 0/1 flag used to colour the scatter plots: 1 where 'Gestosc_zaludnienia' exceeds 1.5.\n",
"color_column = (df_data['Gestosc_zaludnienia'] > 1.5).astype(int)\n",
"\n",
"# The flag is split together with X and y so it stays row-aligned with the test set.\n",
"X_train, X_test, y_train, y_test, color_column_train, color_column_test = train_test_split(\n",
"    X, y, color_column, test_size=0.2, random_state=1)\n",
2024-05-06 23:50:22 +02:00
"\n",
"model = DecisionTreeRegressor(random_state=1)\n",
"model.fit(X_train, y_train)\n",
"\n",
"y_pred = model.predict(X_test)\n",
"mse = mean_squared_error(y_test, y_pred)\n",
2024-05-10 19:17:54 +02:00
"print('Mean Squared Error:', mse)"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 444,
2024-05-10 19:17:54 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-05-13 15:20:33 +02:00
"Gmina wiejska 0.14\n",
"Wojewodztwo_Dolnoslaskie 0.09\n",
"Dochody_podatek_rolny 0.07\n",
"Saldo_migracji_na_1000_ludnosci 0.06\n",
"Zmiana_liczby_ludnosci 0.05\n",
"Wojewodztwo_Warminsko_Mazurskie 0.05\n",
"Wojewodztwo_Pomorskie 0.04\n",
"Wojewodztwo_Opolskie 0.02\n",
"Saldo_migracji 0.01\n",
"Wojewodztwo_Mazowieckie 0.01\n",
"Turysci_ogolem 0.01\n",
"Turysci_zagraniczni 0.01\n",
"Wojewodztwo_Podkarpackie -0.00\n",
"Wplywy_z_oplaty_eksploatacyjnej -0.01\n",
"Wojewodztwo_Swietokrzyskie -0.01\n",
"Wojewodztwo_Zachodniopomorskie -0.01\n",
"Powierzchnia -0.01\n",
"Wojewodztwo_Slaskie -0.01\n",
"Wojewodztwo_Lubelskie -0.01\n",
"Obiekty_ogolem -0.02\n",
"Wojewodztwo_Lubuskie -0.02\n",
"Miejsca_noclegowe_ogolem -0.02\n",
"Wojewodztwo_Podlaskie -0.02\n",
"Dochody_podatek_PCC -0.02\n",
"Dochody_podatek_od_spadkow -0.02\n",
"Udzialy_w_podatkach_dochodowych_od_osob_prywatnych -0.02\n",
"Wynagrodzenie_w_relacji_do_sredniej -0.02\n",
"Dochody_z_uslug -0.02\n",
2024-05-10 19:17:54 +02:00
"Name: Suma, dtype: float64\n"
]
}
],
"source": [
"# Correlate numeric columns only: df_data still holds the string identifier 'Kod', and how\n",
"# corr() treats non-numeric columns without numeric_only differs between pandas versions.\n",
"correlation_matrix = df_data.corr(numeric_only=True)\n",
"\n",
"# Remove the target's correlation with itself by label instead of assuming it sorts first,\n",
"# then show the 28 columns most positively correlated with 'Suma'.\n",
"suma_correlations = correlation_matrix['Suma'].drop('Suma').sort_values(ascending=False)\n",
"print(suma_correlations.head(28))"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 445,
2024-05-10 19:17:54 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-05-13 15:20:33 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAHHCAYAAACRAnNyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABNmUlEQVR4nO3deXwUVb4+/qd6TSfp7uwbCQmEhJ2ABFBZREAR0RlwARm4sriMDrhx8ed4547AjIjznXGZcRREZHEEwQVcUFREFhGQJbJvCQQSspOt052ktzq/PyJ9jQmYhJCqNM/79aqXdNXpqk9XZtJPqs45JQkhBIiIiIhUSKN0AURERESXwqBCREREqsWgQkRERKrFoEJERESqxaBCREREqsWgQkRERKrFoEJERESqxaBCREREqsWgQkRERKrFoEJEv0qSJMybN0/pMhQ3fPhwDB8+3Pf67NmzkCQJK1asUKymX/pljUTtHYMKURt74403IEkSBg0a1OJ95OfnY968eThw4EDrFaZyW7duhSRJvkWv16Nz5864//77cebMGaXLa5adO3di3rx5qKioULoUItXTKV0A0bVm1apVSEpKwp49e5CVlYUuXbo0ex/5+fmYP38+kpKS0Ldv39YvUsUef/xxDBgwAG63GxkZGViyZAk+//xzHD58GHFxcW1aS2JiImpqaqDX65v1vp07d2L+/PmYNm0aQkJCrk5xRH6CV1SI2lB2djZ27tyJl19+GZGRkVi1apXSJbU7Q4cOxZQpUzB9+nS89tpr+Mc//oGysjKsXLnyku9xOBxXpRZJkhAQEACtVntV9k9EDCpEbWrVqlUIDQ3F2LFjcc8991wyqFRUVOCpp55CUlISjEYj4uPjcf/99+PChQvYunUrBgwYAACYPn2671bIxX4SSUlJmDZtWoN9/rLvgsvlwnPPPYf+/fvDarUiKCgIQ4cOxZYtW5r9uYqKiqDT6TB//vwG206ePAlJkvDvf/8bAOB2uzF//nykpKQgICAA4eHhGDJkCDZt2tTs4wLAiBEjANSFQACYN28eJEnCsWPH8Lvf/Q6hoaEYMmSIr/27776L/v37w2QyISwsDPfddx9yc3Mb7HfJkiVITk6GyWTCwIED8d133zVoc6k+KidOnMCECRMQGRkJk8mErl274k9/+pOvvqeffhoA0KlTJ9/P7+zZs1elRqL2jrd+iNrQqlWrcNddd8FgMGDSpElYtGgR9u7d6wseAGC32zF06FAcP34cM2bMwHXXXYcLFy7g008/xfnz59G9e3f85S9/wXPPPYeHH34YQ4cOBQDceOONzarFZrNh6dKlmDRpEh566CFUVVXh7bffxujRo7Fnz55m3VKKjo7GTTfdhPfffx9z586tt23t2rXQarW49957AdR9US9cuBAPPvggBg4cCJvNhn379iEjIwO33HJLsz4DAJw+fRoAEB4eXm/9vffei5SUFLzwwgsQQgAAFixYgD//+c+YMGECHnzwQZSUlOC1117DsGHD8OOPP/puw7z99tv4/e9/jxtvvBFPPvkkzpw5g9/85jcICwtDQkLCZes5dOgQhg4dCr1ej4cffhhJSUk4ffo0PvvsMyxYsAB33XUXTp06hffeew+vvPIKIiIiAACRkZFtViNRuyKIqE3s27dPABCbNm0SQgghy7KIj48XTzzxRL12zz33nAAg1q1b12AfsiwLIYTYu3evACCWL1/eoE1iYqKYOnVqg/U33XSTuOmmm3yvPR6PcDqd9dqUl5eL6OhoMWPGjHrrAYi5c+de9vO9+eabAoA4fPhwvfU9evQQI0aM8L1OS0sTY8eOvey+GrNlyxYBQCxbtkyUlJSI/Px88fnnn4ukpCQhSZLYu3evEEKIuXPnCgBi0qRJ9d5/9uxZodVqxYIFC+qtP3z4sNDpdL71LpdLREVFib59+9Y7P0uWLBEA6p3D7OzsBj+HYcOGCbPZLM6dO1fvOBd/dkII8f
e//10AENnZ2Ve9RqL2jrd+iNrIqlWrEB0djZtvvhlAXf+GiRMnYs2aNfB6vb52H330EdLS0jB+/PgG+5AkqdXq0Wq1MBgMAABZllFWVgaPx4P09HRkZGQ0e3933XUXdDod1q5d61t35MgRHDt2DBMnTvStCwkJwdGjR5GZmdmiumfMmIHIyEjExcVh7NixcDgcWLlyJdLT0+u1e+SRR+q9XrduHWRZxoQJE3DhwgXfEhMTg5SUFN8tr3379qG4uBiPPPKI7/wAwLRp02C1Wi9bW0lJCbZv344ZM2agY8eO9bY15WfXFjUStTd+E1S2b9+OO++8E3FxcZAkCR9//HGz3n/xvvYvl6CgoKtTMF1TvF4v1qxZg5tvvhnZ2dnIyspCVlYWBg0ahKKiImzevNnX9vTp0+jVq1eb1LVy5Ur06dPH11ckMjISn3/+OSorK5u9r4iICIwcORLvv/++b93atWuh0+lw1113+db95S9/QUVFBVJTU9G7d288/fTTOHToUJOP89xzz2HTpk349ttvcejQIeTn5+O//uu/GrTr1KlTvdeZmZkQQiAlJQWRkZH1luPHj6O4uBgAcO7cOQBASkpKvfdfHA59OReHSbf059cWNRK1N37TR8XhcCAtLQ0zZsyo90uxqebMmdPgL7CRI0fW6ztA1FLffvstCgoKsGbNGqxZs6bB9lWrVuHWW29tlWNd6i93r9dbb3TKu+++i2nTpmHcuHF4+umnERUVBa1Wi4ULF/r6fTTXfffdh+nTp+PAgQPo27cv3n//fYwcOdLXDwMAhg0bhtOnT+OTTz7B119/jaVLl+KVV17B4sWL8eCDD/7qMXr37o1Ro0b9ajuTyVTvtSzLkCQJGzdubHSUTnBwcBM+4dXVHmokamt+E1TGjBmDMWPGXHK70+nEn/70J7z33nuoqKhAr1698Le//c03CiI4OLjeL4GDBw/i2LFjWLx48dUuna4Bq1atQlRUFF5//fUG29atW4f169dj8eLFMJlMSE5OxpEjRy67v8vdRggNDW10IrFz587V+2v7ww8/ROfOnbFu3bp6+/tlZ9jmGDduHH7/+9/7bv+cOnUKzz77bIN2YWFhmD59OqZPnw673Y5hw4Zh3rx5TQoqLZWcnAwhBDp16oTU1NRLtktMTARQd3Xj4ogioG60UnZ2NtLS0i753ovnt6U/v7aokai98ZtbP79m1qxZ2LVrF9asWYNDhw7h3nvvxW233XbJ++RLly5Famqqb0QFUUvV1NRg3bp1uOOOO3DPPfc0WGbNmoWqqip8+umnAIC7774bBw8exPr16xvsS/w0euXiLcnGAklycjJ2794Nl8vlW7dhw4YGw1sv/sV+cZ8A8MMPP2DXrl0t/qwhISEYPXo03n//faxZswYGgwHjxo2r16a0tLTe6+DgYHTp0gVOp7PFx22Ku+66C1qtFvPnz6/3mYG6c3CxrvT0dERGRmLx4sX1zuGKFSt+dSbZyMhIDBs2DMuWLUNOTk6DY1x0qZ9fW9RI1N74zRWVy8nJycHy5cuRk5Pjm7lyzpw5+PLLL7F8+XK88MIL9drX1tZi1apV+OMf/6hEueRnPv30U1RVVeE3v/lNo9uvv/563+RvEydOxNNPP40PP/wQ9957L2bMmIH+/fujrKwMn376KRYvXoy0tDQkJycjJCQEixcvhtlsRlBQEAYNGoROnTrhwQcfxIcffojbbrsNEyZMwOnTp/Huu+8iOTm53nHvuOMOrFu3DuPHj8fYsWORnZ2NxYsXo0ePHrDb7S3+vBMnTsSUKVPwxhtvYPTo0Q1mXu3RoweGDx+O/v37IywsDPv27cOHH36IWbNmtfiYTZGcnIznn38ezz77LM6ePYtx48bBbDYjOzsb69evx8MPP4w5c+ZAr9fj+eefx+9//3uMGDECEydORHZ2NpYvX96k/h//+te/MGTIEFx33XV4+OGH0alTJ5w9exaff/6575EH/fv3BwD86U9/wn333Q
e9Xo8777yzzWokalcUGm10VQEQ69ev973esGGDACCCgoLqLTqdTkyYMKHB+1evXi10Op0oLCxsw6rJX915550iICB
2024-05-10 19:17:54 +02:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Both axes share one upper bound: the largest actual or predicted value.\n",
"axis_upper = max(max(y_test), max(y_pred))\n",
"\n",
"plt.scatter(y_test, y_pred, c=color_column_test, cmap='viridis', alpha=0.5)\n",
"plt.title('Actual vs Predicted')\n",
"plt.xlabel('Actual')\n",
"plt.ylabel('Predicted')\n",
"\n",
"plt.xlim(0, axis_upper)\n",
"plt.ylim(0, axis_upper)\n",
"\n",
"plt.show()"
2024-05-06 23:50:22 +02:00
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 446,
2024-05-06 23:50:22 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-05-13 15:20:33 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHHCAYAAABTMjf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABvVUlEQVR4nO3dd3hc5Z328e+ZPmqj3ixZlnvvHbBNNWBIDAltQ0xNeyELYWEXdjeUJMTJJoRkE4LDUkwSCN1AIBRjXAAbjHvvsiRbvc2oTj3vH8IKQrKxjIrtuT/XNRfMqb9zNB7dOuc5z2OYpmkiIiIiEoUsfV2AiIiISF9REBIREZGopSAkIiIiUUtBSERERKKWgpCIiIhELQUhERERiVoKQiIiIhK1FIREREQkaikIiYiISNRSEBKRHmEYBvfff39fl9Hn5syZw5w5c9reHzx4EMMwWLx4cZ/V9EVfrFEkmigIiZwC/vjHP2IYBtOmTTvhbZSUlHD//fezadOm7ivsJLdixQoMw2h72e12Bg4cyIIFCzhw4EBfl9clq1ev5v7776eurq6vSxE5rdj6ugAR+XLPPPMMAwYMYO3atezbt4/Bgwd3eRslJSU88MADDBgwgPHjx3d/kSexf/3Xf2XKlCkEg0E2bNjAY489xptvvsnWrVvJzs7u1Vry8vJobm7Gbrd3ab3Vq1fzwAMPcP3115OYmNgzxYlEIV0REjnJFRQUsHr1an7zm9+QlpbGM88809clnXLOOussrr32Wm644QZ+//vf8+tf/5qamhqefvrpo67T2NjYI7UYhoHL5cJqtfbI9kWkaxSERE5yzzzzDElJScybN49vfvObRw1CdXV1/OhHP2LAgAE4nU5ycnJYsGABVVVVrFixgilTpgBwww03tN0qOtJOZcCAAVx//fUdtvnFtiOBQIB7772XSZMm4fF4iI2N5ayzzmL58uVdPq7y8nJsNhsPPPBAh3m7d+/GMAz+8Ic/ABAMBnnggQcYMmQILpeLlJQUzjzzTJYuXdrl/QKcc845QGvIBLj//vsxDIMdO3bwL//yLyQlJXHmmWe2Lf/Xv/6VSZMm4Xa7SU5O5uqrr6a4uLjDdh977DEGDRqE2+1m6tSpfPDBBx2WOVoboV27dnHllVeSlpaG2+1m2LBh/Nd//VdbfXfddRcA+fn5bT+/gwcP9kiNItFEt8ZETnLPPPMMl19+OQ6Hg2uuuYZHH32UTz/9tC3YADQ0NHDWWWexc+dObrzxRiZOnEhVVRWvv/46hw4dYsSIEfzkJz/h3nvv5bvf/S5nnXUWADNnzuxSLT6fj8cff5xrrrmG73znO9TX1/PEE08wd+5c1q5d26VbbhkZGcyePZsXXniB++67r928559/HqvVyhVXXAG0BoGFCxdy8803M3XqVHw+H+vWrWPDhg2cf/75XToGgP379wOQkpLSbvoVV1zBkCFD+PnPf45pmgA8+OCD/PjHP+bKK6/k5ptvprKykt///vfMmjWLjRs3tt2meuKJJ/je977HzJkzuf322zlw4ABf+9rXSE5OJjc395j1bNmyhbPOOgu73c53v/tdBgwYwP79+/n73//Ogw8+yOWXX86ePXv429/+xsMPP0xqaioAaWlpvVajyGnLFJGT1rp160zAXLp0qWmaphmJRMycnBzztttua7fcvffeawLmK6+80mEbkUjENE3T/PTTT03AfOqppzosk5eXZ1533XUdps+ePducPXt22/tQKGT6/f52y9TW1poZGRnmjTfe2G46YN53333HPL4//elPJmBu3bq13fSRI0ea55xzTtv7cePGmfPmzTvmtjqzfPlyEzCffPJJs7Ky0iwpKTHffPNNc8CAAaZhGOann35qmqZp3nfffSZgXnPNNe3WP3jwoGm1Ws0HH3yw3fStW7eaNputbXogEDDT09PN8ePHtzs/jz32mAm0O4cFBQUdfg6zZs0y4+PjzcLCwnb7OfKzM0
3T/NWvfmUCZkFBQY/XKBJNdGtM5CT2zDPPkJGRwdlnnw20ti+56qqreO655wiHw23Lvfzyy4wbN47LLruswzYMw+i2eqxWKw6HA4BIJEJNTQ2hUIjJkyezYcOGLm/v8ssvx2az8fzzz7dN27ZtGzt27OCqq65qm5aYmMj27dvZu3fvCdV94403kpaWRnZ2NvPmzaOxsZGnn36ayZMnt1vu+9//frv3r7zyCpFIhCuvvJKqqqq2V2ZmJkOGDGm7Jbhu3ToqKir4/ve/33Z+AK6//no8Hs8xa6usrGTVqlXceOON9O/fv9284/nZ9UaNIqezqA5Cq1at4tJLLyU7OxvDMHj11Ve7tP6RdgVffMXGxvZMwRJVwuEwzz33HGeffTYFBQXs27ePffv2MW3aNMrLy1m2bFnbsvv372f06NG9UtfTTz/N2LFj29rqpKWl8eabb+L1eru8rdTUVM4991xeeOGFtmnPP/88NpuNyy+/vG3aT37yE+rq6hg6dChjxozhrrvuYsuWLce9n3vvvZelS5fy/vvvs2XLFkpKSvj2t7/dYbn8/Px27/fu3YtpmgwZMoS0tLR2r507d1JRUQFAYWEhAEOGDGm3/pHH9Y/lyGP8J/rz640aRU5nUd1GqLGxkXHjxnHjjTe2+9I9XnfeeWeHvyDPPffcdm03RE7U+++/T2lpKc899xzPPfdch/nPPPMMF1xwQbfs62hXHsLhcLunm/76179y/fXXM3/+fO666y7S09OxWq0sXLiwrd1NV1199dXccMMNbNq0ifHjx/PCCy9w7rnntrWDAZg1axb79+/ntdde49133+Xxxx/n4YcfZtGiRdx8881fuo8xY8Zw3nnnfelybre73ftIJIJhGLz11ludPuUVFxd3HEfYs06FGkVOZlEdhC666CIuuuiio873+/3813/9F3/729+oq6tj9OjR/PKXv2x7iiYuLq7dl8zmzZvZsWMHixYt6unSJQo888wzpKen88gjj3SY98orr7BkyRIWLVqE2+1m0KBBbNu27ZjbO9ZtlqSkpE476issLGx3teCll15i4MCBvPLKK+2298XGzl0xf/58vve977XdHtuzZw/33HNPh+WSk5O54YYbuOGGG2hoaGDWrFncf//9xxWETtSgQYMwTZP8/HyGDh161OXy8vKA1qszR55Ig9an3QoKChg3btxR1z1yfk/059cbNYqczqL61tiXufXWW1mzZg3PPfccW7Zs4YorruDCCy88ajuFxx9/nKFDh7Y9kSNyopqbm3nllVe45JJL+OY3v9nhdeutt1JfX8/rr78OwDe+8Q02b97MkiVLOmzL/OzppyO3bDsLPIMGDeLjjz8mEAi0TXvjjTc6PH595IrDkW0CfPLJJ6xZs+aEjzUxMZG5c+fywgsv8Nxzz+FwOJg/f367Zaqrq9u9j4uLY/Dgwfj9/hPe7/G4/PLLsVqtPPDAA+2OGVrPwZG6Jk+eTFpaGosWLWp3DhcvXvylPUGnpaUxa9YsnnzySYqKijrs44ij/fx6o0aR01lUXxE6lqKiIp566imKioraep698847efvtt3nqqaf4+c9/3m75lpYWnnnmGe6+++6+KFdOM6+//jr19fV87Wtf63T+9OnT2zpXvOqqq7jrrrt46aWXuOKKK7jxxhuZNGkSNTU1vP766yxatIhx48YxaNAgEhMTWbRoEfHx8cTGxjJt2jTy8/O5+eabeemll7jwwgu58sor2b9/P3/9618ZNGhQu/1ecsklvPLKK1x22WXMmzePgoICFi1axMiRI2loaDjh473qqqu49tpr+eMf/8jcuXM79Jw8cuRI5syZw6RJk0hOTmbdunW89NJL3HrrrSe8z+MxaNAgfvazn3HPPfdw8OBB5s+fT3x8PAUFBSxZsoTvfve73Hnnndjtdn72s5/xve99j3POOYerrrqKgoICnnrqqeNqf/O///u/nHnmmUycOJHvfve75Ofnc/DgQd588822IV
EmTZoEwH/9139x9dVXY7fbufTSS3utRpHTVh89rXbSAcwlS5a0vX/jjTdMwIyNjW33stls5pVXXtlh/Weffda02Wx
2024-05-06 23:50:22 +02:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-05-10 19:17:54 +02:00
"# Scatter y_test (x axis, 'Actual') against y_pred (y axis, 'Predicted'),\n",
"# colouring every point by its value in color_column_test.\n",
"axis_limit = 3 * 10**7  # shared upper bound of both axes\n",
"\n",
"fig, ax = plt.subplots()\n",
"points = ax.scatter(y_test, y_pred, alpha=0.5, c=color_column_test, cmap='viridis')\n",
"\n",
"# Diagonal y = x: a point lying on it was predicted exactly.\n",
"ax.plot([0, axis_limit], [0, axis_limit], color='grey', linestyle='--', linewidth=1, label='Perfect prediction')\n",
"\n",
"# Without a colorbar the viridis colours cannot be mapped back to values.\n",
"fig.colorbar(points, ax=ax, label='color_column_test')\n",
"\n",
"ax.set_xlabel('Actual')\n",
"ax.set_ylabel('Predicted')\n",
"ax.set_title('Actual vs Predicted')\n",
"ax.set_xlim(0, axis_limit)\n",
"ax.set_ylim(0, axis_limit)\n",
"ax.legend(loc='upper left')\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 447,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHHCAYAAABTMjf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAADirklEQVR4nOzdd3xc9Zno/8+ZPhpp1Hu3JMuWe8W90IxpISRAyBJaSLkL2WSzye6S3BQ2xTe/hE3u7t3gsARIFgiEhF4Nrrjg3m3JVu+9jMrUc87vj7FlFMnGsiWN5Hner5deMOecOec5Y2nmmW95voqu6zpCCCGEEGHIEOoAhBBCCCFCRRIhIYQQQoQtSYSEEEIIEbYkERJCCCFE2JJESAghhBBhSxIhIYQQQoQtSYSEEEIIEbYkERJCCCFE2JJESAghhBBhSxIhIcSoUBSFH//4x6EOI+RWrVrFqlWr+h9XVlaiKArPPvtsyGL6W38boxDhRBIhISaA3/72tyiKwlVXXXXJ56ivr+fHP/4xhw4dGrnAxrktW7agKEr/j9lsZtKkSdx7772Ul5eHOrxh2blzJz/+8Y/p7OwMdShCXFFMoQ5ACPHpnn/+eXJyctizZw+lpaXk5+cP+xz19fU89thj5OTkMHv27JEPchz7h3/4BxYsWIDf7+fAgQM8+eSTvP322xw9epS0tLQxjSU7Oxu3243ZbB7W83bu3Mljjz3G/fffT0xMzOgEJ0QYkhYhIca5iooKdu7cyb//+7+TmJjI888/H+qQJpzly5dzzz338MADD/Cf//mf/OpXv6K9vZ0//OEP531Ob2/vqMSiKAo2mw2j0Tgq5xdCDI8kQkKMc88//zyxsbHcdNNNfP7znz9vItTZ2ck//uM/kpOTg9VqJSMjg3vvvZfW1la2bNnCggULAHjggQf6u4rOjlPJycnh/vvvH3TOvx074vP5+OEPf8i8efOIjo7G4XCwfPlyNm/ePOz7ampqwmQy8dhjjw3aV1JSgqIo/L//9/8A8Pv9PPbYYxQUFGCz2YiPj2fZsmV88MEHw74uwNVXXw0Ek0yAH//4xyiKwokTJ/jiF79IbGwsy5Yt6z/+ueeeY968edjtduLi4vjCF75ATU3NoPM++eST5OXlYbfbWbhwIR999NGgY843Rqi4uJg777yTxMRE7HY7hYWFfP/73++P77vf/S4Aubm5/f9+lZWVoxKjEOFEusaEGOeef/55br/9diwWC3fffTdPPPEEe/fu7U9sAHp6eli+fDknT57kwQcfZO7cubS2tvLGG29QW1vL1KlT+bd/+zd++MMf8tWvfpXly5cDsGTJkmHF4nK5eOqpp7j77rv5yle+Qnd3N7///e9Zs2YNe/bsGVaXW3JyMitXruTPf/4zP/rRjwbse+mllzAajdxxxx1AMBFYt24dDz30EAsXLsTlcrFv3z4OHDjAddddN6x7ACgrKwMgPj5+wPY77riDgoICfv7zn6PrOgA/+9nP+MEPfsCdd97JQw89REtLC//5n//JihUrOHjwYH831e9//3u+9rWvsWTJEr71rW9RXl7OrbfeSlxcHJmZmReM58iRIyxfvhyz2cxXv/pVcnJyKCsr48033+RnP/sZt99+O6dOneJPf/oTv/71r0lISAAgMTFxzGIU4oqlCyHGrX379umA/sEHH+i6ruuapukZGRn6N7/5zQHH/fCHP9QB/ZVXXhl0Dk3TdF3X9b179+qA/swzzww6Jjs7W7/vvvsGbV+5cqW+cuXK/seBQED3er0Djuno6NCTk5P1Bx98cMB2QP/Rj350wfv73e9+pwP60aNHB2wvKirSr7766v7Hs2bN0m+66aYLnmsomzdv1gH96aef1ltaWvT6+nr97bff1nNycnRFUfS9e/fquq7rP/rRj3RAv/vuuwc8v7KyUjcajfrPfvazAduPHj2qm0ym/u0+n09PSkrSZ8+ePeD1efLJJ3VgwGtYUVEx6N
9hxYoVelRUlF5VVTXgOmf/7XRd13/5y1/qgF5RUTHqMQoRTqRrTIhx7Pnnnyc5OZnVq1cDwfEld911Fy+++CKqqvYf99e//pVZs2bx2c9+dtA5FEUZsXiMRiMWiwUATdNob28nEAgwf/58Dhw4MOzz3X777ZhMJl566aX+bceOHePEiRPcdddd/dtiYmI4fvw4p0+fvqS4H3zwQRITE0lLS+Omm26it7eXP/zhD8yfP3/AcV//+tcHPH7llVfQNI0777yT1tbW/p+UlBQKCgr6uwT37dtHc3MzX//61/tfH4D777+f6OjoC8bW0tLCtm3bePDBB8nKyhqw72L+7cYiRiGuZGGdCG3bto1bbrmFtLQ0FEXhtddeG/Y5dF3nV7/6FZMnT8ZqtZKens7PfvazkQ9WhB1VVXnxxRdZvXo1FRUVlJaWUlpaylVXXUVTUxMbN27sP7asrIzp06ePSVx/+MMfmDlzZv9YncTERN5++226urqGfa6EhASuueYa/vznP/dve+mllzCZTNx+++392/7t3/6Nzs5OJk+ezIwZM/jud7/LkSNHLvo6P/zhD/nggw/YtGkTR44cob6+ni996UuDjsvNzR3w+PTp0+i6TkFBAYmJiQN+Tp48SXNzMwBVVVUAFBQUDHj+2en6F3J2Gv+l/vuNRYxCXMnCeoxQb28vs2bN4sEHHxzwpjsc3/zmN9mwYQO/+tWvmDFjBu3t7bS3t49wpCIcbdq0iYaGBl588UVefPHFQfuff/55rr/++hG51vlaHlRVHTC76bnnnuP+++/ntttu47vf/S5JSUkYjUbWrVvXP+5muL7whS/wwAMPcOjQIWbPns2f//xnrrnmmv5xMAArVqygrKyM119/nQ0bNvDUU0/x61//mvXr1/PQQw996jVmzJjBtdde+6nH2e32AY81TUNRFN59990hZ3lFRkZexB2OrokQoxDjWVgnQmvXrmXt2rXn3e/1evn+97/Pn/70Jzo7O5k+fTq/+MUv+mfRnDx5kieeeIJjx45RWFgIDP5GKcSlev7550lKSuK//uu/Bu175ZVXePXVV1m/fj12u528vDyOHTt2wfNdqJslNjZ2yEJ9VVVVA1oL/vKXvzBp0iReeeWVAef728HOw3Hbbbfxta99rb977NSpUzz66KODjouLi+OBBx7ggQceoKenhxUrVvDjH//4ohKhS5WXl4eu6+Tm5jJ58uTzHpednQ0EW2fOzkiD4Gy3iooKZs2add7nnn19L/XfbyxiFOJKFtZdY5/mkUceYdeuXbz44oscOXKEO+64gxtuuKF/nMKbb77JpEmTeOutt8jNzSUnJ4eHHnpIWoTEZXO73bzyyivcfPPNfP7znx/088gjj9Dd3c0bb7wBwOc+9zkOHz7Mq6++Ouhc+pnZTw6HA2DIhCcvL4+PP/4Yn8/Xv+2tt94aNP36bIvD2XMC7N69m127dl3yvcbExLBmzRr+/Oc/8+KLL2KxWLjtttsGHNPW1jbgcWRkJPn5+Xi93ku+7sW4/fbbMRqNPPbYYwPuGYKvwdm45s+fT2JiIuvXrx/wGj777LOfWgk6MTGRFStW8PTTT1NdXT3oGmed799vLGIU4koW1i1CF1JdXc0zzzxDdXV1f+XZ73znO7z33ns888wz/PznP6e8vJyqqipefvll/vjHP6KqKv/4j//I5z//eTZt2hTiOxAT2RtvvEF3dze33nrrkPsXLVrUX1zxrrvu4rvf/S5/+ctfuOOOO3jwwQeZN28e7e3tvPHGG6xfv55Zs2aRl5dHTEwM69evJyoqCofDwVVXXUVubi4PPfQQf/nLX7jhhhu48847KSsr47nnniMvL2/AdW+++WZeeeUVPvvZz3LTTTdRUVHB+vXrKSoqoqen55Lv96677uKee+7ht7/9LWvWrBlUObmoqIhVq1Yxb9484uLi2LdvH3/5y1945JFHLvmaFyMvL4+f/vSnPProo1RWVnLbbbcRFRVFRUUFr7
76Kl/96lf5zne+g9ls5qc//Slf+9rXuPrqq7nrrruoqKjgmWeeuajxN//xH//BsmXLmDt3Ll/96lfJzc2lsrKSt99
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Same actual-vs-predicted scatter as the previous cell, with both axes\n",
"# cut off at 3 * 10**6 to zoom in on the small values.\n",
"axis_limit = 3 * 10**6  # shared upper bound of both axes\n",
"\n",
"fig, ax = plt.subplots()\n",
"points = ax.scatter(y_test, y_pred, alpha=0.5, c=color_column_test, cmap='viridis')\n",
"\n",
"# Diagonal y = x: a point lying on it was predicted exactly.\n",
"ax.plot([0, axis_limit], [0, axis_limit], color='grey', linestyle='--', linewidth=1, label='Perfect prediction')\n",
"\n",
"# Without a colorbar the viridis colours cannot be mapped back to values.\n",
"fig.colorbar(points, ax=ax, label='color_column_test')\n",
"\n",
"ax.set_xlabel('Actual')\n",
"ax.set_ylabel('Predicted')\n",
"ax.set_title('Actual vs Predicted')\n",
"ax.set_xlim(0, axis_limit)\n",
"ax.set_ylim(0, axis_limit)\n",
"ax.legend(loc='upper left')\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 448,
2024-05-10 19:17:54 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-05-13 15:20:33 +02:00
"|--- Wplywy_z_oplaty_eksploatacyjnej <= -7751.83\n",
"| |--- value: [132625880.92]\n",
"|--- Wplywy_z_oplaty_eksploatacyjnej > -7751.83\n",
"| |--- Wplywy_z_oplaty_skarbowej <= 4494.45\n",
"| | |--- Wymeldowania_ogolem <= 56.50\n",
"| | | |--- Ludnosc_ogolem <= 3786.50\n",
"| | | | |--- Wplywy_z_oplaty_skarbowej <= 3336.50\n",
"| | | | | |--- Dochody_podatek_rolny <= 52241.90\n",
"| | | | | | |--- value: [223720.88]\n",
"| | | | | |--- Dochody_podatek_rolny > 52241.90\n",
"| | | | | | |--- Zameldowania_z_miast_ogolem <= 5.50\n",
"| | | | | | | |--- value: [146790.02]\n",
"| | | | | | |--- Zameldowania_z_miast_ogolem > 5.50\n",
"| | | | | | | |--- value: [147649.40]\n",
"| | | | |--- Wplywy_z_oplaty_skarbowej > 3336.50\n",
"| | | | | |--- Wymeldowania_do_miast_mezczyzni <= 4.00\n",
"| | | | | | |--- value: [4988446.69]\n",
"| | | | | |--- Wymeldowania_do_miast_mezczyzni > 4.00\n",
"| | | | | | |--- Dochody_z_najmu_i_dzierzawy <= 47510.35\n",
"| | | | | | | |--- value: [8029322.26]\n",
"| | | | | | |--- Dochody_z_najmu_i_dzierzawy > 47510.35\n",
"| | | | | | | |--- value: [7792324.25]\n",
"| | | |--- Ludnosc_ogolem > 3786.50\n",
"| | | | |--- value: [35264017.53]\n",
"| | |--- Wymeldowania_ogolem > 56.50\n",
"| | | |--- value: [106444841.13]\n",
"| |--- Wplywy_z_oplaty_skarbowej > 4494.45\n",
"| | |--- Wplywy_z_oplaty_skarbowej <= 12330.00\n",
"| | | |--- Dochody_podatek_od_dzialalnosci_gospodarczej <= -156.50\n",
"| | | | |--- value: [64551082.93]\n",
"| | | |--- Dochody_podatek_od_dzialalnosci_gospodarczej > -156.50\n",
"| | | | |--- Dochody_podatek_lesny <= 1523.00\n",
"| | | | | |--- Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym <= 393.00\n",
"| | | | | | |--- value: [50040444.36]\n",
"| | | | | |--- Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym > 393.00\n",
"| | | | | | |--- value: [593983.01]\n",
"| | | | |--- Dochody_podatek_lesny > 1523.00\n",
"| | | | | |--- Dochody_podatek_PCC <= 206681.93\n",
"| | | | | | |--- Bezrobotni_mezczyzni <= 112.00\n",
"| | | | | | | |--- Wplywy_z_oplaty_targowej <= 1527.50\n",
"| | | | | | | | |--- Zameldowania_z_miast_ogolem <= 26.50\n",
"| | | | | | | | | |--- Zameldowania_mezczyzni <= 6.00\n",
"| | | | | | | | | | |--- Zameldowania_kobiety <= 6.00\n",
"| | | | | | | | | | | |--- value: [1029930.53]\n",
"| | | | | | | | | | |--- Zameldowania_kobiety > 6.00\n",
"| | | | | | | | | | | |--- value: [1142691.81]\n",
"| | | | | | | | | |--- Zameldowania_mezczyzni > 6.00\n",
"| | | | | | | | | | |--- Dochody_podatek_lesny <= 8673.77\n",
"| | | | | | | | | | | |--- value: [1080165.70]\n",
"| | | | | | | | | | |--- Dochody_podatek_lesny > 8673.77\n",
"| | | | | | | | | | | |--- truncated branch of depth 11\n",
"| | | | | | | | |--- Zameldowania_z_miast_ogolem > 26.50\n",
"| | | | | | | | | |--- Wymeldowania_mezczyzni <= 27.50\n",
"| | | | | | | | | | |--- Dochody_podatek_od_srodkow_transportowych <= 144168.41\n",
2024-05-10 19:17:54 +02:00
"| | | | | | | | | | | |--- truncated branch of depth 3\n",
2024-05-13 15:20:33 +02:00
"| | | | | | | | | | |--- Dochody_podatek_od_srodkow_transportowych > 144168.41\n",
"| | | | | | | | | | | |--- value: [3266211.60]\n",
"| | | | | | | | | |--- Wymeldowania_mezczyzni > 27.50\n",
"| | | | | | | | | | |--- Wymeldowania_na_wies_ogolem <= 34.50\n",
2024-05-10 19:17:54 +02:00
"| | | | | | | | | | | |--- truncated branch of depth 4\n",
2024-05-13 15:20:33 +02:00
"| | | | | | | | | | |--- Wymeldowania_na_wies_ogolem > 34.50\n",
"| | | | | | | | | | | |--- value: [886572.54]\n",
"| | | | | | | |--- Wplywy_z_oplaty_targowej > 1527.50\n",
"| | | | | | | | |--- Dochody_dofinansowanie_inwestycyjne <= 500.00\n",
"| | | | | | | | | |--- Ludnosc_ogolem <= 3521.00\n",
"| | | | | | | | | | |--- Wplywy_z_oplaty_targowej <= 9416.50\n",
"| | | | | | | | | | | |--- value: [2852332.52]\n",
"| | | | | | | | | | |--- Wplywy_z_oplaty_targowej > 9416.50\n",
"| | | | | | | | | | | |--- value: [2869988.58]\n",
"| | | | | | | | | |--- Ludnosc_ogolem > 3521.00\n",
"| | | | | | | | | | |--- Dochody_podatek_od_spadkow <= 6173.00\n",
"| | | | | | | | | | | |--- value: [4041277.35]\n",
"| | | | | | | | | | |--- Dochody_podatek_od_spadkow > 6173.00\n",
2024-05-10 19:17:54 +02:00
"| | | | | | | | | | | |--- truncated branch of depth 2\n",
2024-05-13 15:20:33 +02:00
"| | | | | | | | |--- Dochody_dofinansowanie_inwestycyjne > 500.00\n",
"| | | | | | | | | |--- Zameldowania_ze_wsi_mezczyzni <= 11.00\n",
"| | | | | | | | | | |--- Dochody_z_uslug <= 254898.80\n",
2024-05-10 19:17:54 +02:00
"| | | | | | | | | | | |--- truncated branch of depth 2\n",
2024-05-13 15:20:33 +02:00
"| | | | | | | | | | |--- Dochody_z_uslug > 254898.80\n",
"| | | | | | | | | | | |--- truncated branch of depth 4\n",
"| | | | | | | | | |--- Zameldowania_ze_wsi_mezczyzni > 11.00\n",
"| | | | | | | | | | |--- Wymeldowania_na_wies_ogolem <= 21.00\n",
"| | | | | | | | | | | |--- value: [3043529.06]\n",
"| | | | | | | | | | |--- Wymeldowania_na_wies_ogolem > 21.00\n",
"| | | | | | | | | | | |--- value: [1660371.13]\n",
"| | | | | | |--- Bezrobotni_mezczyzni > 112.00\n",
"| | | | | | | |--- Dochody_podatek_od_srodkow_transportowych <= 112158.71\n",
"| | | | | | | | |--- Zameldowania_z_miast_kobiety <= 29.50\n",
"| | | | | | | | | |--- Wymeldowania_na_wies_mezczyzni <= 11.50\n",
"| | | | | | | | | | |--- Udzialy_w_podatkach_dochodowych_razem <= 2784626.12\n",
2024-05-10 19:17:54 +02:00
"| | | | | | | | | | | |--- truncated branch of depth 5\n",
2024-05-13 15:20:33 +02:00
"| | | | | | | | | | |--- Udzialy_w_podatkach_dochodowych_razem > 2784626.12\n",
"| | | | | | | | | | | |--- value: [1011703.86]\n",
"| | | | | | | | | |--- Wymeldowania_na_wies_mezczyzni > 11.50\n",
"| | | | | | | | | | |--- Zameldowania_z_miast_ogolem <= 19.00\n",
"| | | | | | | | | | | |--- truncated branch of depth 3\n",
"| | | | | | | | | | |--- Zameldowania_z_miast_ogolem > 19.00\n",
"| | | | | | | | | | | |--- value: [5361057.90]\n",
"| | | | | | | | |--- Zameldowania_z_miast_kobiety > 29.50\n",
"| | | | | | | | | |--- value: [17138312.52]\n",
"| | | | | | | |--- Dochody_podatek_od_srodkow_transportowych > 112158.71\n",
"| | | | | | | | |--- Udzialy_w_podatkach_dochodowych_od_osob_fizycznych <= 2183203.00\n",
"| | | | | | | | | |--- Ludnosc_kobiety_w_wieku_produkcyjnym <= 1325.50\n",
"| | | | | | | | | | |--- value: [16479146.65]\n",
"| | | | | | | | | |--- Ludnosc_kobiety_w_wieku_produkcyjnym > 1325.50\n",
"| | | | | | | | | | |--- Udzialy_w_podatkach_dochodowych_razem <= 1710148.12\n",
2024-05-10 19:17:54 +02:00
"| | | | | | | | | | | |--- truncated branch of depth 2\n",
2024-05-13 15:20:33 +02:00
"| | | | | | | | | | |--- Udzialy_w_podatkach_dochodowych_razem > 1710148.12\n",
"| | | | | | | | | | | |--- value: [19699871.32]\n",
"| | | | | | | | |--- Udzialy_w_podatkach_dochodowych_od_osob_fizycznych > 2183203.00\n",
"| | | | | | | | | |--- Ludnosc_kobiety <= 3141.00\n",
"| | | | | | | | | | |--- value: [5923600.26]\n",
"| | | | | | | | | |--- Ludnosc_kobiety > 3141.00\n",
"| | | | | | | | | | |--- value: [6925131.43]\n",
"| | | | | |--- Dochody_podatek_PCC > 206681.93\n",
"| | | | | | |--- Bezrobotne_kobiety <= 57.00\n",
"| | | | | | | |--- value: [22537878.79]\n",
"| | | | | | |--- Bezrobotne_kobiety > 57.00\n",
"| | | | | | | |--- value: [24414621.52]\n",
"| | |--- Wplywy_z_oplaty_skarbowej > 12330.00\n",
"| | | |--- Wymeldowania_na_wies_mezczyzni <= 20.50\n",
"| | | | |--- Dochody_podatek_lesny <= 15.00\n",
"| | | | | |--- Dochody_dofinansowanie_razem <= 257331.97\n",
"| | | | | | |--- value: [3008745.87]\n",
"| | | | | |--- Dochody_dofinansowanie_razem > 257331.97\n",
"| | | | | | |--- value: [70149861.07]\n",
"| | | | |--- Dochody_podatek_lesny > 15.00\n",
"| | | | | |--- Turysci_zagraniczni <= 3835.00\n",
"| | | | | | |--- Wplywy_z_oplaty_targowej <= 1021444.50\n",
"| | | | | | | |--- Wojewodztwo_Opolskie <= 0.50\n",
"| | | | | | | | |--- Turysci_zagraniczni <= 620.50\n",
"| | | | | | | | | |--- Dochody_z_najmu_i_dzierzawy <= 60187.61\n",
"| | | | | | | | | | |--- Dochody_z_najmu_i_dzierzawy <= 59932.48\n",
"| | | | | | | | | | | |--- truncated branch of depth 16\n",
"| | | | | | | | | | |--- Dochody_z_najmu_i_dzierzawy > 59932.48\n",
"| | | | | | | | | | | |--- value: [23878760.16]\n",
"| | | | | | | | | |--- Dochody_z_najmu_i_dzierzawy > 60187.61\n",
"| | | | | | | | | | |--- Wynagrodzenie_ogolem <= 6881.45\n",
"| | | | | | | | | | | |--- truncated branch of depth 31\n",
"| | | | | | | | | | |--- Wynagrodzenie_ogolem > 6881.45\n",
2024-05-10 19:17:54 +02:00
"| | | | | | | | | | | |--- truncated branch of depth 2\n",
2024-05-13 15:20:33 +02:00
"| | | | | | | | |--- Turysci_zagraniczni > 620.50\n",
"| | | | | | | | | |--- Dochody_podatek_rolny <= 1169820.38\n",
"| | | | | | | | | | |--- Wojewodztwo_Lubelskie <= 0.50\n",
"| | | | | | | | | | | |--- truncated branch of depth 7\n",
"| | | | | | | | | | |--- Wojewodztwo_Lubelskie > 0.50\n",
"| | | | | | | | | | | |--- value: [10510654.58]\n",
"| | | | | | | | | |--- Dochody_podatek_rolny > 1169820.38\n",
"| | | | | | | | | | |--- Ludnosc_w_wieku_produkcyjnym <= 7303.50\n",
"| | | | | | | | | | | |--- value: [16143701.14]\n",
"| | | | | | | | | | |--- Ludnosc_w_wieku_produkcyjnym > 7303.50\n",
"| | | | | | | | | | | |--- value: [9428912.87]\n",
"| | | | | | | |--- Wojewodztwo_Opolskie > 0.50\n",
"| | | | | | | | |--- Gestosc_zaludnienia <= 0.04\n",
"| | | | | | | | | |--- value: [29647883.87]\n",
"| | | | | | | | |--- Gestosc_zaludnienia > 0.04\n",
"| | | | | | | | | |--- Dochody_podatek_od_spadkow <= 66477.52\n",
"| | | | | | | | | | |--- Dochody_podatek_od_dzialalnosci_gospodarczej <= 19548.15\n",
"| | | | | | | | | | | |--- truncated branch of depth 5\n",
"| | | | | | | | | | |--- Dochody_podatek_od_dzialalnosci_gospodarczej > 19548.15\n",
2024-05-10 19:17:54 +02:00
"| | | | | | | | | | | |--- truncated branch of depth 3\n",
2024-05-13 15:20:33 +02:00
"| | | | | | | | | |--- Dochody_podatek_od_spadkow > 66477.52\n",
"| | | | | | | | | | |--- value: [11839160.68]\n",
"| | | | | | |--- Wplywy_z_oplaty_targowej > 1021444.50\n",
"| | | | | | | |--- value: [16775284.13]\n",
"| | | | | |--- Turysci_zagraniczni > 3835.00\n",
"| | | | | | |--- Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym <= 2067.50\n",
"| | | | | | | |--- Ludnosc_w_wieku_poprodukcyjnym <= 1294.00\n",
"| | | | | | | | |--- Dochody_podatek_od_srodkow_transportowych <= 350567.75\n",
"| | | | | | | | | |--- Dochody_podatek_od_nieruchomosci <= 10820965.50\n",
"| | | | | | | | | | |--- Wymeldowania_na_wies_ogolem <= 22.50\n",
2024-05-10 19:17:54 +02:00
"| | | | | | | | | | | |--- truncated branch of depth 3\n",
2024-05-13 15:20:33 +02:00
"| | | | | | | | | | |--- Wymeldowania_na_wies_ogolem > 22.50\n",
"| | | | | | | | | | | |--- value: [5838.01]\n",
"| | | | | | | | | |--- Dochody_podatek_od_nieruchomosci > 10820965.50\n",
"| | | | | | | | | | |--- value: [523333.48]\n",
"| | | | | | | | |--- Dochody_podatek_od_srodkow_transportowych > 350567.75\n",
"| | | | | | | | | |--- value: [3219570.01]\n",
"| | | | | | | |--- Ludnosc_w_wieku_poprodukcyjnym > 1294.00\n",
"| | | | | | | | |--- Bezrobotni_ogolem <= 326.25\n",
"| | | | | | | | | |--- value: [9680488.61]\n",
"| | | | | | | | |--- Bezrobotni_ogolem > 326.25\n",
"| | | | | | | | | |--- value: [6533125.31]\n",
"| | | | | | |--- Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym > 2067.50\n",
"| | | | | | | |--- value: [68031871.39]\n",
"| | | |--- Wymeldowania_na_wies_mezczyzni > 20.50\n",
"| | | | |--- Dochody_podatek_rolny <= 3939909.12\n",
"| | | | | |--- Turysci_zagraniczni <= 14.50\n",
"| | | | | | |--- Dochody_podatek_od_srodkow_transportowych <= 510970.09\n",
"| | | | | | | |--- Dochody_podatek_od_dzialalnosci_gospodarczej <= 574395.34\n",
"| | | | | | | | |--- Bezrobotni_do_25_roku_zycia <= 287.25\n",
"| | | | | | | | | |--- Wynagrodzenie_ogolem <= 4332.75\n",
"| | | | | | | | | | |--- Wynagrodzenie_w_relacji_do_sredniej <= 97.90\n",
"| | | | | | | | | | | |--- truncated branch of depth 27\n",
"| | | | | | | | | | |--- Wynagrodzenie_w_relacji_do_sredniej > 97.90\n",
2024-05-10 19:17:54 +02:00
"| | | | | | | | | | | |--- truncated branch of depth 3\n",
2024-05-13 15:20:33 +02:00
"| | | | | | | | | |--- Wynagrodzenie_ogolem > 4332.75\n",
"| | | | | | | | | | |--- Udzialy_w_podatkach_dochodowych_od_osob_prywatnych <= 1913257.88\n",
"| | | | | | | | | | | |--- truncated branch of depth 23\n",
"| | | | | | | | | | |--- Udzialy_w_podatkach_dochodowych_od_osob_prywatnych > 1913257.88\n",
"| | | | | | | | | | | |--- truncated branch of depth 7\n",
"| | | | | | | | |--- Bezrobotni_do_25_roku_zycia > 287.25\n",
"| | | | | | | | | |--- value: [4226674.84]\n",
"| | | | | | | |--- Dochody_podatek_od_dzialalnosci_gospodarczej > 574395.34\n",
"| | | | | | | | |--- value: [11386418.96]\n",
"| | | | | | |--- Dochody_podatek_od_srodkow_transportowych > 510970.09\n",
"| | | | | | | |--- Wplywy_z_innych_lokalnych_oplat <= 4156267.12\n",
"| | | | | | | | |--- Wplywy_z_innych_lokalnych_oplat <= 4121841.12\n",
"| | | | | | | | | |--- Wynagrodzenie_w_relacji_do_sredniej <= 111.65\n",
"| | | | | | | | | | |--- Ludnosc_na_1_km2 <= 233.25\n",
"| | | | | | | | | | | |--- truncated branch of depth 21\n",
"| | | | | | | | | | |--- Ludnosc_na_1_km2 > 233.25\n",
"| | | | | | | | | | | |--- truncated branch of depth 17\n",
"| | | | | | | | | |--- Wynagrodzenie_w_relacji_do_sredniej > 111.65\n",
"| | | | | | | | | | |--- Bezrobotni_ogolem <= 485.50\n",
"| | | | | | | | | | | |--- value: [3730137.10]\n",
"| | | | | | | | | | |--- Bezrobotni_ogolem > 485.50\n",
"| | | | | | | | | | | |--- value: [307553.43]\n",
"| | | | | | | | |--- Wplywy_z_innych_lokalnych_oplat > 4121841.12\n",
"| | | | | | | | | |--- Wplywy_z_oplaty_eksploatacyjnej <= 14062.80\n",
"| | | | | | | | | | |--- value: [4671836.69]\n",
"| | | | | | | | | |--- Wplywy_z_oplaty_eksploatacyjnej > 14062.80\n",
"| | | | | | | | | | |--- value: [863032.31]\n",
"| | | | | | | |--- Wplywy_z_innych_lokalnych_oplat > 4156267.12\n",
"| | | | | | | | |--- Gestosc_zaludnienia <= 3.88\n",
"| | | | | | | | | |--- Wynagrodzenie_ogolem <= 3962.87\n",
"| | | | | | | | | | |--- Wskaznik_urbanizacji <= 45.15\n",
2024-05-10 19:17:54 +02:00
"| | | | | | | | | | | |--- truncated branch of depth 3\n",
2024-05-13 15:20:33 +02:00
"| | | | | | | | | | |--- Wskaznik_urbanizacji > 45.15\n",
"| | | | | | | | | | | |--- truncated branch of depth 22\n",
"| | | | | | | | | |--- Wynagrodzenie_ogolem > 3962.87\n",
"| | | | | | | | | | |--- Wplywy_z_oplaty_eksploatacyjnej <= 5666409.75\n",
"| | | | | | | | | | | |--- truncated branch of depth 22\n",
"| | | | | | | | | | |--- Wplywy_z_oplaty_eksploatacyjnej > 5666409.75\n",
"| | | | | | | | | | | |--- value: [1478204.01]\n",
"| | | | | | | | |--- Gestosc_zaludnienia > 3.88\n",
"| | | | | | | | | |--- value: [2622969.18]\n",
"| | | | | |--- Turysci_zagraniczni > 14.50\n",
"| | | | | | |--- Udzialy_w_podatkach_dochodowych_od_osob_prywatnych <= 115357.00\n",
"| | | | | | | |--- Bezrobotni_do_25_roku_zycia <= 231.75\n",
"| | | | | | | | |--- Dochody_podatek_rolny <= 751889.41\n",
"| | | | | | | | | |--- Miejsca_noclegowe_caloroczne <= 26.00\n",
"| | | | | | | | | | |--- value: [2137619.22]\n",
"| | | | | | | | | |--- Miejsca_noclegowe_caloroczne > 26.00\n",
"| | | | | | | | | | |--- Dochody_podatek_od_dzialalnosci_gospodarczej <= 21053.86\n",
2024-05-10 19:17:54 +02:00
"| | | | | | | | | | | |--- truncated branch of depth 2\n",
2024-05-13 15:20:33 +02:00
"| | | | | | | | | | |--- Dochody_podatek_od_dzialalnosci_gospodarczej > 21053.86\n",
2024-05-10 19:17:54 +02:00
"| | | | | | | | | | | |--- truncated branch of depth 4\n",
2024-05-13 15:20:33 +02:00
"| | | | | | | | |--- Dochody_podatek_rolny > 751889.41\n",
"| | | | | | | | | |--- Wymeldowania_do_miast_ogolem <= 103.00\n",
"| | | | | | | | | | |--- value: [6335790.21]\n",
"| | | | | | | | | |--- Wymeldowania_do_miast_ogolem > 103.00\n",
"| | | | | | | | | | |--- Obiekty_caloroczne <= 1.50\n",
"| | | | | | | | | | | |--- value: [3478051.49]\n",
"| | | | | | | | | | |--- Obiekty_caloroczne > 1.50\n",
"| | | | | | | | | | | |--- value: [2808492.26]\n",
"| | | | | | | |--- Bezrobotni_do_25_roku_zycia > 231.75\n",
"| | | | | | | | |--- Zameldowania_mezczyzni <= 87.50\n",
"| | | | | | | | | |--- value: [9530029.04]\n",
"| | | | | | | | |--- Zameldowania_mezczyzni > 87.50\n",
"| | | | | | | | | |--- value: [14823699.73]\n",
"| | | | | | |--- Udzialy_w_podatkach_dochodowych_od_osob_prywatnych > 115357.00\n",
"| | | | | | | |--- Wojewodztwo_Opolskie <= 0.50\n",
"| | | | | | | | |--- Dochody_z_uslug <= 4892.30\n",
"| | | | | | | | | |--- Wymeldowania_kobiety <= 110.00\n",
"| | | | | | | | | | |--- value: [726982.20]\n",
"| | | | | | | | | |--- Wymeldowania_kobiety > 110.00\n",
"| | | | | | | | | | |--- value: [7756910.77]\n",
"| | | | | | | | |--- Dochody_z_uslug > 4892.30\n",
"| | | | | | | | | |--- Wynagrodzenie_w_relacji_do_sredniej <= 70.60\n",
"| | | | | | | | | | |--- Wskaznik_urbanizacji <= 58.35\n",
"| | | | | | | | | | | |--- value: [3473795.48]\n",
"| | | | | | | | | | |--- Wskaznik_urbanizacji > 58.35\n",
"| | | | | | | | | | | |--- value: [4769144.63]\n",
"| | | | | | | | | |--- Wynagrodzenie_w_relacji_do_sredniej > 70.60\n",
"| | | | | | | | | | |--- Wplywy_z_innych_lokalnych_oplat <= 566851.53\n",
"| | | | | | | | | | | |--- truncated branch of depth 9\n",
"| | | | | | | | | | |--- Wplywy_z_innych_lokalnych_oplat > 566851.53\n",
"| | | | | | | | | | | |--- truncated branch of depth 23\n",
"| | | | | | | |--- Wojewodztwo_Opolskie > 0.50\n",
"| | | | | | | | |--- Udzialy_w_podatkach_dochodowych_od_osob_fizycznych <= 9089692.00\n",
"| | | | | | | | | |--- value: [12432814.85]\n",
"| | | | | | | | |--- Udzialy_w_podatkach_dochodowych_od_osob_fizycznych > 9089692.00\n",
"| | | | | | | | | |--- Zameldowania_z_miast_kobiety <= 140.50\n",
"| | | | | | | | | | |--- Dochody_podatek_lesny <= 120971.51\n",
"| | | | | | | | | | | |--- truncated branch of depth 3\n",
"| | | | | | | | | | |--- Dochody_podatek_lesny > 120971.51\n",
"| | | | | | | | | | | |--- value: [1383897.89]\n",
"| | | | | | | | | |--- Zameldowania_z_miast_kobiety > 140.50\n",
"| | | | | | | | | | |--- value: [4817436.62]\n",
"| | | | |--- Dochody_podatek_rolny > 3939909.12\n",
"| | | | | |--- value: [9110050.20]\n",
2024-05-10 19:17:54 +02:00
"\n"
]
}
],
"source": [
"# Render the fitted tree's split rules as indented text. Branches deeper than\n",
"# export_text's default max_depth appear as 'truncated branch of depth N'.\n",
"tree_rules = export_text(model, feature_names=feature_names)\n",
"print(tree_rules)"
]
},
{
"cell_type": "code",
2024-05-13 15:20:33 +02:00
"execution_count": 449,
2024-05-06 23:50:22 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-05-13 15:20:33 +02:00
"0.32120 — Wplywy_z_oplaty_eksploatacyjnej\n",
"0.15728 — Wymeldowania_ogolem\n",
"0.07384 — Wplywy_z_oplaty_skarbowej\n",
"0.07115 — Dochody_podatek_od_dzialalnosci_gospodarczej\n",
"0.07098 — Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym\n",
"0.06455 — Dochody_podatek_lesny\n",
"0.04225 — Dochody_dofinansowanie_razem\n",
"0.02294 — Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym\n",
"0.01696 — Dochody_podatek_PCC\n",
"0.01605 — Ludnosc_ogolem\n",
"0.01528 — Turysci_zagraniczni\n",
"0.01449 — Dochody_podatek_od_srodkow_transportowych\n",
"0.01294 — Gestosc_zaludnienia\n",
"0.01121 — Dochody_z_najmu_i_dzierzawy\n",
"0.00803 — Bezrobotni_mezczyzni\n",
"0.00639 — Udzialy_w_podatkach_dochodowych_od_osob_fizycznych\n",
"0.00608 — Dochody_podatek_rolny\n",
"0.00596 — Wymeldowania_na_wies_mezczyzni\n",
"0.00590 — Wynagrodzenie_ogolem\n",
"0.00527 — Wymeldowania_kobiety\n",
"0.00507 — Wplywy_z_oplaty_targowej\n",
"0.00475 — Zameldowania_z_miast_kobiety\n",
"0.00388 — Saldo_migracji_na_1000_ludnosci\n",
"0.00365 — Bezrobotni_do_25_roku_zycia\n",
"0.00357 — Wojewodztwo_Opolskie\n",
"0.00292 — Zmiana_liczby_ludnosci\n",
"0.00276 — Wynagrodzenie_w_relacji_do_sredniej\n",
"0.00266 — Dochody_podatek_od_spadkow\n",
"0.00202 — Ludnosc_w_wieku_poprodukcyjnym\n",
"0.00195 — Udzialy_w_podatkach_dochodowych_od_osob_prywatnych\n",
"0.00188 — Wplywy_z_innych_lokalnych_oplat\n",
"0.00156 — Wojewodztwo_Lubelskie\n",
"0.00134 — Wymeldowania_na_wies_kobiety\n",
"0.00106 — Udzialy_w_podatkach_dochodowych_razem\n",
"0.00088 — Ludnosc_kobiety_w_wieku_poprodukcyjnym\n",
"0.00088 — Dochody_z_uslug\n",
"0.00079 — Powierzchnia\n",
"0.00066 — Dochody_podatek_od_nieruchomosci\n",
"0.00062 — Zameldowania_z_miast_ogolem\n",
"0.00057 — Zameldowania_z_miast_mezczyzni\n",
"0.00055 — Zameldowania_mezczyzni\n",
"0.00054 — Zameldowania_ze_wsi_mezczyzni\n",
"0.00047 — Ludnosc_w_wieku_produkcyjnym\n",
"0.00042 — Miejsca_noclegowe_ogolem\n",
"0.00036 — Dochody_dofinansowanie_inwestycyjne\n",
"0.00036 — Ludnosc_mezczyzni_w_wieku_produkcyjnym\n",
"0.00035 — Wymeldowania_na_wies_ogolem\n",
"0.00032 — Ludnosc_kobiety_w_wieku_produkcyjnym\n",
"0.00030 — Wojewodztwo_Podkarpackie\n",
"0.00028 — Wymeldowania_do_miast_ogolem\n",
"0.00027 — Wymeldowania_do_miast_kobiety\n",
"0.00027 — Dochody_z_majatku\n",
"0.00023 — Bezrobotne_kobiety\n",
"0.00021 — Wymeldowania_do_miast_mezczyzni\n",
"0.00021 — Ludnosc_w_wieku_produkcyjnym_mobilnym\n",
"0.00021 — Bezrobotni_ogolem\n",
"0.00021 — Ludnosc_w_wieku_przedprodukcyjnym\n",
"0.00018 — Ludnosc_na_1_km2\n",
"0.00017 — Obiekty_ogolem\n",
"0.00017 — Wymeldowania_mezczyzni\n",
"0.00014 — Ludnosc_mezczyzni_w_wieku_poprodukcyjnym\n",
"0.00014 — Zameldowania_ze_wsi_kobiety\n",
"0.00012 — Wskaznik_urbanizacji\n",
"0.00011 — Zameldowania_kobiety\n",
"0.00011 — Miejsca_noclegowe_caloroczne\n",
"0.00010 — Bezrobotni_powyzej_50_roku_zycia\n",
"0.00007 — Zameldowania_ze_wsi_ogolem\n",
"0.00006 — Wojewodztwo_Warminsko_Mazurskie\n",
"0.00006 — Dochody_razem\n",
"0.00005 — Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym\n",
"0.00004 — Saldo_migracji\n",
"0.00004 — Wojewodztwo_Slaskie\n",
"0.00004 — Wojewodztwo_Pomorskie\n",
"0.00003 — Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym\n",
"0.00003 — Dlugotrwale_bezrobotni\n",
"0.00002 — Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym\n",
"0.00002 — Wojewodztwo_Lodzkie\n",
"0.00001 — Turysci_ogolem\n",
"0.00001 — Ludnosc_kobiety\n",
"0.00001 — Obiekty_caloroczne\n",
"0.00001 — Ludnosc\n",
"0.00001 — Ludnosc_w_wieku_produkcyjnym_niemobilnym\n",
"0.00001 — Wojewodztwo_Mazowieckie\n",
"0.00001 — Dochody_podatek_odrebne_ustawy\n",
"0.00001 — Wojewodztwo_Dolnoslaskie\n",
"0.00000 — Zameldowania_ogolem\n",
"0.00000 — Wojewodztwo_Lubuskie\n",
"0.00000 — Ludnosc_mezczyzni\n",
"0.00000 — Wojewodztwo_Podlaskie\n",
"0.00000 — Ludnosc_kobiety_w_wieku_przedprodukcyjnym\n",
"0.00000 — Wojewodztwo_Wielkopolskie\n",
2024-05-10 19:17:54 +02:00
"0.00000 — Wojewodztwo_Malopolskie\n",
2024-05-13 15:20:33 +02:00
"0.00000 — Wojewodztwo_Zachodniopomorskie\n",
"0.00000 — Wojewodztwo_Kujawsko_Pomorskie\n",
"0.00000 — Wojewodztwo_Swietokrzyskie\n"
2024-05-06 23:50:22 +02:00
]
}
],
"source": [
"# Pair every feature name with its importance score from the fitted model.\n",
"feature_importance = {name: score for name, score in zip(feature_names, model.feature_importances_)}\n",
"\n",
"# Walk the features from most to least important; sorted() is stable, so\n",
"# features with equal importance keep their original relative order.\n",
"ranking = sorted(feature_importance, key=feature_importance.get, reverse=True)\n",
"for feature in ranking:\n",
"    importance = feature_importance[feature]\n",
"    print(f'{importance:.5f} \\u2014 {feature}')"
2024-05-06 23:50:22 +02:00
]
2024-05-10 19:17:54 +02:00
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
2024-05-06 22:55:21 +02:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}