WUT_Computer_Science/data.ipynb

4000 lines
569 KiB
Plaintext
Raw Normal View History

2024-05-29 17:34:48 +02:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
"execution_count": 762,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 763,
"metadata": {},
"outputs": [],
"source": [
"# Notebook-wide display setting: render floats with exactly two decimals.\n",
"pd.set_option('display.float_format', '{:.2f}'.format)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
"execution_count": 764,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\micha\\AppData\\Local\\Temp\\ipykernel_24540\\3760256257.py:1: DtypeWarning: Columns (25) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df_dofinansowanie = pd.read_csv(\n"
]
}
],
"source": [
"# Load the agreements list and prepare it for aggregation.\n",
"#\n",
"# low_memory=False lets pandas infer every column's dtype from the whole file\n",
"# at once; the default chunked inference raised a DtypeWarning (mixed types in\n",
"# column 25) on this file.\n",
"#\n",
"# NOTE(review): the '?' characters in the column names are literal text here.\n",
"# The stored output of the programme listing shows the same '?' inside data\n",
"# values, so the CSV presumably lost some Polish diacritics - confirm before\n",
"# changing these names.\n",
"df_dofinansowanie = pd.read_csv(\n",
"    'umowy_pelna_lista_krajowe.csv',\n",
"    encoding='ISO-8859-2',\n",
"    converters={'TERYT pe?ny': str},\n",
"    thousands=',',\n",
"    low_memory=False)\n",
"\n",
"# Drop rows whose TERYT code is empty.\n",
"df_dofinansowanie = df_dofinansowanie.loc[df_dofinansowanie['TERYT pe?ny'] != ''].reset_index(drop=True)\n",
"\n",
"# Convert the whole column in one vectorised call instead of invoking\n",
"# pd.to_numeric once per element through Series.apply.\n",
"df_dofinansowanie['Dofinansowanie UE (PLN)'] = pd.to_numeric(\n",
"    df_dofinansowanie['Dofinansowanie UE (PLN)'])\n",
"\n",
"# Parse both date columns and derive a calendar-year column from each.\n",
"df_dofinansowanie['Data rozpocz?cia realizacji'] = pd.to_datetime(df_dofinansowanie['Data rozpocz?cia realizacji'])\n",
"df_dofinansowanie['Rok rozpocz?cia realizacji'] = df_dofinansowanie['Data rozpocz?cia realizacji'].dt.year\n",
"\n",
"df_dofinansowanie['Data podpisania umowy pierwotnej'] = pd.to_datetime(df_dofinansowanie['Data podpisania umowy pierwotnej'])\n",
"df_dofinansowanie['Rok podpisania umowy pierwotnej'] = df_dofinansowanie['Data podpisania umowy pierwotnej'].dt.year"
]
},
{
"cell_type": "code",
"execution_count": 765,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Program Operacyjny Inteligentny Rozwój'\n",
" 'Program Operacyjny Infrastruktura i ?rodowisko 2014-2020'\n",
" 'Program Operacyjny Polska Cyfrowa'\n",
" 'Program Operacyjny Pomoc Techniczna 2014-2020'\n",
" 'Program Operacyjny Polska Wschodnia'\n",
" 'Program Operacyjny Wiedza Edukacja Rozwój']\n"
]
}
],
"source": [
"# Print each distinct 'Program operacyjny' value once, in order of first appearance.\n",
"print(\n",
"    df_dofinansowanie['Program operacyjny']\n",
"    .drop_duplicates(keep='first')\n",
"    .values\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 766,
"metadata": {},
"outputs": [],
"source": [
"# Operational programme selection (uncomment the line below to keep a single programme)...\n",
"# df_dofinansowanie = df_dofinansowanie.loc[df_dofinansowanie['Program operacyjny'] == 'Program Operacyjny Infrastruktura i ?rodowisko 2014-2020'].reset_index(drop=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 767,
"metadata": {},
"outputs": [],
"source": [
"# Total EU co-financing per TERYT code, operational programme and start year.\n",
"# The grouped columns get short names and only rows whose code is exactly\n",
"# 7 characters long are kept.\n",
"df_dofinansowanie_agg = (\n",
"    df_dofinansowanie\n",
"    .groupby(['TERYT pe?ny', 'Program operacyjny', 'Rok rozpocz?cia realizacji'])['Dofinansowanie UE (PLN)']\n",
"    .sum()\n",
"    .reset_index()\n",
"    .rename(columns={\n",
"        'TERYT pe?ny': 'Kod',\n",
"        'Rok rozpocz?cia realizacji': 'Rok',\n",
"        'Program operacyjny': 'Program_operacyjny',\n",
"        'Dofinansowanie UE (PLN)': 'Suma'})\n",
"    .loc[lambda agg: agg['Kod'].str.len() == 7]\n",
"    .reset_index(drop=True)\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
"execution_count": 768,
"metadata": {},
"outputs": [],
"source": [
"# Read PODZ_1410_CREL.csv and keep Kod / Rok / Wartosc for codes ending in\n",
"# 1, 2 or 3; rows with missing values are dropped and 'Wartosc' is renamed\n",
"# to 'Powierzchnia'.\n",
"df_podz = (\n",
"    pd.read_csv(\n",
"        'PODZ_1410_CREL.csv',\n",
"        sep=';',\n",
"        converters={'Kod': str})\n",
"    .loc[:, ['Kod', 'Rok', 'Wartosc']]\n",
"    .loc[lambda rows: rows['Kod'].str.endswith(('1', '2', '3'))]\n",
"    .dropna()\n",
"    .rename(columns={'Wartosc': 'Powierzchnia'})\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 769,
"metadata": {},
"outputs": [],
"source": [
"# Read WYNA_2497_CREL.csv (comma as decimal mark) and reshape it from long to\n",
"# wide: one row per (Kod, Rok), one column per 'Wyszczególnienie' category.\n",
"# pivot_table uses its default aggregation (mean) if a key occurs more than once.\n",
"df_wyna = (\n",
"    pd.read_csv(\n",
"        'WYNA_2497_CREL.csv',\n",
"        sep=';',\n",
"        converters={'Kod': str},\n",
"        decimal=',')\n",
"    .loc[:, ['Kod', 'Wyszczególnienie', 'Rok', 'Wartosc']]\n",
"    .dropna()\n",
"    .pivot_table(index=['Kod', 'Rok'], columns='Wyszczególnienie', values='Wartosc')\n",
"    .reset_index()\n",
"    .rename(columns={\n",
"        'ogółem': 'Wynagrodzenie_ogolem',\n",
"        'przeciętne miesięczne wynagrodzenia brutto w relacji do średniej krajowej (Polska=100)': 'Wynagrodzenie_w_relacji_do_sredniej'})\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 770,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\micha\\AppData\\Local\\Temp\\ipykernel_24540\\1671418303.py:1: DtypeWarning: Columns (7) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df_fina_1 = pd.read_csv(\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Rodzaje dochodów</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Dochody_podatek_lesny</th>\n",
" <th>Dochody_podatek_PCC</th>\n",
" <th>Dochody_podatek_od_dzialalnosci_gospodarczej</th>\n",
" <th>Dochody_podatek_od_nieruchomosci</th>\n",
" <th>Dochody_podatek_od_spadkow</th>\n",
" <th>Dochody_podatek_od_srodkow_transportowych</th>\n",
" <th>Dochody_podatek_rolny</th>\n",
" <th>Dochody_podatek_odrebne_ustawy</th>\n",
" <th>Dochody_razem</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2004</td>\n",
" <td>NaN</td>\n",
" <td>549608.00</td>\n",
" <td>NaN</td>\n",
" <td>13532989.00</td>\n",
" <td>NaN</td>\n",
" <td>625159.00</td>\n",
" <td>23687.00</td>\n",
" <td>NaN</td>\n",
" <td>41378568.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2005</td>\n",
" <td>NaN</td>\n",
" <td>609855.00</td>\n",
" <td>NaN</td>\n",
" <td>13667398.00</td>\n",
" <td>NaN</td>\n",
" <td>700134.00</td>\n",
" <td>26634.00</td>\n",
" <td>15438121.00</td>\n",
" <td>43417443.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2006</td>\n",
" <td>NaN</td>\n",
" <td>844223.65</td>\n",
" <td>NaN</td>\n",
" <td>14633962.72</td>\n",
" <td>NaN</td>\n",
" <td>747182.64</td>\n",
" <td>11683.60</td>\n",
" <td>16647124.98</td>\n",
" <td>50319253.08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2007</td>\n",
" <td>NaN</td>\n",
" <td>1344365.01</td>\n",
" <td>NaN</td>\n",
" <td>14944781.74</td>\n",
" <td>NaN</td>\n",
" <td>777345.52</td>\n",
" <td>19377.36</td>\n",
" <td>17436387.93</td>\n",
" <td>62025513.24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2008</td>\n",
" <td>6799.55</td>\n",
" <td>1790135.40</td>\n",
" <td>NaN</td>\n",
" <td>16089534.56</td>\n",
" <td>NaN</td>\n",
" <td>836441.10</td>\n",
" <td>30823.60</td>\n",
" <td>19149551.45</td>\n",
" <td>80755930.93</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47078</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>154462.39</td>\n",
" <td>5361951.37</td>\n",
" <td>572868.36</td>\n",
" <td>108107448.79</td>\n",
" <td>437144.83</td>\n",
" <td>589658.88</td>\n",
" <td>51297.75</td>\n",
" <td>115274832.37</td>\n",
" <td>261780766.79</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47079</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>150329.31</td>\n",
" <td>6088184.20</td>\n",
" <td>468411.51</td>\n",
" <td>38527846.59</td>\n",
" <td>228886.23</td>\n",
" <td>608637.40</td>\n",
" <td>64855.15</td>\n",
" <td>46137150.39</td>\n",
" <td>167638796.15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47080</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>156556.52</td>\n",
" <td>5125090.74</td>\n",
" <td>329522.12</td>\n",
" <td>78767466.83</td>\n",
" <td>552009.16</td>\n",
" <td>558925.68</td>\n",
" <td>48689.09</td>\n",
" <td>85538260.14</td>\n",
" <td>263006955.07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47081</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>163778.36</td>\n",
" <td>9082482.28</td>\n",
" <td>492045.28</td>\n",
" <td>78491368.35</td>\n",
" <td>947992.83</td>\n",
" <td>602586.14</td>\n",
" <td>59824.46</td>\n",
" <td>89840077.70</td>\n",
" <td>252345800.93</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47082</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>174823.49</td>\n",
" <td>7474079.65</td>\n",
" <td>1019054.56</td>\n",
" <td>84996948.99</td>\n",
" <td>593315.54</td>\n",
" <td>627169.86</td>\n",
" <td>50987.00</td>\n",
" <td>94936379.09</td>\n",
" <td>259310641.60</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>47083 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
"Rodzaje dochodów Kod Rok Dochody_podatek_lesny Dochody_podatek_PCC \n",
"0 0201011 2004 NaN 549608.00 \\\n",
"1 0201011 2005 NaN 609855.00 \n",
"2 0201011 2006 NaN 844223.65 \n",
"3 0201011 2007 NaN 1344365.01 \n",
"4 0201011 2008 6799.55 1790135.40 \n",
"... ... ... ... ... \n",
"47078 3263011 2018 154462.39 5361951.37 \n",
"47079 3263011 2019 150329.31 6088184.20 \n",
"47080 3263011 2020 156556.52 5125090.74 \n",
"47081 3263011 2021 163778.36 9082482.28 \n",
"47082 3263011 2022 174823.49 7474079.65 \n",
"\n",
"Rodzaje dochodów Dochody_podatek_od_dzialalnosci_gospodarczej \n",
"0 NaN \\\n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"47078 572868.36 \n",
"47079 468411.51 \n",
"47080 329522.12 \n",
"47081 492045.28 \n",
"47082 1019054.56 \n",
"\n",
"Rodzaje dochodów Dochody_podatek_od_nieruchomosci \n",
"0 13532989.00 \\\n",
"1 13667398.00 \n",
"2 14633962.72 \n",
"3 14944781.74 \n",
"4 16089534.56 \n",
"... ... \n",
"47078 108107448.79 \n",
"47079 38527846.59 \n",
"47080 78767466.83 \n",
"47081 78491368.35 \n",
"47082 84996948.99 \n",
"\n",
"Rodzaje dochodów Dochody_podatek_od_spadkow \n",
"0 NaN \\\n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"47078 437144.83 \n",
"47079 228886.23 \n",
"47080 552009.16 \n",
"47081 947992.83 \n",
"47082 593315.54 \n",
"\n",
"Rodzaje dochodów Dochody_podatek_od_srodkow_transportowych \n",
"0 625159.00 \\\n",
"1 700134.00 \n",
"2 747182.64 \n",
"3 777345.52 \n",
"4 836441.10 \n",
"... ... \n",
"47078 589658.88 \n",
"47079 608637.40 \n",
"47080 558925.68 \n",
"47081 602586.14 \n",
"47082 627169.86 \n",
"\n",
"Rodzaje dochodów Dochody_podatek_rolny Dochody_podatek_odrebne_ustawy \n",
"0 23687.00 NaN \\\n",
"1 26634.00 15438121.00 \n",
"2 11683.60 16647124.98 \n",
"3 19377.36 17436387.93 \n",
"4 30823.60 19149551.45 \n",
"... ... ... \n",
"47078 51297.75 115274832.37 \n",
"47079 64855.15 46137150.39 \n",
"47080 48689.09 85538260.14 \n",
"47081 59824.46 89840077.70 \n",
"47082 50987.00 94936379.09 \n",
"\n",
"Rodzaje dochodów Dochody_razem \n",
"0 41378568.00 \n",
"1 43417443.00 \n",
"2 50319253.08 \n",
"3 62025513.24 \n",
"4 80755930.93 \n",
"... ... \n",
"47078 261780766.79 \n",
"47079 167638796.15 \n",
"47080 263006955.07 \n",
"47081 252345800.93 \n",
"47082 259310641.60 \n",
"\n",
"[47083 rows x 11 columns]"
]
},
"execution_count": 770,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the first income table (comma as decimal mark).\n",
"# low_memory=False makes pandas infer dtypes from the whole file in one pass;\n",
"# the default chunked inference raised a DtypeWarning (mixed types in column 7).\n",
"df_fina_1 = pd.read_csv(\n",
"    'FINA_2622_CREL_1.csv',\n",
"    sep=';',\n",
"    converters={'Kod': str},\n",
"    decimal=',',\n",
"    low_memory=False)\n",
"\n",
"# Keep only the columns needed for the pivot and drop incomplete rows.\n",
"df_fina_1 = df_fina_1[['Kod', 'Rodzaje dochodów', 'Rok', 'Wartosc']]\n",
"df_fina_1 = df_fina_1.dropna()\n",
"\n",
"# Long -> wide: one row per (Kod, Rok), one column per income category.\n",
"# pivot_table uses its default aggregation (mean) if a key occurs more than once.\n",
"df_fina_1 = df_fina_1.pivot_table(index=['Kod', 'Rok'], columns='Rodzaje dochodów', values='Wartosc').reset_index()\n",
"\n",
"# Replace the long Polish category labels with short ASCII column names.\n",
"df_fina_1 = df_fina_1.rename(columns={\n",
"    'dochody podatkowe - podatek leśny': 'Dochody_podatek_lesny',\n",
"    'dochody podatkowe - podatek od czynności cywilnoprawnych': 'Dochody_podatek_PCC',\n",
"    'dochody podatkowe - podatek od działalności gospodarczej osób fizycznych, opłacany w formie karty podatkowej': 'Dochody_podatek_od_dzialalnosci_gospodarczej',\n",
"    'dochody podatkowe - podatek od nieruchomości': 'Dochody_podatek_od_nieruchomosci',\n",
"    'dochody podatkowe - podatek od spadków i darowizn': 'Dochody_podatek_od_spadkow',\n",
"    'dochody podatkowe - podatek od środków transportowych': 'Dochody_podatek_od_srodkow_transportowych',\n",
"    'dochody podatkowe - podatek rolny': 'Dochody_podatek_rolny',\n",
"    'dochody podatkowe - ustalone i pobierane na podstawie odrębnych ustaw': 'Dochody_podatek_odrebne_ustawy',\n",
"    'razem': 'Dochody_razem'})\n",
"\n",
"df_fina_1"
]
},
{
"cell_type": "code",
"execution_count": 771,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\micha\\AppData\\Local\\Temp\\ipykernel_24540\\2161929356.py:1: DtypeWarning: Columns (7) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df_fina_2 = pd.read_csv(\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Rodzaje dochodów</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Dochody_z_majatku</th>\n",
" <th>Dochody_z_najmu_i_dzierzawy</th>\n",
" <th>Dochody_z_uslug</th>\n",
" <th>Dochody_dofinansowanie_inwestycyjne</th>\n",
" <th>Dochody_dofinansowanie_razem</th>\n",
" <th>Udzialy_w_podatkach_dochodowych_od_osob_fizycznych</th>\n",
" <th>Udzialy_w_podatkach_dochodowych_od_osob_prywatnych</th>\n",
" <th>Udzialy_w_podatkach_dochodowych_razem</th>\n",
" <th>Wplywy_z_innych_lokalnych_oplat</th>\n",
" <th>Wplywy_z_oplaty_eksploatacyjnej</th>\n",
" <th>Wplywy_z_oplaty_skarbowej</th>\n",
" <th>Wplywy_z_oplaty_targowej</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2004</td>\n",
" <td>5344205.00</td>\n",
" <td>NaN</td>\n",
" <td>184307.00</td>\n",
" <td>NaN</td>\n",
" <td>519209.00</td>\n",
" <td>13285456.00</td>\n",
" <td>1065169.00</td>\n",
" <td>14350625.00</td>\n",
" <td>44200.00</td>\n",
" <td>NaN</td>\n",
" <td>1209998.00</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2005</td>\n",
" <td>4560489.00</td>\n",
" <td>NaN</td>\n",
" <td>96462.00</td>\n",
" <td>NaN</td>\n",
" <td>9024183.00</td>\n",
" <td>15985331.00</td>\n",
" <td>1170863.00</td>\n",
" <td>17156194.00</td>\n",
" <td>42840.00</td>\n",
" <td>NaN</td>\n",
" <td>1282943.00</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2006</td>\n",
" <td>8528727.69</td>\n",
" <td>NaN</td>\n",
" <td>231470.96</td>\n",
" <td>8752288.98</td>\n",
" <td>8864860.57</td>\n",
" <td>18101668.00</td>\n",
" <td>1048115.83</td>\n",
" <td>19149783.83</td>\n",
" <td>37365.00</td>\n",
" <td>NaN</td>\n",
" <td>1203990.73</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2007</td>\n",
" <td>15042480.34</td>\n",
" <td>9219682.12</td>\n",
" <td>339654.15</td>\n",
" <td>18153240.30</td>\n",
" <td>18438743.21</td>\n",
" <td>21785308.00</td>\n",
" <td>1336702.02</td>\n",
" <td>23122010.02</td>\n",
" <td>78798.51</td>\n",
" <td>NaN</td>\n",
" <td>1228704.53</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2008</td>\n",
" <td>22797881.07</td>\n",
" <td>9546379.31</td>\n",
" <td>787256.69</td>\n",
" <td>5046691.69</td>\n",
" <td>5182137.79</td>\n",
" <td>23974587.00</td>\n",
" <td>1532633.44</td>\n",
" <td>25507220.44</td>\n",
" <td>83882.94</td>\n",
" <td>NaN</td>\n",
" <td>1364245.93</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47078</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>16419859.31</td>\n",
" <td>4261374.83</td>\n",
" <td>1996824.80</td>\n",
" <td>25285.92</td>\n",
" <td>237485.34</td>\n",
" <td>52799183.00</td>\n",
" <td>2690098.17</td>\n",
" <td>55489281.17</td>\n",
" <td>10458871.30</td>\n",
" <td>4684.54</td>\n",
" <td>434077.88</td>\n",
" <td>608625.90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47079</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>8844350.07</td>\n",
" <td>4324758.68</td>\n",
" <td>2187576.47</td>\n",
" <td>0.00</td>\n",
" <td>225831.84</td>\n",
" <td>55319040.00</td>\n",
" <td>2770684.17</td>\n",
" <td>58089724.17</td>\n",
" <td>11369287.11</td>\n",
" <td>3456.95</td>\n",
" <td>415686.53</td>\n",
" <td>610059.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47080</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>13485033.97</td>\n",
" <td>6159923.01</td>\n",
" <td>1917372.55</td>\n",
" <td>21002107.00</td>\n",
" <td>21192313.05</td>\n",
" <td>53739656.00</td>\n",
" <td>3144444.38</td>\n",
" <td>56884100.38</td>\n",
" <td>12281916.71</td>\n",
" <td>5157.50</td>\n",
" <td>355201.29</td>\n",
" <td>507341.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47081</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>16928500.75</td>\n",
" <td>7582499.62</td>\n",
" <td>4110105.72</td>\n",
" <td>888293.63</td>\n",
" <td>1072910.83</td>\n",
" <td>63936763.00</td>\n",
" <td>3975531.95</td>\n",
" <td>67912294.95</td>\n",
" <td>17127683.55</td>\n",
" <td>27746.70</td>\n",
" <td>416473.03</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47082</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>30415536.99</td>\n",
" <td>8651170.05</td>\n",
" <td>4117086.30</td>\n",
" <td>207597.50</td>\n",
" <td>800347.63</td>\n",
" <td>64657287.40</td>\n",
" <td>4082611.64</td>\n",
" <td>68739899.04</td>\n",
" <td>19150342.25</td>\n",
" <td>5035.87</td>\n",
" <td>421424.91</td>\n",
" <td>1233266.30</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>47083 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
"Rodzaje dochodów Kod Rok Dochody_z_majatku \n",
"0 0201011 2004 5344205.00 \\\n",
"1 0201011 2005 4560489.00 \n",
"2 0201011 2006 8528727.69 \n",
"3 0201011 2007 15042480.34 \n",
"4 0201011 2008 22797881.07 \n",
"... ... ... ... \n",
"47078 3263011 2018 16419859.31 \n",
"47079 3263011 2019 8844350.07 \n",
"47080 3263011 2020 13485033.97 \n",
"47081 3263011 2021 16928500.75 \n",
"47082 3263011 2022 30415536.99 \n",
"\n",
"Rodzaje dochodów Dochody_z_najmu_i_dzierzawy Dochody_z_uslug \n",
"0 NaN 184307.00 \\\n",
"1 NaN 96462.00 \n",
"2 NaN 231470.96 \n",
"3 9219682.12 339654.15 \n",
"4 9546379.31 787256.69 \n",
"... ... ... \n",
"47078 4261374.83 1996824.80 \n",
"47079 4324758.68 2187576.47 \n",
"47080 6159923.01 1917372.55 \n",
"47081 7582499.62 4110105.72 \n",
"47082 8651170.05 4117086.30 \n",
"\n",
"Rodzaje dochodów Dochody_dofinansowanie_inwestycyjne \n",
"0 NaN \\\n",
"1 NaN \n",
"2 8752288.98 \n",
"3 18153240.30 \n",
"4 5046691.69 \n",
"... ... \n",
"47078 25285.92 \n",
"47079 0.00 \n",
"47080 21002107.00 \n",
"47081 888293.63 \n",
"47082 207597.50 \n",
"\n",
"Rodzaje dochodów Dochody_dofinansowanie_razem \n",
"0 519209.00 \\\n",
"1 9024183.00 \n",
"2 8864860.57 \n",
"3 18438743.21 \n",
"4 5182137.79 \n",
"... ... \n",
"47078 237485.34 \n",
"47079 225831.84 \n",
"47080 21192313.05 \n",
"47081 1072910.83 \n",
"47082 800347.63 \n",
"\n",
"Rodzaje dochodów Udzialy_w_podatkach_dochodowych_od_osob_fizycznych \n",
"0 13285456.00 \\\n",
"1 15985331.00 \n",
"2 18101668.00 \n",
"3 21785308.00 \n",
"4 23974587.00 \n",
"... ... \n",
"47078 52799183.00 \n",
"47079 55319040.00 \n",
"47080 53739656.00 \n",
"47081 63936763.00 \n",
"47082 64657287.40 \n",
"\n",
"Rodzaje dochodów Udzialy_w_podatkach_dochodowych_od_osob_prywatnych \n",
"0 1065169.00 \\\n",
"1 1170863.00 \n",
"2 1048115.83 \n",
"3 1336702.02 \n",
"4 1532633.44 \n",
"... ... \n",
"47078 2690098.17 \n",
"47079 2770684.17 \n",
"47080 3144444.38 \n",
"47081 3975531.95 \n",
"47082 4082611.64 \n",
"\n",
"Rodzaje dochodów Udzialy_w_podatkach_dochodowych_razem \n",
"0 14350625.00 \\\n",
"1 17156194.00 \n",
"2 19149783.83 \n",
"3 23122010.02 \n",
"4 25507220.44 \n",
"... ... \n",
"47078 55489281.17 \n",
"47079 58089724.17 \n",
"47080 56884100.38 \n",
"47081 67912294.95 \n",
"47082 68739899.04 \n",
"\n",
"Rodzaje dochodów Wplywy_z_innych_lokalnych_oplat \n",
"0 44200.00 \\\n",
"1 42840.00 \n",
"2 37365.00 \n",
"3 78798.51 \n",
"4 83882.94 \n",
"... ... \n",
"47078 10458871.30 \n",
"47079 11369287.11 \n",
"47080 12281916.71 \n",
"47081 17127683.55 \n",
"47082 19150342.25 \n",
"\n",
"Rodzaje dochodów Wplywy_z_oplaty_eksploatacyjnej Wplywy_z_oplaty_skarbowej \n",
"0 NaN 1209998.00 \\\n",
"1 NaN 1282943.00 \n",
"2 NaN 1203990.73 \n",
"3 NaN 1228704.53 \n",
"4 NaN 1364245.93 \n",
"... ... ... \n",
"47078 4684.54 434077.88 \n",
"47079 3456.95 415686.53 \n",
"47080 5157.50 355201.29 \n",
"47081 27746.70 416473.03 \n",
"47082 5035.87 421424.91 \n",
"\n",
"Rodzaje dochodów Wplywy_z_oplaty_targowej \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"47078 608625.90 \n",
"47079 610059.50 \n",
"47080 507341.00 \n",
"47081 0.00 \n",
"47082 1233266.30 \n",
"\n",
"[47083 rows x 14 columns]"
]
},
"execution_count": 771,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the second income table (comma as decimal mark).\n",
"# low_memory=False makes pandas infer dtypes from the whole file in one pass;\n",
"# the default chunked inference raised a DtypeWarning (mixed types in column 7).\n",
"df_fina_2 = pd.read_csv(\n",
"    'FINA_2622_CREL_2.csv',\n",
"    sep=';',\n",
"    converters={'Kod': str},\n",
"    decimal=',',\n",
"    low_memory=False)\n",
"\n",
"# Keep only the columns needed for the pivot and drop incomplete rows.\n",
"df_fina_2 = df_fina_2[['Kod', 'Rodzaje dochodów', 'Rok', 'Wartosc']]\n",
"df_fina_2 = df_fina_2.dropna()\n",
"\n",
"# Long -> wide: one row per (Kod, Rok), one column per income category.\n",
"# pivot_table uses its default aggregation (mean) if a key occurs more than once.\n",
"df_fina_2 = df_fina_2.pivot_table(index=['Kod', 'Rok'], columns='Rodzaje dochodów', values='Wartosc').reset_index()\n",
"\n",
"# Replace the long Polish category labels with short ASCII column names.\n",
"# NOTE(review): the source label 'osób prawnych' means legal persons, yet its\n",
"# target column is named '..._od_osob_prywatnych'. The name is left unchanged\n",
"# because other cells may refer to it; rename it everywhere in one go if desired.\n",
"df_fina_2 = df_fina_2.rename(columns={\n",
"    'dochody z majątku': 'Dochody_z_majatku',\n",
"    'dochody z majątku - dochody z najmu i dzierżawy składników majątkowych JST oraz innych umów o podobnym charakterze': 'Dochody_z_najmu_i_dzierzawy',\n",
"    'pozostałe dochody - wpływy z usług': 'Dochody_z_uslug',\n",
"    'pozostałe dochody - środki na dofinansowanie własnych zadań pozyskane z innych źródeł - inwestycyjne': 'Dochody_dofinansowanie_inwestycyjne',\n",
"    'pozostałe dochody - środki na dofinansowanie własnych zadań pozyskane z innych źródeł - razem': 'Dochody_dofinansowanie_razem',\n",
"    'udziały w podatkach stanowiących dochody budżetu państwa podatek dochodowy od osób fizycznych': 'Udzialy_w_podatkach_dochodowych_od_osob_fizycznych',\n",
"    'udziały w podatkach stanowiących dochody budżetu państwa podatek dochodowy od osób prawnych': 'Udzialy_w_podatkach_dochodowych_od_osob_prywatnych',\n",
"    'udziały w podatkach stanowiących dochody budżetu państwa razem': 'Udzialy_w_podatkach_dochodowych_razem',\n",
"    'wpływy z innych lokalnych opłat pobieranych przez jednostki samorządu terytorialnego na podstawie odrębnych ustaw': 'Wplywy_z_innych_lokalnych_oplat',\n",
"    'wpływy z opłaty eksploatacyjnej': 'Wplywy_z_oplaty_eksploatacyjnej',\n",
"    'wpływy z opłaty skarbowej': 'Wplywy_z_oplaty_skarbowej',\n",
"    'wpływy z opłaty targowej': 'Wplywy_z_oplaty_targowej'})\n",
"\n",
"df_fina_2"
]
},
{
"cell_type": "code",
"execution_count": 772,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Wiek</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Ludnosc_ogolem</th>\n",
" <th>Ludnosc_w_wieku_poprodukcyjnym</th>\n",
" <th>Ludnosc_w_wieku_produkcyjnym</th>\n",
" <th>Ludnosc_w_wieku_produkcyjnym_mobilnym</th>\n",
" <th>Ludnosc_w_wieku_produkcyjnym_niemobilnym</th>\n",
" <th>Ludnosc_w_wieku_przedprodukcyjnym</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2010</td>\n",
" <td>40309.00</td>\n",
" <td>7683.00</td>\n",
" <td>26085.00</td>\n",
" <td>15183.00</td>\n",
" <td>10902.00</td>\n",
" <td>6541.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2011</td>\n",
" <td>40119.00</td>\n",
" <td>8020.00</td>\n",
" <td>25647.00</td>\n",
" <td>15047.00</td>\n",
" <td>10600.00</td>\n",
" <td>6452.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2012</td>\n",
" <td>39851.00</td>\n",
" <td>8392.00</td>\n",
" <td>25160.00</td>\n",
" <td>14932.00</td>\n",
" <td>10228.00</td>\n",
" <td>6299.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2013</td>\n",
" <td>39603.00</td>\n",
" <td>8678.00</td>\n",
" <td>24720.00</td>\n",
" <td>14784.00</td>\n",
" <td>9936.00</td>\n",
" <td>6205.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2014</td>\n",
" <td>39464.00</td>\n",
" <td>8971.00</td>\n",
" <td>24307.00</td>\n",
" <td>14645.00</td>\n",
" <td>9662.00</td>\n",
" <td>6186.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32210</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>40910.00</td>\n",
" <td>10472.00</td>\n",
" <td>24549.00</td>\n",
" <td>14683.00</td>\n",
" <td>9866.00</td>\n",
" <td>5889.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32211</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>40888.00</td>\n",
" <td>10788.00</td>\n",
" <td>24209.00</td>\n",
" <td>14429.00</td>\n",
" <td>9780.00</td>\n",
" <td>5891.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32212</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>40326.00</td>\n",
" <td>10962.00</td>\n",
" <td>23544.00</td>\n",
" <td>13798.00</td>\n",
" <td>9746.00</td>\n",
" <td>5820.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32213</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>39834.00</td>\n",
" <td>11050.00</td>\n",
" <td>22976.00</td>\n",
" <td>13277.00</td>\n",
" <td>9699.00</td>\n",
" <td>5808.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32214</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>39368.00</td>\n",
" <td>11157.00</td>\n",
" <td>22486.00</td>\n",
" <td>12802.00</td>\n",
" <td>9684.00</td>\n",
" <td>5725.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>32215 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
"Wiek Kod Rok Ludnosc_ogolem Ludnosc_w_wieku_poprodukcyjnym \n",
"0 0201011 2010 40309.00 7683.00 \\\n",
"1 0201011 2011 40119.00 8020.00 \n",
"2 0201011 2012 39851.00 8392.00 \n",
"3 0201011 2013 39603.00 8678.00 \n",
"4 0201011 2014 39464.00 8971.00 \n",
"... ... ... ... ... \n",
"32210 3263011 2018 40910.00 10472.00 \n",
"32211 3263011 2019 40888.00 10788.00 \n",
"32212 3263011 2020 40326.00 10962.00 \n",
"32213 3263011 2021 39834.00 11050.00 \n",
"32214 3263011 2022 39368.00 11157.00 \n",
"\n",
"Wiek Ludnosc_w_wieku_produkcyjnym Ludnosc_w_wieku_produkcyjnym_mobilnym \n",
"0 26085.00 15183.00 \\\n",
"1 25647.00 15047.00 \n",
"2 25160.00 14932.00 \n",
"3 24720.00 14784.00 \n",
"4 24307.00 14645.00 \n",
"... ... ... \n",
"32210 24549.00 14683.00 \n",
"32211 24209.00 14429.00 \n",
"32212 23544.00 13798.00 \n",
"32213 22976.00 13277.00 \n",
"32214 22486.00 12802.00 \n",
"\n",
"Wiek Ludnosc_w_wieku_produkcyjnym_niemobilnym \n",
"0 10902.00 \\\n",
"1 10600.00 \n",
"2 10228.00 \n",
"3 9936.00 \n",
"4 9662.00 \n",
"... ... \n",
"32210 9866.00 \n",
"32211 9780.00 \n",
"32212 9746.00 \n",
"32213 9699.00 \n",
"32214 9684.00 \n",
"\n",
"Wiek Ludnosc_w_wieku_przedprodukcyjnym \n",
"0 6541.00 \n",
"1 6452.00 \n",
"2 6299.00 \n",
"3 6205.00 \n",
"4 6186.00 \n",
"... ... \n",
"32210 5889.00 \n",
"32211 5891.00 \n",
"32212 5820.00 \n",
"32213 5808.00 \n",
"32214 5725.00 \n",
"\n",
"[32215 rows x 8 columns]"
]
},
"execution_count": 772,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Population table ('ogolem' = totals): read LUDN_1342_CREL_1.csv (comma as\n",
"# decimal mark), drop incomplete rows, keep codes ending in 1, 2 or 3 and\n",
"# reshape to one row per (Kod, Rok) with one column per 'Wiek' category.\n",
"df_ludn_1 = (\n",
"    pd.read_csv(\n",
"        'LUDN_1342_CREL_1.csv',\n",
"        sep=';',\n",
"        converters={'Kod': str},\n",
"        decimal=',')\n",
"    .loc[:, ['Kod', 'Wiek', 'Rok', 'Wartosc']]\n",
"    .dropna()\n",
"    .loc[lambda rows: rows['Kod'].str.endswith(('1', '2', '3'))]\n",
"    .pivot_table(index=['Kod', 'Rok'], columns='Wiek', values='Wartosc')\n",
"    .reset_index()\n",
"    .rename(columns={\n",
"        'ogółem': 'Ludnosc_ogolem',\n",
"        'w wieku poprodukcyjnym': 'Ludnosc_w_wieku_poprodukcyjnym',\n",
"        'w wieku produkcyjnym': 'Ludnosc_w_wieku_produkcyjnym',\n",
"        'w wieku produkcyjnym mobilnym': 'Ludnosc_w_wieku_produkcyjnym_mobilnym',\n",
"        'w wieku produkcyjnym niemobilnym': 'Ludnosc_w_wieku_produkcyjnym_niemobilnym',\n",
"        'w wieku przedprodukcyjnym': 'Ludnosc_w_wieku_przedprodukcyjnym'})\n",
")\n",
"\n",
"df_ludn_1"
]
},
{
"cell_type": "code",
"execution_count": 773,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Wiek</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Ludnosc_mezczyzni</th>\n",
" <th>Ludnosc_mezczyzni_w_wieku_poprodukcyjnym</th>\n",
" <th>Ludnosc_mezczyzni_w_wieku_produkcyjnym</th>\n",
" <th>Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym</th>\n",
" <th>Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym</th>\n",
" <th>Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2010</td>\n",
" <td>19085.00</td>\n",
" <td>2153.00</td>\n",
" <td>13535.00</td>\n",
" <td>7720.00</td>\n",
" <td>5815.00</td>\n",
" <td>3397.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2011</td>\n",
" <td>18985.00</td>\n",
" <td>2222.00</td>\n",
" <td>13398.00</td>\n",
" <td>7647.00</td>\n",
" <td>5751.00</td>\n",
" <td>3365.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2012</td>\n",
" <td>18859.00</td>\n",
" <td>2370.00</td>\n",
" <td>13238.00</td>\n",
" <td>7611.00</td>\n",
" <td>5627.00</td>\n",
" <td>3251.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2013</td>\n",
" <td>18737.00</td>\n",
" <td>2477.00</td>\n",
" <td>13028.00</td>\n",
" <td>7501.00</td>\n",
" <td>5527.00</td>\n",
" <td>3232.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2014</td>\n",
" <td>18640.00</td>\n",
" <td>2620.00</td>\n",
" <td>12832.00</td>\n",
" <td>7442.00</td>\n",
" <td>5390.00</td>\n",
" <td>3188.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32210</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>19690.00</td>\n",
" <td>3501.00</td>\n",
" <td>13202.00</td>\n",
" <td>7547.00</td>\n",
" <td>5655.00</td>\n",
" <td>2987.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32211</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>19683.00</td>\n",
" <td>3644.00</td>\n",
" <td>13044.00</td>\n",
" <td>7417.00</td>\n",
" <td>5627.00</td>\n",
" <td>2995.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32212</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>19356.00</td>\n",
" <td>3749.00</td>\n",
" <td>12617.00</td>\n",
" <td>6986.00</td>\n",
" <td>5631.00</td>\n",
" <td>2990.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32213</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>19096.00</td>\n",
" <td>3852.00</td>\n",
" <td>12267.00</td>\n",
" <td>6747.00</td>\n",
" <td>5520.00</td>\n",
" <td>2977.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32214</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>18869.00</td>\n",
" <td>3901.00</td>\n",
" <td>12009.00</td>\n",
" <td>6485.00</td>\n",
" <td>5524.00</td>\n",
" <td>2959.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>32215 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
"Wiek Kod Rok Ludnosc_mezczyzni \n",
"0 0201011 2010 19085.00 \\\n",
"1 0201011 2011 18985.00 \n",
"2 0201011 2012 18859.00 \n",
"3 0201011 2013 18737.00 \n",
"4 0201011 2014 18640.00 \n",
"... ... ... ... \n",
"32210 3263011 2018 19690.00 \n",
"32211 3263011 2019 19683.00 \n",
"32212 3263011 2020 19356.00 \n",
"32213 3263011 2021 19096.00 \n",
"32214 3263011 2022 18869.00 \n",
"\n",
"Wiek Ludnosc_mezczyzni_w_wieku_poprodukcyjnym \n",
"0 2153.00 \\\n",
"1 2222.00 \n",
"2 2370.00 \n",
"3 2477.00 \n",
"4 2620.00 \n",
"... ... \n",
"32210 3501.00 \n",
"32211 3644.00 \n",
"32212 3749.00 \n",
"32213 3852.00 \n",
"32214 3901.00 \n",
"\n",
"Wiek Ludnosc_mezczyzni_w_wieku_produkcyjnym \n",
"0 13535.00 \\\n",
"1 13398.00 \n",
"2 13238.00 \n",
"3 13028.00 \n",
"4 12832.00 \n",
"... ... \n",
"32210 13202.00 \n",
"32211 13044.00 \n",
"32212 12617.00 \n",
"32213 12267.00 \n",
"32214 12009.00 \n",
"\n",
"Wiek Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym \n",
"0 7720.00 \\\n",
"1 7647.00 \n",
"2 7611.00 \n",
"3 7501.00 \n",
"4 7442.00 \n",
"... ... \n",
"32210 7547.00 \n",
"32211 7417.00 \n",
"32212 6986.00 \n",
"32213 6747.00 \n",
"32214 6485.00 \n",
"\n",
"Wiek Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym \n",
"0 5815.00 \\\n",
"1 5751.00 \n",
"2 5627.00 \n",
"3 5527.00 \n",
"4 5390.00 \n",
"... ... \n",
"32210 5655.00 \n",
"32211 5627.00 \n",
"32212 5631.00 \n",
"32213 5520.00 \n",
"32214 5524.00 \n",
"\n",
"Wiek Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym \n",
"0 3397.00 \n",
"1 3365.00 \n",
"2 3251.00 \n",
"3 3232.00 \n",
"4 3188.00 \n",
"... ... \n",
"32210 2987.00 \n",
"32211 2995.00 \n",
"32212 2990.00 \n",
"32213 2977.00 \n",
"32214 2959.00 \n",
"\n",
"[32215 rows x 8 columns]"
]
},
"execution_count": 773,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_ludn_2 = pd.read_csv( # mezczyzni\n",
" 'LUDN_1342_CREL_2.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_ludn_2 = df_ludn_2[['Kod', 'Wiek', 'Rok', 'Wartosc']]\n",
"df_ludn_2 = df_ludn_2.dropna()\n",
"df_ludn_2 = df_ludn_2.loc[df_ludn_2['Kod'].str.endswith(('1', '2', '3'))]\n",
"df_ludn_2 = df_ludn_2.pivot_table(index=['Kod', 'Rok'], columns='Wiek', values='Wartosc').reset_index()\n",
"df_ludn_2 = df_ludn_2.rename(columns={\n",
" 'ogółem': 'Ludnosc_mezczyzni',\n",
" 'w wieku poprodukcyjnym': 'Ludnosc_mezczyzni_w_wieku_poprodukcyjnym',\n",
" 'w wieku produkcyjnym': 'Ludnosc_mezczyzni_w_wieku_produkcyjnym',\n",
" 'w wieku produkcyjnym mobilnym': 'Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym',\n",
" 'w wieku produkcyjnym niemobilnym': 'Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym',\n",
" 'w wieku przedprodukcyjnym': 'Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym'})\n",
"\n",
"df_ludn_2"
]
},
{
"cell_type": "code",
"execution_count": 774,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Wiek</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Ludnosc_kobiety</th>\n",
" <th>Ludnosc_kobiety_w_wieku_poprodukcyjnym</th>\n",
" <th>Ludnosc_kobiety_w_wieku_produkcyjnym</th>\n",
" <th>Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym</th>\n",
" <th>Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym</th>\n",
" <th>Ludnosc_kobiety_w_wieku_przedprodukcyjnym</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2010</td>\n",
" <td>21224.00</td>\n",
" <td>5530.00</td>\n",
" <td>12550.00</td>\n",
" <td>7463.00</td>\n",
" <td>5087.00</td>\n",
" <td>3144.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2011</td>\n",
" <td>21134.00</td>\n",
" <td>5798.00</td>\n",
" <td>12249.00</td>\n",
" <td>7400.00</td>\n",
" <td>4849.00</td>\n",
" <td>3087.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2012</td>\n",
" <td>20992.00</td>\n",
" <td>6022.00</td>\n",
" <td>11922.00</td>\n",
" <td>7321.00</td>\n",
" <td>4601.00</td>\n",
" <td>3048.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2013</td>\n",
" <td>20866.00</td>\n",
" <td>6201.00</td>\n",
" <td>11692.00</td>\n",
" <td>7283.00</td>\n",
" <td>4409.00</td>\n",
" <td>2973.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2014</td>\n",
" <td>20824.00</td>\n",
" <td>6351.00</td>\n",
" <td>11475.00</td>\n",
" <td>7203.00</td>\n",
" <td>4272.00</td>\n",
" <td>2998.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32210</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>21220.00</td>\n",
" <td>6971.00</td>\n",
" <td>11347.00</td>\n",
" <td>7136.00</td>\n",
" <td>4211.00</td>\n",
" <td>2902.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32211</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>21205.00</td>\n",
" <td>7144.00</td>\n",
" <td>11165.00</td>\n",
" <td>7012.00</td>\n",
" <td>4153.00</td>\n",
" <td>2896.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32212</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>20970.00</td>\n",
" <td>7213.00</td>\n",
" <td>10927.00</td>\n",
" <td>6812.00</td>\n",
" <td>4115.00</td>\n",
" <td>2830.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32213</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>20738.00</td>\n",
" <td>7198.00</td>\n",
" <td>10709.00</td>\n",
" <td>6530.00</td>\n",
" <td>4179.00</td>\n",
" <td>2831.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32214</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>20499.00</td>\n",
" <td>7256.00</td>\n",
" <td>10477.00</td>\n",
" <td>6317.00</td>\n",
" <td>4160.00</td>\n",
" <td>2766.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>32215 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
"Wiek Kod Rok Ludnosc_kobiety Ludnosc_kobiety_w_wieku_poprodukcyjnym \n",
"0 0201011 2010 21224.00 5530.00 \\\n",
"1 0201011 2011 21134.00 5798.00 \n",
"2 0201011 2012 20992.00 6022.00 \n",
"3 0201011 2013 20866.00 6201.00 \n",
"4 0201011 2014 20824.00 6351.00 \n",
"... ... ... ... ... \n",
"32210 3263011 2018 21220.00 6971.00 \n",
"32211 3263011 2019 21205.00 7144.00 \n",
"32212 3263011 2020 20970.00 7213.00 \n",
"32213 3263011 2021 20738.00 7198.00 \n",
"32214 3263011 2022 20499.00 7256.00 \n",
"\n",
"Wiek Ludnosc_kobiety_w_wieku_produkcyjnym \n",
"0 12550.00 \\\n",
"1 12249.00 \n",
"2 11922.00 \n",
"3 11692.00 \n",
"4 11475.00 \n",
"... ... \n",
"32210 11347.00 \n",
"32211 11165.00 \n",
"32212 10927.00 \n",
"32213 10709.00 \n",
"32214 10477.00 \n",
"\n",
"Wiek Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym \n",
"0 7463.00 \\\n",
"1 7400.00 \n",
"2 7321.00 \n",
"3 7283.00 \n",
"4 7203.00 \n",
"... ... \n",
"32210 7136.00 \n",
"32211 7012.00 \n",
"32212 6812.00 \n",
"32213 6530.00 \n",
"32214 6317.00 \n",
"\n",
"Wiek Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym \n",
"0 5087.00 \\\n",
"1 4849.00 \n",
"2 4601.00 \n",
"3 4409.00 \n",
"4 4272.00 \n",
"... ... \n",
"32210 4211.00 \n",
"32211 4153.00 \n",
"32212 4115.00 \n",
"32213 4179.00 \n",
"32214 4160.00 \n",
"\n",
"Wiek Ludnosc_kobiety_w_wieku_przedprodukcyjnym \n",
"0 3144.00 \n",
"1 3087.00 \n",
"2 3048.00 \n",
"3 2973.00 \n",
"4 2998.00 \n",
"... ... \n",
"32210 2902.00 \n",
"32211 2896.00 \n",
"32212 2830.00 \n",
"32213 2831.00 \n",
"32214 2766.00 \n",
"\n",
"[32215 rows x 8 columns]"
]
},
"execution_count": 774,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_ludn_3 = pd.read_csv( # kobiety\n",
" 'LUDN_1342_CREL_3.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_ludn_3 = df_ludn_3[['Kod', 'Wiek', 'Rok', 'Wartosc']]\n",
"df_ludn_3 = df_ludn_3.dropna()\n",
"df_ludn_3 = df_ludn_3.loc[df_ludn_3['Kod'].str.endswith(('1', '2', '3'))]\n",
"df_ludn_3 = df_ludn_3.pivot_table(index=['Kod', 'Rok'], columns='Wiek', values='Wartosc').reset_index()\n",
"df_ludn_3 = df_ludn_3.rename(columns={\n",
" 'ogółem': 'Ludnosc_kobiety',\n",
" 'w wieku poprodukcyjnym': 'Ludnosc_kobiety_w_wieku_poprodukcyjnym',\n",
" 'w wieku produkcyjnym': 'Ludnosc_kobiety_w_wieku_produkcyjnym',\n",
" 'w wieku produkcyjnym mobilnym': 'Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym',\n",
" 'w wieku produkcyjnym niemobilnym': 'Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym',\n",
" 'w wieku przedprodukcyjnym': 'Ludnosc_kobiety_w_wieku_przedprodukcyjnym'})\n",
"\n",
"df_ludn_3"
]
},
{
"cell_type": "code",
"execution_count": 775,
"metadata": {},
"outputs": [],
"source": [
"df_ludn_4 = pd.read_csv(\n",
" 'LUDN_2425_CREL.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_ludn_4 = df_ludn_4[['Kod', 'Wskaźniki', 'Rok', 'Wartosc']]\n",
"df_ludn_4 = df_ludn_4.dropna()\n",
"df_ludn_4 = df_ludn_4.loc[df_ludn_4['Kod'].str.endswith(('1', '2', '3'))]\n",
"df_ludn_4 = df_ludn_4.pivot_table(index=['Kod', 'Rok'], columns='Wskaźniki', values='Wartosc').reset_index()\n",
"df_ludn_4 = df_ludn_4.rename(columns={\n",
" 'gęstość zaludnienia powierzchni zabudowanej i zurbanizowanej (osoby/km2)': 'Gestosc_zaludnienia',\n",
" 'ludność na 1 km2': 'Ludnosc_na_1_km2',\n",
" 'ludność w tysiącach': 'Ludnosc',\n",
" 'ludność w tysiącach kobiety': 'Ludnosc_kobiety',\n",
" 'ludność w tysiącach mężczyźni': 'Ludnosc_mezczyzni',\n",
" 'wskaźnik urbanizacji': 'Wskaznik_urbanizacji',\n",
" 'zmiana liczby ludności na 1000 mieszkańców': 'Zmiana_liczby_ludnosci'})\n",
"\n",
"df_ludn_4 = df_ludn_4[[\n",
" 'Kod',\n",
" 'Rok',\n",
" # 'Gestosc_zaludnienia',\n",
" 'Ludnosc_na_1_km2',\n",
" 'Ludnosc',\n",
" 'Ludnosc_kobiety',\n",
" 'Ludnosc_mezczyzni',\n",
" 'Wskaznik_urbanizacji',\n",
" 'Zmiana_liczby_ludnosci']]"
]
},
{
"cell_type": "code",
"execution_count": 776,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Kierunki migracji</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Saldo_migracji_na_1000_ludnosci</th>\n",
" <th>Saldo_migracji</th>\n",
" <th>Wymeldowania_do_miast_kobiety</th>\n",
" <th>Wymeldowania_do_miast_mezczyzni</th>\n",
" <th>Wymeldowania_do_miast_ogolem</th>\n",
" <th>Wymeldowania_na_wies_kobiety</th>\n",
" <th>Wymeldowania_na_wies_mezczyzni</th>\n",
" <th>Wymeldowania_na_wies_ogolem</th>\n",
" <th>...</th>\n",
" <th>Wymeldowania_za_granice_ogolem</th>\n",
" <th>Zameldowania_kobiety</th>\n",
" <th>Zameldowania_mezczyzni</th>\n",
" <th>Zameldowania_ogolem</th>\n",
" <th>Zameldowania_z_miast_kobiety</th>\n",
" <th>Zameldowania_z_miast_mezczyzni</th>\n",
" <th>Zameldowania_z_miast_ogolem</th>\n",
" <th>Zameldowania_ze_wsi_kobiety</th>\n",
" <th>Zameldowania_ze_wsi_mezczyzni</th>\n",
" <th>Zameldowania_ze_wsi_ogolem</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2010</td>\n",
" <td>-3.70</td>\n",
" <td>-151.00</td>\n",
" <td>108.00</td>\n",
" <td>96.00</td>\n",
" <td>204.00</td>\n",
" <td>170.00</td>\n",
" <td>177.00</td>\n",
" <td>347.00</td>\n",
" <td>...</td>\n",
" <td>0.00</td>\n",
" <td>223.00</td>\n",
" <td>177.00</td>\n",
" <td>400.00</td>\n",
" <td>70.00</td>\n",
" <td>52.00</td>\n",
" <td>122.00</td>\n",
" <td>147.00</td>\n",
" <td>118.00</td>\n",
" <td>265.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2011</td>\n",
" <td>-4.60</td>\n",
" <td>-186.00</td>\n",
" <td>111.00</td>\n",
" <td>99.00</td>\n",
" <td>210.00</td>\n",
" <td>170.00</td>\n",
" <td>157.00</td>\n",
" <td>327.00</td>\n",
" <td>...</td>\n",
" <td>1.00</td>\n",
" <td>196.00</td>\n",
" <td>156.00</td>\n",
" <td>352.00</td>\n",
" <td>67.00</td>\n",
" <td>59.00</td>\n",
" <td>126.00</td>\n",
" <td>125.00</td>\n",
" <td>94.00</td>\n",
" <td>219.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2012</td>\n",
" <td>-3.70</td>\n",
" <td>-149.00</td>\n",
" <td>100.00</td>\n",
" <td>92.00</td>\n",
" <td>192.00</td>\n",
" <td>147.00</td>\n",
" <td>153.00</td>\n",
" <td>300.00</td>\n",
" <td>...</td>\n",
" <td>9.00</td>\n",
" <td>197.00</td>\n",
" <td>155.00</td>\n",
" <td>352.00</td>\n",
" <td>78.00</td>\n",
" <td>61.00</td>\n",
" <td>139.00</td>\n",
" <td>116.00</td>\n",
" <td>92.00</td>\n",
" <td>208.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2013</td>\n",
" <td>-4.80</td>\n",
" <td>-191.00</td>\n",
" <td>115.00</td>\n",
" <td>88.00</td>\n",
" <td>203.00</td>\n",
" <td>182.00</td>\n",
" <td>158.00</td>\n",
" <td>340.00</td>\n",
" <td>...</td>\n",
" <td>24.00</td>\n",
" <td>211.00</td>\n",
" <td>165.00</td>\n",
" <td>376.00</td>\n",
" <td>83.00</td>\n",
" <td>58.00</td>\n",
" <td>141.00</td>\n",
" <td>128.00</td>\n",
" <td>101.00</td>\n",
" <td>229.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2014</td>\n",
" <td>-4.20</td>\n",
" <td>-167.00</td>\n",
" <td>100.00</td>\n",
" <td>86.00</td>\n",
" <td>186.00</td>\n",
" <td>168.00</td>\n",
" <td>161.00</td>\n",
" <td>329.00</td>\n",
" <td>...</td>\n",
" <td>41.00</td>\n",
" <td>196.00</td>\n",
" <td>193.00</td>\n",
" <td>389.00</td>\n",
" <td>71.00</td>\n",
" <td>71.00</td>\n",
" <td>142.00</td>\n",
" <td>125.00</td>\n",
" <td>121.00</td>\n",
" <td>246.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32210</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>1.70</td>\n",
" <td>71.00</td>\n",
" <td>125.00</td>\n",
" <td>152.00</td>\n",
" <td>277.00</td>\n",
" <td>40.00</td>\n",
" <td>66.00</td>\n",
" <td>106.00</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>245.00</td>\n",
" <td>240.00</td>\n",
" <td>485.00</td>\n",
" <td>156.00</td>\n",
" <td>138.00</td>\n",
" <td>294.00</td>\n",
" <td>73.00</td>\n",
" <td>79.00</td>\n",
" <td>152.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32211</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>3.40</td>\n",
" <td>141.00</td>\n",
" <td>151.00</td>\n",
" <td>116.00</td>\n",
" <td>267.00</td>\n",
" <td>48.00</td>\n",
" <td>53.00</td>\n",
" <td>101.00</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>273.00</td>\n",
" <td>259.00</td>\n",
" <td>532.00</td>\n",
" <td>179.00</td>\n",
" <td>149.00</td>\n",
" <td>328.00</td>\n",
" <td>71.00</td>\n",
" <td>90.00</td>\n",
" <td>161.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32212</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>3.20</td>\n",
" <td>129.00</td>\n",
" <td>98.00</td>\n",
" <td>99.00</td>\n",
" <td>197.00</td>\n",
" <td>40.00</td>\n",
" <td>44.00</td>\n",
" <td>84.00</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>226.00</td>\n",
" <td>203.00</td>\n",
" <td>429.00</td>\n",
" <td>159.00</td>\n",
" <td>131.00</td>\n",
" <td>290.00</td>\n",
" <td>52.00</td>\n",
" <td>53.00</td>\n",
" <td>105.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32213</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>-1.40</td>\n",
" <td>-55.00</td>\n",
" <td>122.00</td>\n",
" <td>126.00</td>\n",
" <td>248.00</td>\n",
" <td>63.00</td>\n",
" <td>50.00</td>\n",
" <td>113.00</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>171.00</td>\n",
" <td>168.00</td>\n",
" <td>339.00</td>\n",
" <td>109.00</td>\n",
" <td>95.00</td>\n",
" <td>204.00</td>\n",
" <td>49.00</td>\n",
" <td>46.00</td>\n",
" <td>95.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32214</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>-3.50</td>\n",
" <td>-138.00</td>\n",
" <td>116.00</td>\n",
" <td>105.00</td>\n",
" <td>221.00</td>\n",
" <td>73.00</td>\n",
" <td>69.00</td>\n",
" <td>142.00</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>141.00</td>\n",
" <td>138.00</td>\n",
" <td>279.00</td>\n",
" <td>85.00</td>\n",
" <td>71.00</td>\n",
" <td>156.00</td>\n",
" <td>38.00</td>\n",
" <td>39.00</td>\n",
" <td>77.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>32215 rows × 25 columns</p>\n",
"</div>"
],
"text/plain": [
"Kierunki migracji Kod Rok Saldo_migracji_na_1000_ludnosci \n",
"0 0201011 2010 -3.70 \\\n",
"1 0201011 2011 -4.60 \n",
"2 0201011 2012 -3.70 \n",
"3 0201011 2013 -4.80 \n",
"4 0201011 2014 -4.20 \n",
"... ... ... ... \n",
"32210 3263011 2018 1.70 \n",
"32211 3263011 2019 3.40 \n",
"32212 3263011 2020 3.20 \n",
"32213 3263011 2021 -1.40 \n",
"32214 3263011 2022 -3.50 \n",
"\n",
"Kierunki migracji Saldo_migracji Wymeldowania_do_miast_kobiety \n",
"0 -151.00 108.00 \\\n",
"1 -186.00 111.00 \n",
"2 -149.00 100.00 \n",
"3 -191.00 115.00 \n",
"4 -167.00 100.00 \n",
"... ... ... \n",
"32210 71.00 125.00 \n",
"32211 141.00 151.00 \n",
"32212 129.00 98.00 \n",
"32213 -55.00 122.00 \n",
"32214 -138.00 116.00 \n",
"\n",
"Kierunki migracji Wymeldowania_do_miast_mezczyzni \n",
"0 96.00 \\\n",
"1 99.00 \n",
"2 92.00 \n",
"3 88.00 \n",
"4 86.00 \n",
"... ... \n",
"32210 152.00 \n",
"32211 116.00 \n",
"32212 99.00 \n",
"32213 126.00 \n",
"32214 105.00 \n",
"\n",
"Kierunki migracji Wymeldowania_do_miast_ogolem Wymeldowania_na_wies_kobiety \n",
"0 204.00 170.00 \\\n",
"1 210.00 170.00 \n",
"2 192.00 147.00 \n",
"3 203.00 182.00 \n",
"4 186.00 168.00 \n",
"... ... ... \n",
"32210 277.00 40.00 \n",
"32211 267.00 48.00 \n",
"32212 197.00 40.00 \n",
"32213 248.00 63.00 \n",
"32214 221.00 73.00 \n",
"\n",
"Kierunki migracji Wymeldowania_na_wies_mezczyzni \n",
"0 177.00 \\\n",
"1 157.00 \n",
"2 153.00 \n",
"3 158.00 \n",
"4 161.00 \n",
"... ... \n",
"32210 66.00 \n",
"32211 53.00 \n",
"32212 44.00 \n",
"32213 50.00 \n",
"32214 69.00 \n",
"\n",
"Kierunki migracji Wymeldowania_na_wies_ogolem ... \n",
"0 347.00 ... \\\n",
"1 327.00 ... \n",
"2 300.00 ... \n",
"3 340.00 ... \n",
"4 329.00 ... \n",
"... ... ... \n",
"32210 106.00 ... \n",
"32211 101.00 ... \n",
"32212 84.00 ... \n",
"32213 113.00 ... \n",
"32214 142.00 ... \n",
"\n",
"Kierunki migracji Wymeldowania_za_granice_ogolem Zameldowania_kobiety \n",
"0 0.00 223.00 \\\n",
"1 1.00 196.00 \n",
"2 9.00 197.00 \n",
"3 24.00 211.00 \n",
"4 41.00 196.00 \n",
"... ... ... \n",
"32210 NaN 245.00 \n",
"32211 NaN 273.00 \n",
"32212 NaN 226.00 \n",
"32213 NaN 171.00 \n",
"32214 NaN 141.00 \n",
"\n",
"Kierunki migracji Zameldowania_mezczyzni Zameldowania_ogolem \n",
"0 177.00 400.00 \\\n",
"1 156.00 352.00 \n",
"2 155.00 352.00 \n",
"3 165.00 376.00 \n",
"4 193.00 389.00 \n",
"... ... ... \n",
"32210 240.00 485.00 \n",
"32211 259.00 532.00 \n",
"32212 203.00 429.00 \n",
"32213 168.00 339.00 \n",
"32214 138.00 279.00 \n",
"\n",
"Kierunki migracji Zameldowania_z_miast_kobiety \n",
"0 70.00 \\\n",
"1 67.00 \n",
"2 78.00 \n",
"3 83.00 \n",
"4 71.00 \n",
"... ... \n",
"32210 156.00 \n",
"32211 179.00 \n",
"32212 159.00 \n",
"32213 109.00 \n",
"32214 85.00 \n",
"\n",
"Kierunki migracji Zameldowania_z_miast_mezczyzni \n",
"0 52.00 \\\n",
"1 59.00 \n",
"2 61.00 \n",
"3 58.00 \n",
"4 71.00 \n",
"... ... \n",
"32210 138.00 \n",
"32211 149.00 \n",
"32212 131.00 \n",
"32213 95.00 \n",
"32214 71.00 \n",
"\n",
"Kierunki migracji Zameldowania_z_miast_ogolem Zameldowania_ze_wsi_kobiety \n",
"0 122.00 147.00 \\\n",
"1 126.00 125.00 \n",
"2 139.00 116.00 \n",
"3 141.00 128.00 \n",
"4 142.00 125.00 \n",
"... ... ... \n",
"32210 294.00 73.00 \n",
"32211 328.00 71.00 \n",
"32212 290.00 52.00 \n",
"32213 204.00 49.00 \n",
"32214 156.00 38.00 \n",
"\n",
"Kierunki migracji Zameldowania_ze_wsi_mezczyzni Zameldowania_ze_wsi_ogolem \n",
"0 118.00 265.00 \n",
"1 94.00 219.00 \n",
"2 92.00 208.00 \n",
"3 101.00 229.00 \n",
"4 121.00 246.00 \n",
"... ... ... \n",
"32210 79.00 152.00 \n",
"32211 90.00 161.00 \n",
"32212 53.00 105.00 \n",
"32213 46.00 95.00 \n",
"32214 39.00 77.00 \n",
"\n",
"[32215 rows x 25 columns]"
]
},
"execution_count": 776,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_ludn_5 = pd.read_csv(\n",
" 'LUDN_1355_CREL.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_ludn_5['Kierunki migracji'] = df_ludn_5['Kierunki migracji'] + df_ludn_5['Płeć']\n",
"df_ludn_5 = df_ludn_5[['Kod', 'Kierunki migracji', 'Rok', 'Wartosc']]\n",
"df_ludn_5 = df_ludn_5.dropna()\n",
"df_ludn_5 = df_ludn_5.loc[df_ludn_5['Kod'].str.endswith(('1', '2', '3'))]\n",
"df_ludn_5 = df_ludn_5.pivot_table(index=['Kod', 'Rok'], columns='Kierunki migracji', values='Wartosc').reset_index()\n",
"df_ludn_5 = df_ludn_5.rename(columns={\n",
" 'saldo migracji na 1000 ludnościogółem': 'Saldo_migracji_na_1000_ludnosci',\n",
" 'saldo migracjiogółem': 'Saldo_migracji',\n",
" 'wymeldowania do miastkobiety': 'Wymeldowania_do_miast_kobiety',\n",
" 'wymeldowania do miastmężczyźni': 'Wymeldowania_do_miast_mezczyzni',\n",
" 'wymeldowania do miastogółem': 'Wymeldowania_do_miast_ogolem',\n",
" 'wymeldowania na wieśkobiety': 'Wymeldowania_na_wies_kobiety',\n",
" 'wymeldowania na wieśmężczyźni': 'Wymeldowania_na_wies_mezczyzni',\n",
" 'wymeldowania na wieśogółem': 'Wymeldowania_na_wies_ogolem',\n",
" 'wymeldowania ogółemkobiety': 'Wymeldowania_kobiety',\n",
" 'wymeldowania ogółemmężczyźni': 'Wymeldowania_mezczyzni',\n",
" 'wymeldowania ogółemogółem': 'Wymeldowania_ogolem',\n",
" 'wymeldowania za granicękobiety': 'Wymeldowania_za_granice_kobiety',\n",
" 'wymeldowania za granicęmężczyźni': 'Wymeldowania_za_granice_mezczyzni',\n",
" 'wymeldowania za granicęogółem': 'Wymeldowania_za_granice_ogolem',\n",
" 'zameldowania ogółemkobiety': 'Zameldowania_kobiety',\n",
" 'zameldowania ogółemmężczyźni': 'Zameldowania_mezczyzni',\n",
" 'zameldowania ogółemogółem': 'Zameldowania_ogolem',\n",
" 'zameldowania z miastkobiety': 'Zameldowania_z_miast_kobiety',\n",
" 'zameldowania z miastmężczyźni': 'Zameldowania_z_miast_mezczyzni',\n",
" 'zameldowania z miastogółem': 'Zameldowania_z_miast_ogolem',\n",
" 'zameldowania ze wsikobiety': 'Zameldowania_ze_wsi_kobiety',\n",
" 'zameldowania ze wsimężczyźni': 'Zameldowania_ze_wsi_mezczyzni',\n",
" 'zameldowania ze wsiogółem': 'Zameldowania_ze_wsi_ogolem'})\n",
"\n",
"df_ludn_5"
]
},
{
"cell_type": "code",
"execution_count": 777,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Turystyczne obiekty noclegowe</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Miejsca_noclegowe_caloroczne</th>\n",
" <th>Miejsca_noclegowe_ogolem</th>\n",
" <th>Obiekty_caloroczne</th>\n",
" <th>Obiekty_ogolem</th>\n",
" <th>Turysci_ogolem</th>\n",
" <th>Turysci_zagraniczni</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2010</td>\n",
" <td>265.00</td>\n",
" <td>265.00</td>\n",
" <td>7.00</td>\n",
" <td>7.00</td>\n",
" <td>16427.00</td>\n",
" <td>5173.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2011</td>\n",
" <td>267.00</td>\n",
" <td>267.00</td>\n",
" <td>7.00</td>\n",
" <td>7.00</td>\n",
" <td>13134.00</td>\n",
" <td>4486.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2012</td>\n",
" <td>295.00</td>\n",
" <td>295.00</td>\n",
" <td>8.00</td>\n",
" <td>8.00</td>\n",
" <td>13159.00</td>\n",
" <td>4856.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2013</td>\n",
" <td>293.00</td>\n",
" <td>293.00</td>\n",
" <td>8.00</td>\n",
" <td>8.00</td>\n",
" <td>11914.00</td>\n",
" <td>4701.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2014</td>\n",
" <td>292.00</td>\n",
" <td>292.00</td>\n",
" <td>8.00</td>\n",
" <td>8.00</td>\n",
" <td>12398.00</td>\n",
" <td>3919.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34697</th>\n",
" <td>3263011</td>\n",
" <td>2018</td>\n",
" <td>9757.00</td>\n",
" <td>11717.00</td>\n",
" <td>76.00</td>\n",
" <td>107.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34698</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>9963.00</td>\n",
" <td>11805.00</td>\n",
" <td>74.00</td>\n",
" <td>103.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34699</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>9673.00</td>\n",
" <td>11557.00</td>\n",
" <td>68.00</td>\n",
" <td>97.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34700</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>8731.00</td>\n",
" <td>10551.00</td>\n",
" <td>66.00</td>\n",
" <td>92.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34701</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>8893.00</td>\n",
" <td>10738.00</td>\n",
" <td>68.00</td>\n",
" <td>92.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>34702 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
"Turystyczne obiekty noclegowe Kod Rok Miejsca_noclegowe_caloroczne \n",
"0 0201011 2010 265.00 \\\n",
"1 0201011 2011 267.00 \n",
"2 0201011 2012 295.00 \n",
"3 0201011 2013 293.00 \n",
"4 0201011 2014 292.00 \n",
"... ... ... ... \n",
"34697 3263011 2018 9757.00 \n",
"34698 3263011 2019 9963.00 \n",
"34699 3263011 2020 9673.00 \n",
"34700 3263011 2021 8731.00 \n",
"34701 3263011 2022 8893.00 \n",
"\n",
"Turystyczne obiekty noclegowe Miejsca_noclegowe_ogolem Obiekty_caloroczne \n",
"0 265.00 7.00 \\\n",
"1 267.00 7.00 \n",
"2 295.00 8.00 \n",
"3 293.00 8.00 \n",
"4 292.00 8.00 \n",
"... ... ... \n",
"34697 11717.00 76.00 \n",
"34698 11805.00 74.00 \n",
"34699 11557.00 68.00 \n",
"34700 10551.00 66.00 \n",
"34701 10738.00 68.00 \n",
"\n",
"Turystyczne obiekty noclegowe Obiekty_ogolem Turysci_ogolem \n",
"0 7.00 16427.00 \\\n",
"1 7.00 13134.00 \n",
"2 8.00 13159.00 \n",
"3 8.00 11914.00 \n",
"4 8.00 12398.00 \n",
"... ... ... \n",
"34697 107.00 NaN \n",
"34698 103.00 NaN \n",
"34699 97.00 NaN \n",
"34700 92.00 NaN \n",
"34701 92.00 NaN \n",
"\n",
"Turystyczne obiekty noclegowe Turysci_zagraniczni \n",
"0 5173.00 \n",
"1 4486.00 \n",
"2 4856.00 \n",
"3 4701.00 \n",
"4 3919.00 \n",
"... ... \n",
"34697 NaN \n",
"34698 NaN \n",
"34699 NaN \n",
"34700 NaN \n",
"34701 NaN \n",
"\n",
"[34702 rows x 8 columns]"
]
},
"execution_count": 777,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_tury = pd.read_csv(\n",
" 'TURY_2017_CREL.csv',\n",
" sep=';',\n",
" converters={'Kod': str},\n",
" decimal=',')\n",
"df_tury = df_tury[['Kod', 'Turystyczne obiekty noclegowe', 'Rok', 'Wartosc']]\n",
"df_tury = df_tury.dropna()\n",
"df_tury = df_tury.pivot_table(index=['Kod', 'Rok'], columns='Turystyczne obiekty noclegowe', values='Wartosc').reset_index()\n",
"df_tury = df_tury.rename(columns={\n",
" 'miejsca noclegowe całoroczne lipiec': 'Miejsca_noclegowe_caloroczne',\n",
" 'miejsca noclegowe ogółem lipiec': 'Miejsca_noclegowe_ogolem',\n",
" 'obiekty całoroczne lipiec': 'Obiekty_caloroczne',\n",
" 'obiekty ogółem lipiec': 'Obiekty_ogolem',\n",
" 'turyści (korzystający) ogółem styczeń-grudzień': 'Turysci_ogolem',\n",
" 'turyści zagraniczni (korzystający) - nierezydenci styczeń-grudzień': 'Turysci_zagraniczni'})\n",
"\n",
"df_tury"
]
},
{
"cell_type": "code",
"execution_count": 778,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Bezrobotni</th>\n",
" <th>Kod</th>\n",
" <th>Rok</th>\n",
" <th>Bezrobotni_do_25_roku_zycia</th>\n",
" <th>Bezrobotni_do_30_roku_zycia</th>\n",
" <th>Dlugotrwale_bezrobotni</th>\n",
" <th>Bezrobotne_kobiety</th>\n",
" <th>Bezrobotni_mezczyzni</th>\n",
" <th>Bezrobotni_ogolem</th>\n",
" <th>Bezrobotni_powyzej_50_roku_zycia</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0201011</td>\n",
" <td>2011</td>\n",
" <td>284.00</td>\n",
" <td>NaN</td>\n",
" <td>819.50</td>\n",
" <td>900.50</td>\n",
" <td>818.00</td>\n",
" <td>1718.50</td>\n",
" <td>486.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0201011</td>\n",
" <td>2012</td>\n",
" <td>293.00</td>\n",
" <td>NaN</td>\n",
" <td>756.50</td>\n",
" <td>894.50</td>\n",
" <td>888.00</td>\n",
" <td>1782.50</td>\n",
" <td>498.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0201011</td>\n",
" <td>2013</td>\n",
" <td>253.50</td>\n",
" <td>NaN</td>\n",
" <td>788.00</td>\n",
" <td>869.50</td>\n",
" <td>874.00</td>\n",
" <td>1743.50</td>\n",
" <td>521.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0201011</td>\n",
" <td>2014</td>\n",
" <td>172.50</td>\n",
" <td>NaN</td>\n",
" <td>651.50</td>\n",
" <td>648.50</td>\n",
" <td>667.50</td>\n",
" <td>1316.00</td>\n",
" <td>402.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0201011</td>\n",
" <td>2015</td>\n",
" <td>107.50</td>\n",
" <td>238.00</td>\n",
" <td>434.50</td>\n",
" <td>504.00</td>\n",
" <td>518.50</td>\n",
" <td>1022.50</td>\n",
" <td>359.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48530</th>\n",
" <td>3263011</td>\n",
" <td>2019</td>\n",
" <td>27.50</td>\n",
" <td>66.00</td>\n",
" <td>226.50</td>\n",
" <td>272.50</td>\n",
" <td>221.00</td>\n",
" <td>493.50</td>\n",
" <td>181.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48531</th>\n",
" <td>3263011</td>\n",
" <td>2020</td>\n",
" <td>56.00</td>\n",
" <td>142.00</td>\n",
" <td>239.50</td>\n",
" <td>390.00</td>\n",
" <td>361.50</td>\n",
" <td>751.50</td>\n",
" <td>250.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48532</th>\n",
" <td>3263011</td>\n",
" <td>2021</td>\n",
" <td>34.50</td>\n",
" <td>88.00</td>\n",
" <td>260.50</td>\n",
" <td>295.00</td>\n",
" <td>341.00</td>\n",
" <td>636.00</td>\n",
" <td>239.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48533</th>\n",
" <td>3263011</td>\n",
" <td>2022</td>\n",
" <td>31.50</td>\n",
" <td>72.00</td>\n",
" <td>199.00</td>\n",
" <td>211.50</td>\n",
" <td>270.50</td>\n",
" <td>482.00</td>\n",
" <td>182.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48534</th>\n",
" <td>3263011</td>\n",
" <td>2023</td>\n",
" <td>33.50</td>\n",
" <td>81.00</td>\n",
" <td>200.00</td>\n",
" <td>241.00</td>\n",
" <td>287.50</td>\n",
" <td>528.50</td>\n",
" <td>189.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>48535 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
"Bezrobotni Kod Rok Bezrobotni_do_25_roku_zycia \n",
"0 0201011 2011 284.00 \\\n",
"1 0201011 2012 293.00 \n",
"2 0201011 2013 253.50 \n",
"3 0201011 2014 172.50 \n",
"4 0201011 2015 107.50 \n",
"... ... ... ... \n",
"48530 3263011 2019 27.50 \n",
"48531 3263011 2020 56.00 \n",
"48532 3263011 2021 34.50 \n",
"48533 3263011 2022 31.50 \n",
"48534 3263011 2023 33.50 \n",
"\n",
"Bezrobotni Bezrobotni_do_30_roku_zycia Dlugotrwale_bezrobotni \n",
"0 NaN 819.50 \\\n",
"1 NaN 756.50 \n",
"2 NaN 788.00 \n",
"3 NaN 651.50 \n",
"4 238.00 434.50 \n",
"... ... ... \n",
"48530 66.00 226.50 \n",
"48531 142.00 239.50 \n",
"48532 88.00 260.50 \n",
"48533 72.00 199.00 \n",
"48534 81.00 200.00 \n",
"\n",
"Bezrobotni Bezrobotne_kobiety Bezrobotni_mezczyzni Bezrobotni_ogolem \n",
"0 900.50 818.00 1718.50 \\\n",
"1 894.50 888.00 1782.50 \n",
"2 869.50 874.00 1743.50 \n",
"3 648.50 667.50 1316.00 \n",
"4 504.00 518.50 1022.50 \n",
"... ... ... ... \n",
"48530 272.50 221.00 493.50 \n",
"48531 390.00 361.50 751.50 \n",
"48532 295.00 341.00 636.00 \n",
"48533 211.50 270.50 482.00 \n",
"48534 241.00 287.50 528.50 \n",
"\n",
"Bezrobotni Bezrobotni_powyzej_50_roku_zycia \n",
"0 486.50 \n",
"1 498.50 \n",
"2 521.00 \n",
"3 402.00 \n",
"4 359.00 \n",
"... ... \n",
"48530 181.00 \n",
"48531 250.00 \n",
"48532 239.50 \n",
"48533 182.50 \n",
"48534 189.00 \n",
"\n",
"[48535 rows x 9 columns]"
]
},
"execution_count": 778,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Polish category labels in the 'Bezrobotni' column -> ASCII column names.\n",
"unemployment_renames = {\n",
"    'do 25 roku życia': 'Bezrobotni_do_25_roku_zycia',\n",
"    'do 30 roku życia': 'Bezrobotni_do_30_roku_zycia',\n",
"    'długotrwale bezrobotni': 'Dlugotrwale_bezrobotni',\n",
"    'kobiety': 'Bezrobotne_kobiety',\n",
"    'mężczyźni': 'Bezrobotni_mezczyzni',\n",
"    'ogółem': 'Bezrobotni_ogolem',\n",
"    'powyżej 50 roku życia': 'Bezrobotni_powyzej_50_roku_zycia',\n",
"}\n",
"\n",
"# Read the RYNE table: 'Kod' is kept as text, ',' is the decimal mark.\n",
"ryne_long = pd.read_csv(\n",
"    'RYNE_3733_CREL.csv', sep=';', decimal=',', converters={'Kod': str}\n",
")[['Kod', 'Bezrobotni', 'Rok', 'Wartosc']].dropna()\n",
"\n",
"# Long -> wide: one row per (Kod, Rok), one column per category.\n",
"# pivot_table uses its default aggregation (mean) when a cell has several values.\n",
"df_ryne = (\n",
"    ryne_long\n",
"    .pivot_table(index=['Kod', 'Rok'], columns='Bezrobotni', values='Wartosc')\n",
"    .reset_index()\n",
"    .rename(columns=unemployment_renames)\n",
")\n",
"\n",
"df_ryne"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
"execution_count": 779,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"13451\n"
]
}
],
"source": [
"# Work on a copy of df_dofinansowanie_agg and print its row count.\n",
"df_data = df_dofinansowanie_agg.copy()\n",
"print(df_data.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": 780,
"metadata": {},
"outputs": [],
"source": [
"# First two characters of 'Kod' -> voivodeship label used in the dummy column names.\n",
"wojewodztwo_dictionary = {\n",
"    '02': 'Dolnoslaskie',\n",
"    '04': 'Kujawsko_Pomorskie',\n",
"    '06': 'Lubelskie',\n",
"    '08': 'Lubuskie',\n",
"    '10': 'Lodzkie',\n",
"    '12': 'Malopolskie',\n",
"    '14': 'Mazowieckie',\n",
"    '16': 'Opolskie',\n",
"    '18': 'Podkarpackie',\n",
"    '20': 'Podlaskie',\n",
"    '22': 'Pomorskie',\n",
"    '24': 'Slaskie',\n",
"    '26': 'Swietokrzyskie',\n",
"    '28': 'Warminsko_Mazurskie',\n",
"    '30': 'Wielkopolskie',\n",
"    '32': 'Zachodniopomorskie',\n",
"}\n",
"\n",
"# One 0/1 column per voivodeship; a prefix missing from the dictionary maps to None.\n",
"wojewodztwo_labels = df_data['Kod'].apply(lambda kod: wojewodztwo_dictionary.get(kod[:2]))\n",
"wojewodztwo_dummies = pd.get_dummies(wojewodztwo_labels, prefix='Wojewodztwo').astype(int)\n",
"df_data = pd.concat([df_data, wojewodztwo_dummies], axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 781,
"metadata": {},
"outputs": [],
"source": [
"# Last character of 'Kod' -> municipality type label (used directly as the dummy column name).\n",
"rodzaj_gminy_dictionary = {\n",
"    '1': 'Gmina_miejska',\n",
"    '2': 'Gmina_wiejska',\n",
"    '3': 'Gmina_miejsko_wiejska',\n",
"}\n",
"\n",
"# One 0/1 column per municipality type; other trailing characters map to None.\n",
"gmina_labels = df_data['Kod'].apply(lambda kod: rodzaj_gminy_dictionary.get(kod[-1]))\n",
"gmina_dummies = pd.get_dummies(gmina_labels).astype(int)\n",
"df_data = pd.concat([df_data, gmina_dummies], axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 782,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"13451\n"
]
}
],
"source": [
"# Left-join df_podz on 'Kod' without its last character, plus 'Rok'.\n",
"# The array-valued key shows up in the result as 'key_0'; it and the right-hand 'Kod' are dropped.\n",
"podz_left_keys = [df_data['Kod'].str[:-1], 'Rok']\n",
"podz_right_keys = [df_podz['Kod'].str[:-1], 'Rok']\n",
"df_data = df_data.merge(\n",
"    df_podz, how='left', left_on=podz_left_keys, right_on=podz_right_keys, suffixes=(None, '_podz')\n",
").drop(columns=['key_0', 'Kod_podz'])\n",
"print(len(df_data))"
]
},
{
"cell_type": "code",
"execution_count": 783,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"13451\n"
]
}
],
"source": [
"# Left-join df_wyna on 'Kod' without its last three characters, plus 'Rok'.\n",
"# The array-valued key shows up in the result as 'key_0'; it and the right-hand 'Kod' are dropped.\n",
"wyna_left_keys = [df_data['Kod'].str[:-3], 'Rok']\n",
"wyna_right_keys = [df_wyna['Kod'].str[:-3], 'Rok']\n",
"df_data = df_data.merge(\n",
"    df_wyna, how='left', left_on=wyna_left_keys, right_on=wyna_right_keys, suffixes=(None, '_wyna')\n",
").drop(columns=['key_0', 'Kod_wyna'])\n",
"print(len(df_data))"
]
},
{
"cell_type": "code",
"execution_count": 784,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"13451\n",
"13451\n"
]
}
],
"source": [
"# Left-join both df_fina_* tables on 'Kod' without its last character, plus 'Rok'.\n",
"# After each join the helper key 'key_0' and the right-hand 'Kod' are dropped and the row count is printed.\n",
"for fina_suffix, df_fina in (('_fina_1', df_fina_1), ('_fina_2', df_fina_2)):\n",
"    df_data = df_data.merge(\n",
"        df_fina,\n",
"        how='left',\n",
"        left_on=[df_data['Kod'].str[:-1], 'Rok'],\n",
"        right_on=[df_fina['Kod'].str[:-1], 'Rok'],\n",
"        suffixes=(None, fina_suffix),\n",
"    ).drop(columns=['key_0', 'Kod' + fina_suffix])\n",
"    print(len(df_data))"
]
},
{
"cell_type": "code",
"execution_count": 785,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"13451\n",
"13451\n",
"13451\n",
"13451\n",
"13451\n"
]
}
],
"source": [
"# Left-join the five df_ludn_* tables, in order, on 'Kod' without its last character plus 'Rok'.\n",
"# NOTE: the join uses the truncated 'Kod', not the full code.\n",
"ludn_tables = {\n",
"    '_ludn_1': df_ludn_1,\n",
"    '_ludn_2': df_ludn_2,\n",
"    '_ludn_3': df_ludn_3,\n",
"    '_ludn_4': df_ludn_4,\n",
"    '_ludn_5': df_ludn_5,\n",
"}\n",
"\n",
"for ludn_suffix, df_ludn in ludn_tables.items():\n",
"    df_data = df_data.merge(\n",
"        df_ludn,\n",
"        how='left',\n",
"        left_on=[df_data['Kod'].str[:-1], 'Rok'],\n",
"        right_on=[df_ludn['Kod'].str[:-1], 'Rok'],\n",
"        suffixes=(None, ludn_suffix),\n",
"    ).drop(columns=['key_0', 'Kod' + ludn_suffix])\n",
"    # Row count after each join.\n",
"    print(len(df_data))"
]
},
{
"cell_type": "code",
"execution_count": 786,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"13451\n"
]
}
],
"source": [
"# Left-join df_tury on the full ('Kod', 'Rok') pair; clashing right-hand columns get '_tury'.\n",
"tury_keys = ['Kod', 'Rok']\n",
"df_data = pd.merge(df_data, df_tury, how='left', on=tury_keys, suffixes=(None, '_tury'))\n",
"print(len(df_data))"
]
},
{
"cell_type": "code",
"execution_count": 787,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"13451\n"
]
}
],
"source": [
"# Left-join df_ryne on the full ('Kod', 'Rok') pair; clashing right-hand columns get '_ryne'.\n",
"ryne_keys = ['Kod', 'Rok']\n",
"df_data = pd.merge(df_data, df_ryne, how='left', on=ryne_keys, suffixes=(None, '_ryne'))\n",
"print(len(df_data))"
]
},
{
"cell_type": "code",
"execution_count": 788,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.44252452388990926"
]
},
"execution_count": 788,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Derived feature: 'Ludnosc' divided by 'Powierzchnia' (units follow the source tables).\n",
"df_data['Gestosc_zaludnienia'] = df_data['Ludnosc'].div(df_data['Powierzchnia'])\n",
"\n",
"df_data['Gestosc_zaludnienia'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 789,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Kod 0\n",
"Program_operacyjny 0\n",
"Rok 0\n",
"Suma 0\n",
"Wojewodztwo_Dolnoslaskie 0\n",
" ... \n",
"Bezrobotne_kobiety 280\n",
"Bezrobotni_mezczyzni 280\n",
"Bezrobotni_ogolem 280\n",
"Bezrobotni_powyzej_50_roku_zycia 280\n",
"Gestosc_zaludnienia 97\n",
"Length: 108, dtype: int64"
]
},
"execution_count": 789,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of missing values per column after all joins.\n",
"df_data.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 790,
"metadata": {},
"outputs": [],
"source": [
"# # df_data[[\n",
"# # 'Miejsca_noclegowe_caloroczne',\n",
"# # 'Miejsca_noclegowe_ogolem',\n",
"# # 'Obiekty_caloroczne',\n",
"# # 'Obiekty_ogolem',\n",
"# # 'Turysci_ogolem',\n",
"# # 'Turysci_zagraniczni']] = df_data[[\n",
"# # 'Miejsca_noclegowe_caloroczne',\n",
"# # 'Miejsca_noclegowe_ogolem',\n",
"# # 'Obiekty_caloroczne',\n",
"# # 'Obiekty_ogolem',\n",
"# # 'Turysci_ogolem',\n",
"# # 'Turysci_zagraniczni']].fillna(0)\n",
"# df_data.drop(columns=[\n",
"# 'Program_operacyjny',\n",
"# 'Wymeldowania_za_granice_kobiety',\n",
"# 'Wymeldowania_za_granice_mezczyzni',\n",
"# 'Wymeldowania_za_granice_ogolem',\n",
"# 'Bezrobotni_do_30_roku_zycia',\n",
" \n",
"# 'Wojewodztwo_Dolnoslaskie', # 43\n",
"# 'Wojewodztwo_Kujawsko_Pomorskie', # 44\n",
"# 'Wojewodztwo_Lubelskie', # 45\n",
"# 'Wojewodztwo_Lubuskie', # 46\n",
"# 'Wojewodztwo_Lodzkie', # 47\n",
"# 'Wojewodztwo_Malopolskie', # 48\n",
"# 'Wojewodztwo_Mazowieckie', # 49\n",
"# 'Wojewodztwo_Opolskie', # 50\n",
"# 'Wojewodztwo_Podkarpackie', # 51\n",
"# 'Wojewodztwo_Podlaskie', # 52\n",
"# 'Wojewodztwo_Pomorskie', # 53\n",
"# 'Wojewodztwo_Slaskie', # 54\n",
"# 'Wojewodztwo_Swietokrzyskie', # 55\n",
"# 'Wojewodztwo_Warminsko_Mazurskie', # 56\n",
"# 'Wojewodztwo_Wielkopolskie', # 57\n",
"# 'Wojewodztwo_Zachodniopomorskie', # 58\n",
"# 'Gmina_miejska', # 98\n",
"# 'Gmina_miejsko_wiejska', # 99\n",
"# 'Gmina_wiejska',\n",
"# 'Wynagrodzenie_ogolem','Wynagrodzenie_w_relacji_do_sredniej','Dochody_podatek_lesny','Dochody_podatek_PCC','Dochody_podatek_od_dzialalnosci_gospodarczej','Dochody_podatek_od_nieruchomosci',\n",
"# 'Dochody_podatek_od_spadkow','Dochody_podatek_od_srodkow_transportowych','Dochody_podatek_rolny','Dochody_podatek_odrebne_ustawy','Dochody_razem','Dochody_z_majatku',\n",
"# 'Dochody_z_najmu_i_dzierzawy','Dochody_z_uslug','Dochody_dofinansowanie_inwestycyjne','Dochody_dofinansowanie_razem','Udzialy_w_podatkach_dochodowych_od_osob_fizycznych','Udzialy_w_podatkach_dochodowych_od_osob_prywatnych',\n",
"# 'Miejsca_noclegowe_caloroczne',\n",
"# 'Miejsca_noclegowe_ogolem',\n",
"# 'Obiekty_caloroczne',\n",
"# 'Obiekty_ogolem',\n",
"# 'Turysci_ogolem',\n",
"# 'Turysci_zagraniczni',\n",
"# 'Udzialy_w_podatkach_dochodowych_razem','Wplywy_z_innych_lokalnych_oplat','Wplywy_z_oplaty_eksploatacyjnej','Wplywy_z_oplaty_skarbowej','Wplywy_z_oplaty_targowej'], inplace=True, errors='ignore')\n",
"\n",
"# df_data[(df_data.isna().any(axis=1)) & (df_data['Rok'] != 2023)].columns # ['Rok'].drop_duplicates().reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 791,
"metadata": {},
"outputs": [],
"source": [
"# df_data['Suma'] = df_data['Suma'] / df_data['Ludnosc']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
"execution_count": 792,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Kod\n",
"Program_operacyjny\n",
"Rok\n",
"Suma\n",
"Wojewodztwo_Dolnoslaskie\n",
"Wojewodztwo_Kujawsko_Pomorskie\n",
"Wojewodztwo_Lodzkie\n",
"Wojewodztwo_Lubelskie\n",
"Wojewodztwo_Lubuskie\n",
"Wojewodztwo_Malopolskie\n",
"Wojewodztwo_Mazowieckie\n",
"Wojewodztwo_Opolskie\n",
"Wojewodztwo_Podkarpackie\n",
"Wojewodztwo_Podlaskie\n",
"Wojewodztwo_Pomorskie\n",
"Wojewodztwo_Slaskie\n",
"Wojewodztwo_Swietokrzyskie\n",
"Wojewodztwo_Warminsko_Mazurskie\n",
"Wojewodztwo_Wielkopolskie\n",
"Wojewodztwo_Zachodniopomorskie\n",
"Gmina_miejska\n",
"Gmina_miejsko_wiejska\n",
"Gmina_wiejska\n",
"Powierzchnia\n",
"Wynagrodzenie_ogolem\n",
"Wynagrodzenie_w_relacji_do_sredniej\n",
"Dochody_podatek_lesny\n",
"Dochody_podatek_PCC\n",
"Dochody_podatek_od_dzialalnosci_gospodarczej\n",
"Dochody_podatek_od_nieruchomosci\n",
"Dochody_podatek_od_spadkow\n",
"Dochody_podatek_od_srodkow_transportowych\n",
"Dochody_podatek_rolny\n",
"Dochody_podatek_odrebne_ustawy\n",
"Dochody_razem\n",
"Dochody_z_majatku\n",
"Dochody_z_najmu_i_dzierzawy\n",
"Dochody_z_uslug\n",
"Dochody_dofinansowanie_inwestycyjne\n",
"Dochody_dofinansowanie_razem\n",
"Udzialy_w_podatkach_dochodowych_od_osob_fizycznych\n",
"Udzialy_w_podatkach_dochodowych_od_osob_prywatnych\n",
"Udzialy_w_podatkach_dochodowych_razem\n",
"Wplywy_z_innych_lokalnych_oplat\n",
"Wplywy_z_oplaty_eksploatacyjnej\n",
"Wplywy_z_oplaty_skarbowej\n",
"Wplywy_z_oplaty_targowej\n",
"Ludnosc_ogolem\n",
"Ludnosc_w_wieku_poprodukcyjnym\n",
"Ludnosc_w_wieku_produkcyjnym\n",
"Ludnosc_w_wieku_produkcyjnym_mobilnym\n",
"Ludnosc_w_wieku_produkcyjnym_niemobilnym\n",
"Ludnosc_w_wieku_przedprodukcyjnym\n",
"Ludnosc_mezczyzni\n",
"Ludnosc_mezczyzni_w_wieku_poprodukcyjnym\n",
"Ludnosc_mezczyzni_w_wieku_produkcyjnym\n",
"Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym\n",
"Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym\n",
"Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym\n",
"Ludnosc_kobiety\n",
"Ludnosc_kobiety_w_wieku_poprodukcyjnym\n",
"Ludnosc_kobiety_w_wieku_produkcyjnym\n",
"Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym\n",
"Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym\n",
"Ludnosc_kobiety_w_wieku_przedprodukcyjnym\n",
"Ludnosc_na_1_km2\n",
"Ludnosc\n",
"Ludnosc_kobiety_ludn_4\n",
"Ludnosc_mezczyzni_ludn_4\n",
"Wskaznik_urbanizacji\n",
"Zmiana_liczby_ludnosci\n",
"Saldo_migracji_na_1000_ludnosci\n",
"Saldo_migracji\n",
"Wymeldowania_do_miast_kobiety\n",
"Wymeldowania_do_miast_mezczyzni\n",
"Wymeldowania_do_miast_ogolem\n",
"Wymeldowania_na_wies_kobiety\n",
"Wymeldowania_na_wies_mezczyzni\n",
"Wymeldowania_na_wies_ogolem\n",
"Wymeldowania_kobiety\n",
"Wymeldowania_mezczyzni\n",
"Wymeldowania_ogolem\n",
"Zameldowania_kobiety\n",
"Zameldowania_mezczyzni\n",
"Zameldowania_ogolem\n",
"Zameldowania_z_miast_kobiety\n",
"Zameldowania_z_miast_mezczyzni\n",
"Zameldowania_z_miast_ogolem\n",
"Zameldowania_ze_wsi_kobiety\n",
"Zameldowania_ze_wsi_mezczyzni\n",
"Zameldowania_ze_wsi_ogolem\n",
"Miejsca_noclegowe_caloroczne\n",
"Miejsca_noclegowe_ogolem\n",
"Obiekty_caloroczne\n",
"Obiekty_ogolem\n",
"Turysci_ogolem\n",
"Turysci_zagraniczni\n",
"Bezrobotni_do_25_roku_zycia\n",
"Dlugotrwale_bezrobotni\n",
"Bezrobotne_kobiety\n",
"Bezrobotni_mezczyzni\n",
"Bezrobotni_ogolem\n",
"Bezrobotni_powyzej_50_roku_zycia\n",
"Gestosc_zaludnienia\n"
]
}
],
"source": [
"# Drop columns that are not needed ('Program_operacyjny' is left in place at this point).\n",
"# errors='ignore' skips any listed column that is already absent.\n",
"unused_columns = [\n",
"    'Wymeldowania_za_granice_kobiety',\n",
"    'Wymeldowania_za_granice_mezczyzni',\n",
"    'Wymeldowania_za_granice_ogolem',\n",
"    'Bezrobotni_do_30_roku_zycia',\n",
"]\n",
"df_data = df_data.drop(columns=unused_columns, errors='ignore')\n",
"\n",
"# Save the joined table, then list the remaining column names one per line.\n",
"df_data.to_csv('dane1.csv', index=False)\n",
"\n",
"for column_name in df_data.columns.tolist():\n",
"    print(column_name)"
]
},
{
"cell_type": "code",
"execution_count": 793,
"metadata": {},
"outputs": [],
"source": [
"# Per-column count of missing values, kept in `s` for the next cell.\n",
"s = df_data.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 794,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Wynagrodzenie_ogolem 97\n",
"Wynagrodzenie_w_relacji_do_sredniej 97\n",
"Dochody_podatek_lesny 97\n",
"Dochody_podatek_PCC 97\n",
"Dochody_podatek_od_dzialalnosci_gospodarczej 97\n",
" ... \n",
"Bezrobotne_kobiety 280\n",
"Bezrobotni_mezczyzni 280\n",
"Bezrobotni_ogolem 280\n",
"Bezrobotni_powyzej_50_roku_zycia 280\n",
"Gestosc_zaludnienia 97\n",
"Length: 80, dtype: int64"
]
},
"execution_count": 794,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show only the columns that still contain missing values.\n",
"s.loc[s > 0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"..."
]
},
{
"cell_type": "code",
"execution_count": 795,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"13451\n",
"13067\n",
"Mean Squared Error: 2235175313583462.0\n"
]
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"from sklearn.tree import DecisionTreeRegressor, plot_tree, export_text\n",
"from sklearn.metrics import mean_squared_error\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Missing values in the accommodation / tourist columns are replaced with 0\n",
"# (so these columns do not cause rows to be removed by a later dropna).\n",
"tourism_columns = [\n",
"    'Miejsca_noclegowe_caloroczne',\n",
"    'Miejsca_noclegowe_ogolem',\n",
"    'Obiekty_caloroczne',\n",
"    'Obiekty_ogolem',\n",
"    'Turysci_ogolem',\n",
"    'Turysci_zagraniczni',\n",
"]\n",
"df_data[tourism_columns] = df_data[tourism_columns].fillna(0)\n",
"\n",
"# Model input columns (100 distinct names; the trailing number is the 1-based position).\n",
"# Entries 62 and 63 are the '_ludn_4'-suffixed columns produced by the df_ludn_4 merge;\n",
"# listing the unsuffixed names there would repeat entries 37 and 31 and duplicate columns in X.\n",
"feature_names = [\n",
"    'Powierzchnia',  # 1\n",
"    'Wynagrodzenie_ogolem',  # 2\n",
"    'Wynagrodzenie_w_relacji_do_sredniej',  # 3\n",
"    'Dochody_podatek_lesny',  # 4\n",
"    'Dochody_podatek_PCC',  # 5\n",
"    'Dochody_podatek_od_dzialalnosci_gospodarczej',  # 6\n",
"    'Dochody_podatek_od_nieruchomosci',  # 7\n",
"    'Dochody_podatek_od_spadkow',  # 8\n",
"    'Dochody_podatek_od_srodkow_transportowych',  # 9\n",
"    'Dochody_podatek_rolny',  # 10\n",
"    'Dochody_podatek_odrebne_ustawy',  # 11\n",
"    'Dochody_razem',  # 12\n",
"    'Dochody_z_majatku',  # 13\n",
"    'Dochody_z_najmu_i_dzierzawy',  # 14\n",
"    'Dochody_z_uslug',  # 15\n",
"    'Dochody_dofinansowanie_inwestycyjne',  # 16\n",
"    'Dochody_dofinansowanie_razem',  # 17\n",
"    'Udzialy_w_podatkach_dochodowych_od_osob_fizycznych',  # 18\n",
"    'Udzialy_w_podatkach_dochodowych_od_osob_prywatnych',  # 19\n",
"    'Udzialy_w_podatkach_dochodowych_razem',  # 20\n",
"    'Wplywy_z_innych_lokalnych_oplat',  # 21\n",
"    'Wplywy_z_oplaty_eksploatacyjnej',  # 22\n",
"    'Wplywy_z_oplaty_skarbowej',  # 23\n",
"    'Wplywy_z_oplaty_targowej',  # 24\n",
"    'Ludnosc_ogolem',  # 25\n",
"    'Ludnosc_w_wieku_poprodukcyjnym',  # 26\n",
"    'Ludnosc_w_wieku_produkcyjnym',  # 27\n",
"    'Ludnosc_w_wieku_produkcyjnym_mobilnym',  # 28\n",
"    'Ludnosc_w_wieku_produkcyjnym_niemobilnym',  # 29\n",
"    'Ludnosc_w_wieku_przedprodukcyjnym',  # 30\n",
"    'Ludnosc_mezczyzni',  # 31\n",
"    'Ludnosc_mezczyzni_w_wieku_poprodukcyjnym',  # 32\n",
"    'Ludnosc_mezczyzni_w_wieku_produkcyjnym',  # 33\n",
"    'Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym',  # 34\n",
"    'Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym',  # 35\n",
"    'Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym',  # 36\n",
"    'Ludnosc_kobiety',  # 37\n",
"    'Ludnosc_kobiety_w_wieku_poprodukcyjnym',  # 38\n",
"    'Ludnosc_kobiety_w_wieku_produkcyjnym',  # 39\n",
"    'Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym',  # 40\n",
"    'Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym',  # 41\n",
"    'Ludnosc_kobiety_w_wieku_przedprodukcyjnym',  # 42\n",
"    'Wojewodztwo_Dolnoslaskie',  # 43\n",
"    'Wojewodztwo_Kujawsko_Pomorskie',  # 44\n",
"    'Wojewodztwo_Lubelskie',  # 45\n",
"    'Wojewodztwo_Lubuskie',  # 46\n",
"    'Wojewodztwo_Lodzkie',  # 47\n",
"    'Wojewodztwo_Malopolskie',  # 48\n",
"    'Wojewodztwo_Mazowieckie',  # 49\n",
"    'Wojewodztwo_Opolskie',  # 50\n",
"    'Wojewodztwo_Podkarpackie',  # 51\n",
"    'Wojewodztwo_Podlaskie',  # 52\n",
"    'Wojewodztwo_Pomorskie',  # 53\n",
"    'Wojewodztwo_Slaskie',  # 54\n",
"    'Wojewodztwo_Swietokrzyskie',  # 55\n",
"    'Wojewodztwo_Warminsko_Mazurskie',  # 56\n",
"    'Wojewodztwo_Wielkopolskie',  # 57\n",
"    'Wojewodztwo_Zachodniopomorskie',  # 58\n",
"    'Gestosc_zaludnienia',  # 59\n",
"    'Ludnosc_na_1_km2',  # 60\n",
"    'Ludnosc',  # 61\n",
"    'Ludnosc_kobiety_ludn_4',  # 62\n",
"    'Ludnosc_mezczyzni_ludn_4',  # 63\n",
"    'Wskaznik_urbanizacji',  # 64\n",
"    'Zmiana_liczby_ludnosci',  # 65\n",
"    'Saldo_migracji_na_1000_ludnosci',  # 66\n",
"    'Saldo_migracji',  # 67\n",
"    'Wymeldowania_do_miast_kobiety',  # 68\n",
"    'Wymeldowania_do_miast_mezczyzni',  # 69\n",
"    'Wymeldowania_do_miast_ogolem',  # 70\n",
"    'Wymeldowania_na_wies_kobiety',  # 71\n",
"    'Wymeldowania_na_wies_mezczyzni',  # 72\n",
"    'Wymeldowania_na_wies_ogolem',  # 73\n",
"    'Wymeldowania_kobiety',  # 74\n",
"    'Wymeldowania_mezczyzni',  # 75\n",
"    'Wymeldowania_ogolem',  # 76\n",
"    'Zameldowania_kobiety',  # 77\n",
"    'Zameldowania_mezczyzni',  # 78\n",
"    'Zameldowania_ogolem',  # 79\n",
"    'Zameldowania_z_miast_kobiety',  # 80\n",
"    'Zameldowania_z_miast_mezczyzni',  # 81\n",
"    'Zameldowania_z_miast_ogolem',  # 82\n",
"    'Zameldowania_ze_wsi_kobiety',  # 83\n",
"    'Zameldowania_ze_wsi_mezczyzni',  # 84\n",
"    'Zameldowania_ze_wsi_ogolem',  # 85\n",
"    'Miejsca_noclegowe_caloroczne',  # 86\n",
"    'Miejsca_noclegowe_ogolem',  # 87\n",
"    'Obiekty_caloroczne',  # 88\n",
"    'Obiekty_ogolem',  # 89\n",
"    'Turysci_ogolem',  # 90\n",
"    'Turysci_zagraniczni',  # 91\n",
"    'Bezrobotni_do_25_roku_zycia',  # 92\n",
"    'Dlugotrwale_bezrobotni',  # 93\n",
"    'Bezrobotne_kobiety',  # 94\n",
"    'Bezrobotni_mezczyzni',  # 95\n",
"    'Bezrobotni_ogolem',  # 96\n",
"    'Bezrobotni_powyzej_50_roku_zycia',  # 97\n",
"    'Gmina_miejska',  # 98\n",
"    'Gmina_miejsko_wiejska',  # 99\n",
"    'Gmina_wiejska',  # 100\n",
"]\n",
"\n",
"# Remove columns that must not reach the model; errors='ignore' skips any that are already gone.\n",
"columns_to_drop = [\n",
"    'Program_operacyjny',\n",
"    'Wymeldowania_za_granice_kobiety',\n",
"    'Wymeldowania_za_granice_mezczyzni',\n",
"    'Wymeldowania_za_granice_ogolem',\n",
"    'Bezrobotni_do_30_roku_zycia',\n",
"]\n",
"df_data = df_data.drop(columns=columns_to_drop, errors='ignore')\n",
"\n",
"# Keep only complete rows with a positive target; print the row count before and after.\n",
"print(len(df_data))\n",
"df_data = df_data.dropna()\n",
"df_data = df_data[df_data['Suma'] > 0]\n",
"print(len(df_data))\n",
"\n",
"X = df_data[feature_names]\n",
"y = df_data['Suma']\n",
"\n",
"# 0/1 flag (density above 1.5), split alongside X and y and used to colour the scatter plots.\n",
"color_column = df_data['Gestosc_zaludnienia'].gt(1.5).astype(int)\n",
"\n",
"(\n",
"    X_train, X_test,\n",
"    y_train, y_test,\n",
"    color_column_train, color_column_test,\n",
") = train_test_split(X, y, color_column, test_size=0.2, random_state=1)\n",
"\n",
"# Fixed random_state keeps the split and the fitted tree reproducible.\n",
"model = DecisionTreeRegressor(random_state=1)\n",
"model.fit(X_train, y_train)\n",
"\n",
"y_pred = model.predict(X_test)\n",
"mse = mean_squared_error(y_test, y_pred)\n",
"print('Mean Squared Error:', mse)"
]
},
{
"cell_type": "code",
"execution_count": 796,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Turysci_ogolem 0.65\n",
"Turysci_zagraniczni 0.60\n",
"Bezrobotni_powyzej_50_roku_zycia 0.47\n",
"Bezrobotni_mezczyzni 0.47\n",
"Bezrobotni_ogolem 0.46\n",
"Dlugotrwale_bezrobotni 0.45\n",
"Bezrobotne_kobiety 0.45\n",
"Bezrobotni_do_25_roku_zycia 0.44\n",
"Dochody_z_majatku 0.42\n",
"Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym 0.39\n",
"Ludnosc_w_wieku_produkcyjnym_niemobilnym 0.39\n",
"Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym 0.39\n",
"Ludnosc_kobiety_w_wieku_produkcyjnym 0.39\n",
"Ludnosc_w_wieku_produkcyjnym 0.39\n",
"Ludnosc_mezczyzni_w_wieku_produkcyjnym 0.39\n",
"Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym 0.38\n",
"Ludnosc_w_wieku_produkcyjnym_mobilnym 0.38\n",
"Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym 0.38\n",
"Ludnosc_kobiety_ludn_4 0.38\n",
"Ludnosc_kobiety 0.38\n",
"Dochody_z_uslug 0.38\n",
"Ludnosc_ogolem 0.38\n",
"Ludnosc 0.38\n",
"Zameldowania_z_miast_kobiety 0.38\n",
"Ludnosc_mezczyzni 0.38\n",
"Ludnosc_mezczyzni_ludn_4 0.38\n",
"Ludnosc_kobiety_w_wieku_poprodukcyjnym 0.38\n",
"Zameldowania_z_miast_ogolem 0.38\n",
"Name: Suma, dtype: float64\n"
]
}
],
"source": [
"# Pairwise correlations, then the columns ranked by their correlation with 'Suma'.\n",
"correlation_matrix = df_data.corr()\n",
"suma_correlations = correlation_matrix['Suma'].sort_values(ascending=False)\n",
"\n",
"# Skip the first entry (expected to be 'Suma' itself, correlation 1.0) and show the next 28.\n",
"print(suma_correlations.iloc[1:29])"
]
},
{
"cell_type": "code",
"execution_count": 797,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHHCAYAAABDUnkqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABoDElEQVR4nO3deXhV5b328e/a886ckImEQJgHlUEQREUcUBTEo7Zq1QpitbXVHitvbbW2om0Vbav1nKrlaJ3aqqit2lat1kZRKSgCIiDzGKZMQOZkT+t5/9gQjQRMQsImO/fnuvbVZo2/tQLs27WewTLGGERERETihCPWBYiIiIh0JIUbERERiSsKNyIiIhJXFG5EREQkrijciIiISFxRuBEREZG4onAjIiIicUXhRkREROKKwo2IiIjEFYUbEekUlmVx1113xbqMmDvjjDM444wzmn7eunUrlmXx9NNPx6ymL/tyjSJdncKNSBfw6KOPYlkW48aNa/cxdu3axV133cXy5cs7rrBj3Pz587Esq+njdrvp168f06dPZ/PmzbEur00WLlzIXXfdRWVlZaxLETnmuWJdgIh8tWeffZbCwkIWL17Mxo0bGTBgQJuPsWvXLu6++24KCwsZOXJkxxd5DPvv//5vTjrpJEKhEMuWLeOxxx7j9ddfZ+XKleTl5R3VWvr06UNDQwNut7tN+y1cuJC7776ba665hrS0tM4pTiRO6MmNyDFuy5YtLFy4kAcffJCsrCyeffbZWJfU5UyYMIFvfvObzJw5k9/97nf85je/Ye/evTzzzDOH3Keurq5TarEsC5/Ph9Pp7JTji4jCjcgx79lnnyU9PZ2pU6fy9a9//ZDhprKykltuuYXCwkK8Xi+9evVi+vTpVFRUMH/+fE466SQAZs6c2fSa5kC7j8LCQq655pqDjvnlthjBYJA777yT0aNHk5qaSmJiIhMmTODdd99t83WVlpbicrm4++67D1q3bt06LMvi4YcfBiAUCnH33XczcOBAfD4fPXr04LTTTuPtt99u83kBzjrrLCAaHAHuuusuLMti9erVXHnllaSnp3Paaac1bf/nP/+Z0aNH4/f7ycjI4Bvf+Abbt28/6LiPPfYY/fv3x+/3M3bsWD744IODtjlUm5u1a9dy2WWXkZWVhd/vZ/Dgwdxxxx1N9d16660A9O3bt+n3t3Xr1k6pUaSr02spkWPcs88+yyWXXILH4+GKK67g97//PR9//HFTWAGora1lwoQJrFmzhmuvvZYTTzyRiooK/v73v7Njxw6GDh3Kz3/+c+68806+/e1vM2HCBABOOeWUNtVSXV3NH/7wB6644gquv/56ampqeOKJJ5g8eTKLFy9u0+uunJwcJk6cyIsvvsjs2bObrXvhhRdwOp1ceumlQPTLfc6cOVx33XWMHTuW6upqlixZwrJlyzjnnHPadA0AmzZtAqBHjx7Nll966aUMHDiQe++9F2MMAPfccw8/+9nPuOyyy7juuusoLy/nd7/7HaeffjqffPJJ0yuiJ554gu985zuccsop/OAHP2Dz5s1ceOGFZGRkUFBQcNh6VqxYwYQJE3C73Xz729+msLCQTZs28Y9//IN77rmHSy65hPXr1/P888/z29/+lszMTACysrKOWo0iXYoRkWPWkiVLDGDefvttY4wxtm2bXr16mZtvvrnZdnfeeacBzMsvv3zQMWzbNsYY8/HHHxvAPPXUUwdt06dPHzNjxoyDlk+cONFMnDix6edwOGwCgUCzbfbt22dycnLMtdde22w5YGbPnn3Y6/u///s/A5iVK1c2Wz5s2DBz1llnNf08YsQIM3Xq1MMeqyXvvvuuAcyTTz5pysvLza5du8zrr79uCgsLjWVZ5uOPPzbGGDN79mwDmCuuuKLZ/lu3bjVOp9Pcc889zZavXLnSuFyupuXBYNBkZ2ebkSNHNrs/jz32mAGa3cMtW7Yc9Hs4/fTTTXJystm2bVuz8xz43R
ljzK9//WsDmC1btnR6jSJdnV5LiRzDnn32WXJycjjzzDOBaHuNyy+/nHnz5hGJRJq2++tf/8qIESO4+OKLDzqGZVkdVo/T6cTj8QBg2zZ79+4lHA4zZswYli1b1ubjXXLJJbhcLl544YWmZatWrWL16tVcfvnlTcvS0tL47LPP2LBhQ7vqvvbaa8nKyiIvL4+pU6dSV1fHM888w5gxY5ptd8MNNzT7+eWXX8a2bS677DIqKiqaPrm5uQwcOLDpddySJUsoKyvjhhtuaLo/ANdccw2pqamHra28vJz333+fa6+9lt69ezdb15rf3dGoUaSr6dbh5v3332fatGnk5eVhWRavvvpqm4/x4osvMnLkSBISEujTpw+//vWvO75Q6ZYikQjz5s3jzDPPZMuWLWzcuJGNGzcybtw4SktLKSoqatp206ZNHH/88UelrmeeeYbhw4c3tX3Jysri9ddfp6qqqs3HyszM5Oyzz+bFF19sWvbCCy/gcrm45JJLmpb9/Oc/p7KykkGDBnHCCSdw6623smLFilaf58477+Ttt9/mnXfeYcWKFezatYurr776oO369u3b7OcNGzZgjGHgwIFkZWU1+6xZs4aysjIAtm3bBsDAgQOb7X+g6/nhHOiS3t7f39GoUaSr6dZtburq6hgxYgTXXntts39IW+uf//wnV111Fb/73e8499xzWbNmDddffz1+v5+bbrqpEyqW7uSdd95h9+7dzJs3j3nz5h20/tlnn+Xcc8/tkHMd6glBJBJp1qvnz3/+M9dccw0XXXQRt956K9nZ2TidTubMmdPUjqWtvvGNbzBz5kyWL1/OyJEjefHFFzn77LOb2pUAnH766WzatIm//e1v/Otf/+IPf/gDv/3tb5k7dy7XXXfdV57jhBNOYNKkSV+5nd/vb/azbdtYlsU///nPFns3JSUlteIKO1dXqFHkaOvW4eb888/n/PPPP+T6QCDAHXfcwfPPP09lZSXHH388999/f1PvkT/96U9cdNFFTY+y+/Xrx+23387999/PjTfe2KGvA6T7efbZZ8nOzuaRRx45aN3LL7/MK6+8wty5c/H7/fTv359Vq1Yd9niH+/OYnp7e4uBw27Zta/Zf9X/5y1/o168fL7/8crPjfblBcFtcdNFFfOc732l6NbV+/Xpuv/32g7bLyMhg5syZzJw5k9raWk4//XTuuuuuVoWb9urfvz/GGPr27cugQYMOuV2fPn2A6FOUAz2xINrLa8uWLYwYMeKQ+x64v+39/R2NGkW6mm79Wuqr3HTTTSxatIh58+axYsUKLr30Us4777ym9/6BQACfz9dsH7/fz44dO5oeAYu0R0NDAy+//DIXXHABX//61w/63HTTTdTU1PD3v/8dgK997Wt8+umnvPLKKwcdy+zv9ZOYmAjQYojp378/H374IcFgsGnZa6+9dlBX4gNPBg4cE+Cjjz5i0aJF7b7WtLQ0Jk+ezIsvvsi8efPweDxcdNFFzbbZs2dPs5+TkpIYMGAAgUCg3edtjUsuuQSn08ndd9/d7Joheg8O1DVmzBiysrKYO3dus3v49NNPf+WIwllZWZx++uk8+eSTFBcXH3SOAw71+zsaNYp0OTFqyHzMAcwrr7zS9PO2bduM0+k0O3fubLbd2WefbW6//XZjTLSnR0JCgvn3v/9tIpGIWbdunRkyZIgBzMKFC49m+RJn5s2bZwDz6quvtrg+EomYrKwsM23aNGOMMTU1NWbYsGHG6XSa66+/3sydO9fce++95uSTTzbLly83xkR7y6SlpZnBgwebP/zhD+b55583mzdvNsYY8+abbxrAnHnmmeb3v/+9+eEPf2hyc3NN//79m/WiefLJJw1gLrzwQvN///d/5rbbbjNpaWnmuOOOM3369GlWI63oLXXAn//8ZwOY5OTkpmv6ouzsbHPZZZeZ+++/3zz++OPmO9/5jrEsy3z/+98/7HEP9JZ66aWXDrvdgd5S5eXlB62bM2eOAcwpp5
xifvWrX5nf//735kc/+pEZOHCg+fWvf9203YGeX6eeeqr53//9X3PLLbeYtLQ0069fv6/sLbV8+XKTlJRkevToYW6
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Actual vs predicted on the test set, coloured by the density flag, over the full value range.\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(y_test, y_pred, alpha=0.5, c=color_column_test, cmap='viridis')\n",
"ax.set_xlabel('Actual')\n",
"ax.set_ylabel('Predicted')\n",
"ax.set_title('Actual vs Predicted')\n",
"\n",
"# Same upper limit on both axes so the diagonal corresponds to a perfect prediction.\n",
"axis_limit = max(y_test.max(), y_pred.max())\n",
"ax.set_xlim(0, axis_limit)\n",
"ax.set_ylim(0, axis_limit)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 798,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHHCAYAAABTMjf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3wc9Z34/9fMbC9a9d5tWbLlXrFxw6YaSAwJEFJoqXeQI5dL7hsul8KlcPldcrnc5RLCkQAJEAIJJIQONm6xwb3bstV7l3al1fb5/P5YWyAkGRdJK3s/z8dDD9iZ2Zn3rqyd937K+6MIIQSSJEmSJElxSI11AJIkSZIkSbEiEyFJkiRJkuKWTIQkSZIkSYpbMhGSJEmSJCluyURIkiRJkqS4JRMhSZIkSZLilkyEJEmSJEmKWzIRkiRJkiQpbslESJIkSZKkuCUTIUmSxoWiKHz3u9+NdRgxt3r1alavXj34uLa2FkVRePzxx2MW0wd9MEZJiicyEZKki8AvfvELFEVhyZIl532O5uZmvvvd77J///6xC2yS27RpE4qiDP4YjUaKi4u54447qK6ujnV452T79u1897vfpbe3N9ahSNIlxRDrACRJ+nBPPfUUhYWF7Ny5k8rKSqZOnXrO52hububBBx+ksLCQuXPnjn2Qk9g//MM/sGjRIkKhEHv37uWRRx7h5Zdf5tChQ2RnZ09oLAUFBfh8PoxG4zk9b/v27Tz44IPcddddJCYmjk9wkhSHZIuQJE1yNTU1bN++nf/8z/8kLS2Np556KtYhXXRWrFjBpz/9ae6++27+53/+hx//+Md0d3fzxBNPjPocr9c7LrEoioLFYkHTtHE5vyRJ50YmQpI0yT311FMkJSVx/fXX8/GPf3zURKi3t5d//Md/pLCwELPZTG5uLnfccQednZ1s2rSJRYsWAXD33XcPdhWdHqdSWFjIXXfdNeycHxw7EgwG+fa3v82CBQtwuVzY7XZWrFjB22+/fc6vq62tDYPBwIMPPjhsX0VFBYqi8POf/xyAUCjEgw8+SElJCRaLhZSUFJYvX86bb755ztcFWLNmDRBNMgG++93voigKR48e5ZOf/CRJSUksX7588Pgnn3ySBQsWYLVaSU5O5hOf+AQNDQ3DzvvII48wZcoUrFYrixcvZuvWrcOOGW2M0PHjx7n11ltJS0vDarVSWlrKN7/5zcH4vv71rwNQVFQ0+Purra0dlxglKZ7IrjFJmuSeeuopbr75ZkwmE7fffju//OUv2bVr12BiA9Df38+KFSs4duwY99xzD/Pnz6ezs5MXX3yRxsZGpk+fzr/927/x7W9/my984QusWLECgGXLlp1TLB6Ph0cffZTbb7+dz3/+8/T19fHrX/+aa665hp07d55Tl1tGRgarVq3i2Wef5Tvf+c6QfX/4wx/QNI1bbrkFiCYCDz30EJ/73OdYvHgxHo+H3bt3s3fvXq666qpzeg0AVVVVAKSkpAzZfsstt1BSUsIPf/hDhBAA/OAHP+Bb3/oWt956K5/73Ofo6Ojgf/7nf1i5ciX79u0b7Kb69a9/zRe/+EWWLVvGV77yFaqrq/nIRz5CcnIyeXl5Z4zn4MGDrFixAqPRyBe+8AUKCwupqqrir3/9Kz/4wQ+4+eabOXHiBL///e/56U9/SmpqKgBpaWkTFqMkXbKEJEmT1u7duwUg3nzzTSGEELqui9zcXHH//fcPOe7b3/62AMTzzz8/7By6rgshhNi1a5cAxGOPPTbsmIKCAnHnnXcO275q1SqxatWqwcfhcFgEAoEhx/T09IiMjAxxzz33DNkOiO985ztnfH2/+tWvBCAOHTo0ZPuMGTPEmjVrBh/PmTNHXH/99Wc810jefvttAYjf/OY3oqOjQzQ3N4uXX35ZFBYWCkVRxK5du4QQQnznO98RgLj99tuHPL+2tlZomiZ+8IMfDNl+6NAhYTAYBrcHg0GRnp4u5s6dO+T9eeSRRw
Qw5D2sqakZ9ntYuXKlcDqdoq6ubsh1Tv/uhBDiP/7jPwQgampqxj1GSYonsmtMkiaxp556ioyMDK644gogOr7ktttu45lnniESiQwe96c//Yk5c+Zw0003DTuHoihjFo+maZhMJgB0Xae7u5twOMzChQvZu3fvOZ/v5ptvxmAw8Ic//GFw2+HDhzl69Ci33Xbb4LbExESOHDnCyZMnzyvue+65h7S0NLKzs7n++uvxer088cQTLFy4cMhxX/rSl4Y8fv7559F1nVtvvZXOzs7Bn8zMTEpKSga7BHfv3k17eztf+tKXBt8fgLvuuguXy3XG2Do6OtiyZQv33HMP+fn5Q/adze9uImKUpEtZXCdCW7Zs4cYbbyQ7OxtFUfjzn/98Ts8/Pa7ggz92u318ApbiSiQS4ZlnnuGKK66gpqaGyspKKisrWbJkCW1tbWzYsGHw2KqqKmbOnDkhcT3xxBPMnj17cKxOWloaL7/8Mm63+5zPlZqaytq1a3n22WcHt/3hD3/AYDBw8803D277t3/7N3p7e5k2bRqzZs3i61//OgcPHjzr63z729/mzTffZOPGjRw8eJDm5mY+85nPDDuuqKhoyOOTJ08ihKCkpIS0tLQhP8eOHaO9vR2Auro6AEpKSoY8//R0/TM5PY3/fH9/ExGjJF3K4nqMkNfrZc6cOdxzzz1DPnTP1te+9rVh3yDXrl07ZOyGJJ2vjRs30tLSwjPPPMMzzzwzbP9TTz3F1VdfPSbXGq3lIRKJDJnd9OSTT3LXXXexfv16vv71r5Oeno6maTz00EOD427O1Sc+8Qnuvvtu9u/fz9y5c3n22WdZu3bt4DgYgJUrV1JVVcVf/vIX3njjDR599FF++tOf8vDDD/O5z33uQ68xa9Ysrrzyyg89zmq1Dnms6zqKovDqq6+OOMvL4XCcxSscXxdDjJI0mcV1InTddddx3XXXjbo/EAjwzW9+k9///vf09vYyc+ZMfvSjHw3OonE4HEM+ZA4cOMDRo0d5+OGHxzt0KQ489dRTpKen87//+7/D9j3//PO88MILPPzww1itVqZMmcLhw4fPeL4zdbMkJSWNWKivrq5uSGvBH//4R4qLi3n++eeHnO+Dg53Pxfr16/niF7842D124sQJHnjggWHHJScnc/fdd3P33XfT39/PypUr+e53v3tWidD5mjJlCkIIioqKmDZt2qjHFRQUANHWmdMz0iA6262mpoY5c+aM+tzT7+/5/v4mIkZJupTFddfYh7nvvvvYsWMHzzzzDAcPHuSWW27h2muvHXWcwqOPPsq0adMGZ+RI0vny+Xw8//zz3HDDDXz84x8f9nPffffR19fHiy++CMDHPvYxDhw4wAsvvDDsXOLU7KfTXbYjJTxTpkzhnXfeIRgMDm576aWXhk2/Pt3icPqcAO+++y47duw479eamJjINddcw7PPPsszzzyDyWRi/fr1Q47p6uoa8tjhcDB16lQCgcB5X/ds3HzzzWiaxoMPPjjkNUP0PTgd18KFC0lLS+Phhx8e8h4+/vjjH1oJOi0tjZUrV/Kb3/yG+vr6Ydc4bbTf30TEKEmXsrhuETqT+vp6HnvsMerr6wcrz37ta1/jtdde47HHHuOHP/zhkOP9fj9PPfUU3/jGN2IRrnSJefHFF+nr6+MjH/nIiPsvu+yyweKKt912G1//+tf54x//yC233MI999zDggUL6O7u5sUXX+Thhx9mzpw5TJkyhcTERB5++GGcTid2u50lS5ZQVFTE5z73Of74xz9y7bXXcuutt1JVVcWTTz7JlClThlz3hhtu4Pnnn+emm27i+uuvp6amhocffpgZM2bQ399/3q/3tttu49Of/jS/+MUvuOaaa4ZVTp4xYwarV69mwYIFJCcns3v3bv74xz9y3333nfc1z8aUKVP4/ve/zwMPPEBtbS3r16/H6XRSU1PDCy+8wBe+8AW+9rWvYTQa+f73v88Xv/hF1qxZw2233UZNTQ2PPfbYWY
2/+e///m+WL1/O/Pnz+cIXvkBRURG1tbW8/PLLg0uiLFiwAIBvfvObfOITn8BoNHLjjTdOWIySdMmK0Wy1SQcQL7z
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Scatter of y_pred against y_test; marker colour is taken from color_column_test.\n",
"plt.scatter(y_test, y_pred, c=color_column_test, cmap='viridis', alpha=0.5)\n",
"\n",
"# Label the plot and clip both axes to [0, 30 million];\n",
"# points outside that window are not shown.\n",
"plt.gca().set(\n",
"    title='Actual vs Predicted',\n",
"    xlabel='Actual',\n",
"    ylabel='Predicted',\n",
"    xlim=(0, 30_000_000),\n",
"    ylim=(0, 30_000_000),\n",
")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 799,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHHCAYAAABTMjf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOz9Z3wc53m3DR8z2yt67x0Ewd47RRVKonq1XCRZlmXnth07uePneR3HRY5t3Xljx7njxJKd2JKLbDVLlqxKFRZR7B0geu8di+11rufDkqAgAKwgARJz/H74gJ3ZmXN2Z2f+c13n+T8lIYRARUVFRUVFRWUWIk93ACoqKioqKioq04UqhFRUVFRUVFRmLaoQUlFRUVFRUZm1qEJIRUVFRUVFZdaiCiEVFRUVFRWVWYsqhFRUVFRUVFRmLaoQUlFRUVFRUZm1qEJIRUVFRUVFZdaiCiEVFRUVFRWVWYsqhFRUVC4JkiTx/e9/f7rDmHY2btzIxo0bR/9vaWlBkiSeeeaZaYvpk3wyRhWV2YQqhFRUrgB+8YtfIEkSK1asuOBtdHV18f3vf5+jR49OXWAznO3btyNJ0uifTqcjPz+fBx98kKampukO77zYvXs33//+93E4HNMdiorKVYV2ugNQUVE5O88++yy5ubns37+fhoYGCgsLz3sbXV1dPP744+Tm5rJw4cKpD3IG87d/+7csW7aMUCjE4cOH+dWvfsUbb7xBRUUF6enplzWWnJwcfD4fOp3uvN63e/duHn/8cR5++GFiY2MvTXAqKrMQdURIRWWG09zczO7du/m3f/s3kpKSePbZZ6c7pCuOdevW8dnPfpbPf/7z/PznP+cnP/kJQ0ND/Pa3v530PR6P55LEIkkSRqMRjUZzSbavoqJyfqhCSEVlhvPss88SFxfHli1buOeeeyYVQg6Hg7/7u78jNzcXg8FAZmYmDz74IAMDA2zfvp1ly5YB8PnPf350quhUnkpubi4PP/zwuG1+MnckGAzy3e9+lyVLlhATE4PFYmHdunVs27btvI+rt7cXrVbL448/Pm5ZbW0tkiTxn//5nwCEQiEef/xxioqKMBqNJCQksHbtWt59993z3i/Apk2bgKjIBPj+97+PJElUVVXx6U9/mri4ONauXTu6/h/+8AeWLFmCyWQiPj6eT33qU7S3t4/b7q9+9SsKCgowmUwsX76cDz/8cNw6k+UI1dTUcN9995GUlITJZKKkpIRvf/vbo/F985vfBCAvL2/0+2tpabkkMaqozCbUqTEVlRnOs88+y1133YVer+eBBx7gySef5MCBA6PCBsDtdrNu3Tqqq6t55JFHWLx4MQMDA7z22mt0dHQwZ84cfvCDH/Dd736Xxx57jHXr1gGwevXq84rF6XTyP//zPzzwwAN88YtfxOVy8etf/5rNmzezf//+85pyS0lJYcOGDbzwwgt873vfG7Ps+eefR6PRcO+99wJRIfDEE0/w6KOPsnz5cpxOJwcPHuTw4cNcf/3153UMAI2NjQAkJCSMef3ee++lqKiIH//4xwghAPjRj37Ed77zHe677z4effRR+vv7+fnPf8769es5cuTI6DTVr3/9a770pS+xevVqvvGNb9DU1MRtt91GfHw8WVlZZ4zn+PHjrFu3Dp1Ox2OPPUZubi6NjY389a9/5Uc/+hF33XUXdXV1/OlPf+JnP/sZiYmJACQlJV22GFVUrlqEiorKjOXgwYMCEO+++64QQghFUURmZqb4+te/Pma97373uwIQL7/88rhtKIoihBDiwIEDAhBPP/30uHVycnLEQw89NO71DRs2iA0bNoz+Hw6HRSAQGLPO8PCwSElJEY888siY1wHxve9974zH98tf/lIAoqKiYszrZWVlYtOmTaP/L1iwQGzZsuWM25qIbdu2CUD85je/Ef39/aKrq0u88cYbIjc3V0iSJA4cOCCEEOJ73/ueAMQDDzww5v0tLS1Co9GIH/3oR2Ner6
ioEFqtdvT1YDAokpOTxcKFC8d8Pr/61a8EMOYzbG5uHvc9rF+/XthsNtHa2jpmP6e+OyGE+Nd//VcBiObm5kseo4rKbEKdGlNRmcE8++yzpKSkcM011wDR/JL777+f5557jkgkMrren//8ZxYsWMCdd945bhuSJE1ZPBqNBr1eD4CiKAwNDREOh1m6dCmHDx8+7+3dddddaLVann/++dHXKisrqaqq4v777x99LTY2lhMnTlBfX39BcT/yyCMkJSWRnp7Oli1b8Hg8/Pa3v2Xp0qVj1vvyl7885v+XX34ZRVG47777GBgYGP1LTU2lqKhodErw4MGD9PX18eUvf3n08wF4+OGHiYmJOWNs/f397Ny5k0ceeYTs7Owxy87lu7scMaqoXM3MaiG0c+dObr31VtLT05Ekib/85S/nvQ0hBD/5yU8oLi7GYDCQkZHBj370o6kPVmXWEYlEeO6557jmmmtobm6moaGBhoYGVqxYQW9vL++///7ouo2NjZSXl1+WuH77298yf/780VydpKQk3njjDUZGRs57W4mJiVx77bW88MILo689//zzaLVa7rrrrtHXfvCDH+BwOCguLmbevHl885vf5Pjx4+e8n+9+97u8++67fPDBBxw/fpyuri4+97nPjVsvLy9vzP/19fUIISgqKiIpKWnMX3V1NX19fQC0trYCUFRUNOb9p8r1z8SpMv4L/f4uR4wqKlczszpHyOPxsGDBAh555JExF93z4etf/zpbt27lJz/5CfPmzWNoaIihoaEpjlRlNvLBBx/Q3d3Nc889x3PPPTdu+bPPPssNN9wwJfuabOQhEomMqW76wx/+wMMPP8wdd9zBN7/5TZKTk9FoNDzxxBOjeTfny6c+9Sk+//nPc/ToURYuXMgLL7zAtddeO5oHA7B+/XoaGxt59dVX2bp1K//zP//Dz372M5566ikeffTRs+5j3rx5XHfddWddz2QyjflfURQkSeKtt96asMrLarWewxFeWq6EGFVUZjKzWgjddNNN3HTTTZMuDwQCfPvb3+ZPf/oTDoeD8vJy/uVf/mW0iqa6uponn3ySyspKSkpKgPFPlCoqF8qzzz5LcnIy//Vf/zVu2csvv8wrr7zCU089hclkoqCggMrKyjNu70zTLHFxcRMa9bW2to4ZLXjppZfIz8/n5ZdfHrO9TyY7nw933HEHX/rSl0anx+rq6vjWt741br34+Hg+//nP8/nPfx6328369ev5/ve/f05C6EIpKChACEFeXh7FxcWTrpeTkwNER2dOVaRBtNqtubmZBQsWTPreU5/vhX5/lyNGFZWrmVk9NXY2vvrVr7Jnzx6ee+45jh8/zr333suNN944mqfw17/+lfz8fF5//XXy8vLIzc3l0UcfVUeEVC4an8/Hyy+/zC233MI999wz7u+rX/0qLpeL1157DYC7776bY8eO8corr4zbljhZ/WSxWAAmFDwFBQXs3buXYDA4+trrr78+rvz61IjDqW0C7Nu3jz179lzwscbGxrJ582ZeeOEFnnvuOfR6PXfccceYdQYHB8f8b7VaKSwsJBAIXPB+z4W77roLjUbD448/PuaYIfoZnIpr6dKlJCUl8dRTT435DJ955pmzOkEnJSWxfv16fvOb39DW1jZuH6eY7Pu7HDGqqFzNzOoRoTPR1tbG008/TVtb26jz7D/8wz/w9ttv8/TTT/PjH/+YpqYmWltbefHFF/nd735HJBLh7/7u77jnnnv44IMPpvkIVK5kXnvtNVwuF7fddtuEy1euXDlqrnj//ffzzW9+k5deeol7772XRx55hCVLljA0NMRrr73GU089xYIFCygoKCA2NpannnoKm82GxWJhxYoV5OXl8eijj/LSSy9x4403ct9999HY2Mgf/vAHCgoKxuz3lltu4eWXX+bOO+9ky5YtNDc389RTT1FWVobb7b7g473//vv57Gc/yy9+8Qs2b948zjm5rKyMjRs3smTJEuLj4zl48CAvvf
QSX/3qVy94n+dCQUEBP/zhD/nWt75FS0sLd9xxBzabjebmZl555RUee+wx/uEf/gGdTscPf/hDvvSlL7Fp0ybuv/9
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Scatter of y_pred against y_test; marker colour is taken from color_column_test.\n",
"plt.scatter(y_test, y_pred, c=color_column_test, cmap='viridis', alpha=0.5)\n",
"\n",
"# Label the plot and clip both axes to [0, 3 million] to zoom in on the\n",
"# low-value region; points outside that window are not shown.\n",
"plt.gca().set(\n",
"    title='Actual vs Predicted',\n",
"    xlabel='Actual',\n",
"    ylabel='Predicted',\n",
"    xlim=(0, 3_000_000),\n",
"    ylim=(0, 3_000_000),\n",
")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 800,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"|--- Dlugotrwale_bezrobotni <= 25668.00\n",
"| |--- Bezrobotni_mezczyzni <= 19299.50\n",
"| | |--- Ludnosc_w_wieku_produkcyjnym <= 185983.00\n",
"| | | |--- Turysci_zagraniczni <= 38.50\n",
"| | | | |--- Ludnosc_kobiety <= 89935.50\n",
"| | | | | |--- Bezrobotni_powyzej_50_roku_zycia <= 340.75\n",
"| | | | | | |--- Turysci_ogolem <= 3.00\n",
"| | | | | | | |--- Ludnosc_w_wieku_produkcyjnym_niemobilnym <= 3444.50\n",
"| | | | | | | | |--- Dochody_podatek_od_srodkow_transportowych <= 1364063.19\n",
"| | | | | | | | | |--- Wynagrodzenie_w_relacji_do_sredniej <= 167.35\n",
"| | | | | | | | | | |--- Obiekty_ogolem <= 229.00\n",
"| | | | | | | | | | | |--- truncated branch of depth 60\n",
"| | | | | | | | | | |--- Obiekty_ogolem > 229.00\n",
"| | | | | | | | | | | |--- value: [83037656.42]\n",
"| | | | | | | | | |--- Wynagrodzenie_w_relacji_do_sredniej > 167.35\n",
"| | | | | | | | | | |--- value: [89747833.80]\n",
"| | | | | | | | |--- Dochody_podatek_od_srodkow_transportowych > 1364063.19\n",
"| | | | | | | | | |--- Dochody_podatek_lesny <= 268.50\n",
"| | | | | | | | | | |--- value: [481929545.58]\n",
"| | | | | | | | | |--- Dochody_podatek_lesny > 268.50\n",
"| | | | | | | | | | |--- Wymeldowania_na_wies_ogolem <= 107.50\n",
"| | | | | | | | | | | |--- truncated branch of depth 9\n",
"| | | | | | | | | | |--- Wymeldowania_na_wies_ogolem > 107.50\n",
"| | | | | | | | | | | |--- truncated branch of depth 2\n",
"| | | | | | | |--- Ludnosc_w_wieku_produkcyjnym_niemobilnym > 3444.50\n",
"| | | | | | | | |--- Ludnosc_w_wieku_produkcyjnym_niemobilnym <= 3446.00\n",
"| | | | | | | | | |--- value: [67905000.00]\n",
"| | | | | | | | |--- Ludnosc_w_wieku_produkcyjnym_niemobilnym > 3446.00\n",
"| | | | | | | | | |--- Ludnosc_na_1_km2 <= 228.35\n",
"| | | | | | | | | | |--- Saldo_migracji <= -261.50\n",
"| | | | | | | | | | | |--- value: [59513803.02]\n",
"| | | | | | | | | | |--- Saldo_migracji > -261.50\n",
"| | | | | | | | | | | |--- truncated branch of depth 43\n",
"| | | | | | | | | |--- Ludnosc_na_1_km2 > 228.35\n",
"| | | | | | | | | | |--- Wojewodztwo_Opolskie <= 0.50\n",
"| | | | | | | | | | | |--- truncated branch of depth 46\n",
"| | | | | | | | | | |--- Wojewodztwo_Opolskie > 0.50\n",
"| | | | | | | | | | | |--- truncated branch of depth 11\n",
"| | | | | | |--- Turysci_ogolem > 3.00\n",
"| | | | | | | |--- Wymeldowania_na_wies_ogolem <= 15.50\n",
"| | | | | | | | |--- Bezrobotni_powyzej_50_roku_zycia <= 96.50\n",
"| | | | | | | | | |--- Miejsca_noclegowe_ogolem <= 12.00\n",
"| | | | | | | | | | |--- value: [160629826.38]\n",
"| | | | | | | | | |--- Miejsca_noclegowe_ogolem > 12.00\n",
"| | | | | | | | | | |--- value: [171383125.19]\n",
"| | | | | | | | |--- Bezrobotni_powyzej_50_roku_zycia > 96.50\n",
"| | | | | | | | | |--- value: [342766250.37]\n",
"| | | | | | | |--- Wymeldowania_na_wies_ogolem > 15.50\n",
"| | | | | | | | |--- Wymeldowania_na_wies_ogolem <= 19.50\n",
"| | | | | | | | | |--- Wplywy_z_oplaty_targowej <= 3406.00\n",
"| | | | | | | | | | |--- Wymeldowania_mezczyzni <= 28.50\n",
"| | | | | | | | | | | |--- value: [68553250.07]\n",
"| | | | | | | | | | |--- Wymeldowania_mezczyzni > 28.50\n",
"| | | | | | | | | | | |--- value: [117085504.63]\n",
"| | | | | | | | | |--- Wplywy_z_oplaty_targowej > 3406.00\n",
"| | | | | | | | | | |--- Wojewodztwo_Lubelskie <= 0.50\n",
"| | | | | | | | | | | |--- value: [447130.67]\n",
"| | | | | | | | | | |--- Wojewodztwo_Lubelskie > 0.50\n",
"| | | | | | | | | | | |--- value: [945154.23]\n",
"| | | | | | | | |--- Wymeldowania_na_wies_ogolem > 19.50\n",
"| | | | | | | | | |--- Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym <= 1803.00\n",
"| | | | | | | | | | |--- Udzialy_w_podatkach_dochodowych_od_osob_prywatnych <= 3761.75\n",
"| | | | | | | | | | | |--- value: [31956702.57]\n",
"| | | | | | | | | | |--- Udzialy_w_podatkach_dochodowych_od_osob_prywatnych > 3761.75\n",
"| | | | | | | | | | | |--- truncated branch of depth 18\n",
"| | | | | | | | | |--- Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym > 1803.00\n",
"| | | | | | | | | | |--- Bezrobotne_kobiety <= 293.25\n",
"| | | | | | | | | | | |--- truncated branch of depth 4\n",
"| | | | | | | | | | |--- Bezrobotne_kobiety > 293.25\n",
"| | | | | | | | | | | |--- truncated branch of depth 6\n",
"| | | | | |--- Bezrobotni_powyzej_50_roku_zycia > 340.75\n",
"| | | | | | |--- Ludnosc_kobiety_w_wieku_poprodukcyjnym <= 2095.50\n",
"| | | | | | | |--- value: [164709707.63]\n",
"| | | | | | |--- Ludnosc_kobiety_w_wieku_poprodukcyjnym > 2095.50\n",
"| | | | | | | |--- Gestosc_zaludnienia <= 3.93\n",
"| | | | | | | | |--- Obiekty_caloroczne <= 0.50\n",
"| | | | | | | | | |--- Wynagrodzenie_ogolem <= 3259.15\n",
"| | | | | | | | | | |--- value: [150089223.44]\n",
"| | | | | | | | | |--- Wynagrodzenie_ogolem > 3259.15\n",
"| | | | | | | | | | |--- Zameldowania_z_miast_kobiety <= 417.50\n",
"| | | | | | | | | | | |--- truncated branch of depth 6\n",
"| | | | | | | | | | |--- Zameldowania_z_miast_kobiety > 417.50\n",
"| | | | | | | | | | | |--- value: [93414400.87]\n",
"| | | | | | | | |--- Obiekty_caloroczne > 0.50\n",
"| | | | | | | | | |--- Zmiana_liczby_ludnosci <= -20.20\n",
"| | | | | | | | | | |--- value: [63239190.39]\n",
"| | | | | | | | | |--- Zmiana_liczby_ludnosci > -20.20\n",
"| | | | | | | | | | |--- Bezrobotni_mezczyzni <= 1110.25\n",
"| | | | | | | | | | | |--- truncated branch of depth 23\n",
"| | | | | | | | | | |--- Bezrobotni_mezczyzni > 1110.25\n",
"| | | | | | | | | | | |--- truncated branch of depth 20\n",
"| | | | | | | |--- Gestosc_zaludnienia > 3.93\n",
"| | | | | | | | |--- value: [135056683.01]\n",
"| | | | |--- Ludnosc_kobiety > 89935.50\n",
"| | | | | |--- Wplywy_z_oplaty_skarbowej <= 1133715.12\n",
"| | | | | | |--- value: [180173724.25]\n",
"| | | | | |--- Wplywy_z_oplaty_skarbowej > 1133715.12\n",
"| | | | | | |--- Ludnosc_mezczyzni_w_wieku_poprodukcyjnym <= 11972.00\n",
"| | | | | | | |--- Wymeldowania_do_miast_mezczyzni <= 326.00\n",
"| | | | | | | | |--- Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym <= 21084.50\n",
"| | | | | | | | | |--- value: [30977320.85]\n",
"| | | | | | | | |--- Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym > 21084.50\n",
"| | | | | | | | | |--- Dochody_z_najmu_i_dzierzawy <= 9119502.75\n",
"| | | | | | | | | | |--- value: [59862743.58]\n",
"| | | | | | | | | |--- Dochody_z_najmu_i_dzierzawy > 9119502.75\n",
"| | | | | | | | | | |--- value: [70407306.85]\n",
"| | | | | | | |--- Wymeldowania_do_miast_mezczyzni > 326.00\n",
"| | | | | | | | |--- Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym <= 17482.50\n",
"| | | | | | | | | |--- value: [102472326.23]\n",
"| | | | | | | | |--- Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym > 17482.50\n",
"| | | | | | | | | |--- value: [127981984.10]\n",
"| | | | | | |--- Ludnosc_mezczyzni_w_wieku_poprodukcyjnym > 11972.00\n",
"| | | | | | | |--- Zameldowania_kobiety <= 1158.50\n",
"| | | | | | | | |--- Dochody_podatek_od_nieruchomosci <= 183512512.00\n",
"| | | | | | | | | |--- Udzialy_w_podatkach_dochodowych_od_osob_prywatnych <= 12303453.00\n",
"| | | | | | | | | | |--- Wymeldowania_na_wies_mezczyzni <= 439.00\n",
"| | | | | | | | | | | |--- truncated branch of depth 5\n",
"| | | | | | | | | | |--- Wymeldowania_na_wies_mezczyzni > 439.00\n",
"| | | | | | | | | | | |--- truncated branch of depth 3\n",
"| | | | | | | | | |--- Udzialy_w_podatkach_dochodowych_od_osob_prywatnych > 12303453.00\n",
"| | | | | | | | | | |--- Dochody_podatek_lesny <= 48150.42\n",
"| | | | | | | | | | | |--- truncated branch of depth 8\n",
"| | | | | | | | | | |--- Dochody_podatek_lesny > 48150.42\n",
"| | | | | | | | | | | |--- truncated branch of depth 9\n",
"| | | | | | | | |--- Dochody_podatek_od_nieruchomosci > 183512512.00\n",
"| | | | | | | | | |--- Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym <= 31586.00\n",
"| | | | | | | | | | |--- Wplywy_z_oplaty_targowej <= 595514.12\n",
"| | | | | | | | | | | |--- value: [66090255.07]\n",
"| | | | | | | | | | |--- Wplywy_z_oplaty_targowej > 595514.12\n",
"| | | | | | | | | | | |--- value: [91810436.67]\n",
"| | | | | | | | | |--- Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym > 31586.00\n",
"| | | | | | | | | | |--- value: [3978361.07]\n",
"| | | | | | | |--- Zameldowania_kobiety > 1158.50\n",
"| | | | | | | | |--- Dochody_dofinansowanie_inwestycyjne <= 53727986.00\n",
"| | | | | | | | | |--- Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym <= 17278.50\n",
"| | | | | | | | | | |--- Wymeldowania_na_wies_ogolem <= 1251.00\n",
"| | | | | | | | | | | |--- value: [29960100.33]\n",
"| | | | | | | | | | |--- Wymeldowania_na_wies_ogolem > 1251.00\n",
"| | | | | | | | | | | |--- truncated branch of depth 4\n",
"| | | | | | | | | |--- Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym > 17278.50\n",
"| | | | | | | | | | |--- Saldo_migracji_na_1000_ludnosci <= -2.45\n",
"| | | | | | | | | | | |--- truncated branch of depth 2\n",
"| | | | | | | | | | |--- Saldo_migracji_na_1000_ludnosci > -2.45\n",
"| | | | | | | | | | | |--- truncated branch of depth 7\n",
"| | | | | | | | |--- Dochody_dofinansowanie_inwestycyjne > 53727986.00\n",
"| | | | | | | | | |--- value: [144736499.65]\n",
"| | | |--- Turysci_zagraniczni > 38.50\n",
"| | | | |--- Wplywy_z_oplaty_eksploatacyjnej <= -4393.63\n",
"| | | | | |--- value: [964190154.28]\n",
"| | | | |--- Wplywy_z_oplaty_eksploatacyjnej > -4393.63\n",
"| | | | | |--- Ludnosc_kobiety_w_wieku_poprodukcyjnym <= 17303.50\n",
"| | | | | | |--- Saldo_migracji_na_1000_ludnosci <= 19.15\n",
"| | | | | | | |--- Dochody_podatek_od_nieruchomosci <= 1291011.81\n",
"| | | | | | | | |--- Ludnosc_w_wieku_produkcyjnym <= 3086.00\n",
"| | | | | | | | | |--- value: [171383125.19]\n",
"| | | | | | | | |--- Ludnosc_w_wieku_produkcyjnym > 3086.00\n",
"| | | | | | | | | |--- Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym <= 641.00\n",
"| | | | | | | | | | |--- value: [122764445.52]\n",
"| | | | | | | | | |--- Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym > 641.00\n",
"| | | | | | | | | | |--- value: [146824455.62]\n",
"| | | | | | | |--- Dochody_podatek_od_nieruchomosci > 1291011.81\n",
"| | | | | | | | |--- Ludnosc_w_wieku_produkcyjnym_niemobilnym <= 14363.50\n",
"| | | | | | | | | |--- Turysci_ogolem <= 289.00\n",
"| | | | | | | | | | |--- value: [162582295.42]\n",
"| | | | | | | | | |--- Turysci_ogolem > 289.00\n",
"| | | | | | | | | | |--- Wynagrodzenie_w_relacji_do_sredniej <= 70.60\n",
"| | | | | | | | | | | |--- value: [158370335.96]\n",
"| | | | | | | | | | |--- Wynagrodzenie_w_relacji_do_sredniej > 70.60\n",
"| | | | | | | | | | | |--- truncated branch of depth 26\n",
"| | | | | | | | |--- Ludnosc_w_wieku_produkcyjnym_niemobilnym > 14363.50\n",
"| | | | | | | | | |--- Ludnosc_kobiety <= 29627.00\n",
"| | | | | | | | | | |--- value: [675624217.52]\n",
"| | | | | | | | | |--- Ludnosc_kobiety > 29627.00\n",
"| | | | | | | | | | |--- Dochody_razem <= 112764740.00\n",
"| | | | | | | | | | | |--- truncated branch of depth 3\n",
"| | | | | | | | | | |--- Dochody_razem > 112764740.00\n",
"| | | | | | | | | | | |--- truncated branch of depth 16\n",
"| | | | | | |--- Saldo_migracji_na_1000_ludnosci > 19.15\n",
"| | | | | | | |--- Wymeldowania_na_wies_mezczyzni <= 12.50\n",
"| | | | | | | | |--- value: [819103731.56]\n",
"| | | | | | | |--- Wymeldowania_na_wies_mezczyzni > 12.50\n",
"| | | | | | | | |--- Ludnosc_kobiety_w_wieku_poprodukcyjnym <= 2525.50\n",
"| | | | | | | | | |--- Dlugotrwale_bezrobotni <= 341.50\n",
"| | | | | | | | | | |--- Wymeldowania_na_wies_kobiety <= 19.50\n",
"| | | | | | | | | | | |--- truncated branch of depth 2\n",
"| | | | | | | | | | |--- Wymeldowania_na_wies_kobiety > 19.50\n",
"| | | | | | | | | | | |--- truncated branch of depth 4\n",
"| | | | | | | | | |--- Dlugotrwale_bezrobotni > 341.50\n",
"| | | | | | | | | | |--- value: [45343367.61]\n",
"| | | | | | | | |--- Ludnosc_kobiety_w_wieku_poprodukcyjnym > 2525.50\n",
"| | | | | | | | | |--- Wplywy_z_oplaty_eksploatacyjnej <= 50673.37\n",
"| | | | | | | | | | |--- value: [61979737.09]\n",
"| | | | | | | | | |--- Wplywy_z_oplaty_eksploatacyjnej > 50673.37\n",
"| | | | | | | | | | |--- value: [121810368.38]\n",
"| | | | | |--- Ludnosc_kobiety_w_wieku_poprodukcyjnym > 17303.50\n",
"| | | | | | |--- Dochody_podatek_od_spadkow <= 1733235.94\n",
"| | | | | | | |--- Wynagrodzenie_w_relacji_do_sredniej <= 88.95\n",
"| | | | | | | | |--- Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym <= 17244.00\n",
"| | | | | | | | | |--- Gestosc_zaludnienia <= 1.62\n",
"| | | | | | | | | | |--- value: [114787825.97]\n",
"| | | | | | | | | |--- Gestosc_zaludnienia > 1.62\n",
"| | | | | | | | | | |--- value: [118788666.24]\n",
"| | | | | | | | |--- Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym > 17244.00\n",
"| | | | | | | | | |--- Ludnosc_w_wieku_produkcyjnym <= 125722.00\n",
"| | | | | | | | | | |--- value: [85637902.19]\n",
"| | | | | | | | | |--- Ludnosc_w_wieku_produkcyjnym > 125722.00\n",
"| | | | | | | | | | |--- value: [70210346.90]\n",
"| | | | | | | |--- Wynagrodzenie_w_relacji_do_sredniej > 88.95\n",
"| | | | | | | | |--- Dochody_z_najmu_i_dzierzawy <= 6061333.00\n",
"| | | | | | | | | |--- Wymeldowania_ogolem <= 2041.50\n",
"| | | | | | | | | | |--- Ludnosc_w_wieku_produkcyjnym <= 109787.00\n",
"| | | | | | | | | | | |--- truncated branch of depth 2\n",
"| | | | | | | | | | |--- Ludnosc_w_wieku_produkcyjnym > 109787.00\n",
"| | | | | | | | | | | |--- truncated branch of depth 2\n",
"| | | | | | | | | |--- Wymeldowania_ogolem > 2041.50\n",
"| | | | | | | | | | |--- value: [110484937.69]\n",
"| | | | | | | | |--- Dochody_z_najmu_i_dzierzawy > 6061333.00\n",
"| | | | | | | | | |--- Ludnosc_mezczyzni <= 71811.00\n",
"| | | | | | | | | | |--- value: [255623586.25]\n",
"| | | | | | | | | |--- Ludnosc_mezczyzni > 71811.00\n",
"| | | | | | | | | | |--- value: [199270168.64]\n",
"| | | | | | |--- Dochody_podatek_od_spadkow > 1733235.94\n",
"| | | | | | | |--- Dochody_dofinansowanie_razem <= 7532695.62\n",
"| | | | | | | | |--- Dochody_razem <= 517752880.00\n",
"| | | | | | | | | |--- value: [292699374.69]\n",
"| | | | | | | | |--- Dochody_razem > 517752880.00\n",
"| | | | | | | | | |--- value: [257797653.62]\n",
"| | | | | | | |--- Dochody_dofinansowanie_razem > 7532695.62\n",
"| | | | | | | | |--- value: [381077102.66]\n",
"| | |--- Ludnosc_w_wieku_produkcyjnym > 185983.00\n",
"| | | |--- Wynagrodzenie_w_relacji_do_sredniej <= 133.45\n",
"| | | | |--- Turysci_ogolem <= 280537.00\n",
"| | | | | |--- Ludnosc_w_wieku_produkcyjnym <= 187736.50\n",
"| | | | | | |--- value: [368605477.02]\n",
"| | | | | |--- Ludnosc_w_wieku_produkcyjnym > 187736.50\n",
"| | | | | | |--- Wplywy_z_oplaty_targowej <= 2805425.38\n",
"| | | | | | | |--- Dochody_podatek_rolny <= 2382003.62\n",
"| | | | | | | | |--- Bezrobotni_mezczyzni <= 3058.50\n",
"| | | | | | | | | |--- Dochody_z_uslug <= 82099316.00\n",
"| | | | | | | | | | |--- Ludnosc_kobiety_w_wieku_poprodukcyjnym <= 59429.50\n",
"| | | | | | | | | | | |--- value: [178152459.17]\n",
"| | | | | | | | | | |--- Ludnosc_kobiety_w_wieku_poprodukcyjnym > 59429.50\n",
"| | | | | | | | | | | |--- truncated branch of depth 2\n",
"| | | | | | | | | |--- Dochody_z_uslug > 82099316.00\n",
"| | | | | | | | | | |--- Wynagrodzenie_w_relacji_do_sredniej <= 110.55\n",
"| | | | | | | | | | | |--- truncated branch of depth 5\n",
"| | | | | | | | | | |--- Wynagrodzenie_w_relacji_do_sredniej > 110.55\n",
"| | | | | | | | | | | |--- truncated branch of depth 5\n",
"| | | | | | | | |--- Bezrobotni_mezczyzni > 3058.50\n",
"| | | | | | | | | |--- Wymeldowania_mezczyzni <= 659.50\n",
"| | | | | | | | | | |--- Dochody_podatek_PCC <= 55368212.00\n",
"| | | | | | | | | | | |--- truncated branch of depth 5\n",
"| | | | | | | | | | |--- Dochody_podatek_PCC > 55368212.00\n",
"| | | | | | | | | | | |--- value: [123333674.24]\n",
"| | | | | | | | | |--- Wymeldowania_mezczyzni > 659.50\n",
"| | | | | | | | | | |--- Wplywy_z_innych_lokalnych_oplat <= 71795516.00\n",
"| | | | | | | | | | | |--- truncated branch of depth 7\n",
"| | | | | | | | | | |--- Wplywy_z_innych_lokalnych_oplat > 71795516.00\n",
"| | | | | | | | | | | |--- truncated branch of depth 9\n",
"| | | | | | | |--- Dochody_podatek_rolny > 2382003.62\n",
"| | | | | | | | |--- value: [315084220.12]\n",
"| | | | | | |--- Wplywy_z_oplaty_targowej > 2805425.38\n",
"| | | | | | | |--- Zameldowania_z_miast_ogolem <= 4236.50\n",
"| | | | | | | | |--- value: [258878709.16]\n",
"| | | | | | | |--- Zameldowania_z_miast_ogolem > 4236.50\n",
"| | | | | | | | |--- value: [344302462.59]\n",
"| | | | |--- Turysci_ogolem > 280537.00\n",
"| | | | | |--- Dochody_z_najmu_i_dzierzawy <= 70666956.00\n",
"| | | | | | |--- Wojewodztwo_Slaskie <= 0.50\n",
"| | | | | | | |--- Wymeldowania_do_miast_mezczyzni <= 976.00\n",
"| | | | | | | | |--- value: [570198401.95]\n",
"| | | | | | | |--- Wymeldowania_do_miast_mezczyzni > 976.00\n",
"| | | | | | | | |--- value: [561559712.26]\n",
"| | | | | | |--- Wojewodztwo_Slaskie > 0.50\n",
"| | | | | | | |--- value: [773787942.19]\n",
"| | | | | |--- Dochody_z_najmu_i_dzierzawy > 70666956.00\n",
"| | | | | | |--- Wplywy_z_oplaty_targowej <= 2307080.88\n",
"| | | | | | | |--- Bezrobotni_mezczyzni <= 8035.25\n",
"| | | | | | | | |--- value: [251006633.98]\n",
"| | | | | | | |--- Bezrobotni_mezczyzni > 8035.25\n",
"| | | | | | | | |--- value: [336825560.70]\n",
"| | | | | | |--- Wplywy_z_oplaty_targowej > 2307080.88\n",
"| | | | | | | |--- value: [546788438.22]\n",
"| | | |--- Wynagrodzenie_w_relacji_do_sredniej > 133.45\n",
"| | | | |--- Dochody_podatek_rolny <= 1140017.00\n",
"| | | | | |--- value: [845270363.07]\n",
"| | | | |--- Dochody_podatek_rolny > 1140017.00\n",
"| | | | | |--- value: [351302277.11]\n",
"| |--- Bezrobotni_mezczyzni > 19299.50\n",
"| | |--- Saldo_migracji <= -715.50\n",
"| | | |--- value: [1636234649.62]\n",
"| | |--- Saldo_migracji > -715.50\n",
"| | | |--- value: [1156609061.12]\n",
"|--- Dlugotrwale_bezrobotni > 25668.00\n",
"| |--- value: [4888827044.14]\n",
"\n"
]
}
],
"source": [
"# Print the fitted tree's split rules as indented text, one feature threshold per line.\n",
"# No max_depth is passed, so export_text applies its default and reports deeper\n",
"# subtrees as 'truncated branch of depth N'.\n",
"print(export_text(decision_tree=model, feature_names=feature_names))"
]
},
{
"cell_type": "code",
"execution_count": 801,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.60288 — Dlugotrwale_bezrobotni\n",
"0.13303 — Bezrobotni_mezczyzni\n",
"0.06044 — Ludnosc_w_wieku_produkcyjnym\n",
"0.02626 — Wynagrodzenie_w_relacji_do_sredniej\n",
"0.02598 — Turysci_ogolem\n",
"0.02175 — Wplywy_z_oplaty_eksploatacyjnej\n",
"0.01448 — Wymeldowania_na_wies_mezczyzni\n",
"0.01030 — Turysci_zagraniczni\n",
"0.01009 — Ludnosc_kobiety\n",
"0.00950 — Ludnosc_kobiety_w_wieku_poprodukcyjnym\n",
"0.00926 — Dochody_podatek_rolny\n",
"0.00725 — Dochody_podatek_lesny\n",
"0.00470 — Wplywy_z_oplaty_targowej\n",
"0.00468 — Dochody_z_najmu_i_dzierzawy\n",
"0.00453 — Saldo_migracji\n",
"0.00381 — Wymeldowania_na_wies_ogolem\n",
"0.00296 — Saldo_migracji_na_1000_ludnosci\n",
"0.00290 — Wynagrodzenie_ogolem\n",
"0.00288 — Ludnosc_kobiety_w_wieku_przedprodukcyjnym\n",
"0.00223 — Dochody_razem\n",
"0.00206 — Wplywy_z_innych_lokalnych_oplat\n",
"0.00193 — Zameldowania_kobiety\n",
"0.00190 — Dochody_podatek_od_spadkow\n",
"0.00180 — Wymeldowania_mezczyzni\n",
"0.00156 — Dochody_podatek_od_nieruchomosci\n",
"0.00156 — Wplywy_z_oplaty_skarbowej\n",
"0.00147 — Ludnosc_w_wieku_produkcyjnym_niemobilnym\n",
"0.00141 — Dochody_z_uslug\n",
"0.00139 — Dochody_podatek_PCC\n",
"0.00136 — Zameldowania_z_miast_kobiety\n",
"0.00134 — Zmiana_liczby_ludnosci\n",
"0.00126 — Bezrobotni_powyzej_50_roku_zycia\n",
"0.00122 — Ludnosc_w_wieku_produkcyjnym_mobilnym\n",
"0.00119 — Miejsca_noclegowe_ogolem\n",
"0.00105 — Ludnosc_mezczyzni_w_wieku_poprodukcyjnym\n",
"0.00097 — Wojewodztwo_Slaskie\n",
"0.00076 — Dochody_z_majatku\n",
"0.00071 — Udzialy_w_podatkach_dochodowych_od_osob_prywatnych\n",
"0.00064 — Zameldowania_ze_wsi_ogolem\n",
"0.00060 — Zameldowania_ze_wsi_kobiety\n",
"0.00054 — Dochody_podatek_od_srodkow_transportowych\n",
"0.00051 — Gestosc_zaludnienia\n",
"0.00049 — Dochody_dofinansowanie_razem\n",
"0.00044 — Wymeldowania_do_miast_mezczyzni\n",
"0.00044 — Ludnosc_kobiety_w_wieku_produkcyjnym_niemobilnym\n",
"0.00040 — Dochody_dofinansowanie_inwestycyjne\n",
"0.00039 — Zameldowania_z_miast_ogolem\n",
"0.00037 — Bezrobotni_ogolem\n",
"0.00035 — Udzialy_w_podatkach_dochodowych_razem\n",
"0.00033 — Zameldowania_ze_wsi_mezczyzni\n",
"0.00032 — Obiekty_caloroczne\n",
"0.00031 — Ludnosc_na_1_km2\n",
"0.00028 — Wymeldowania_kobiety\n",
"0.00026 — Wojewodztwo_Opolskie\n",
"0.00026 — Ludnosc_mezczyzni_w_wieku_produkcyjnym_niemobilnym\n",
"0.00024 — Zameldowania_z_miast_mezczyzni\n",
"0.00024 — Obiekty_ogolem\n",
"0.00024 — Wojewodztwo_Dolnoslaskie\n",
"0.00021 — Miejsca_noclegowe_caloroczne\n",
"0.00021 — Bezrobotne_kobiety\n",
"0.00020 — Wskaznik_urbanizacji\n",
"0.00020 — Ludnosc_w_wieku_przedprodukcyjnym\n",
"0.00019 — Wojewodztwo_Lubelskie\n",
"0.00019 — Wymeldowania_ogolem\n",
"0.00019 — Bezrobotni_do_25_roku_zycia\n",
"0.00017 — Dochody_podatek_od_dzialalnosci_gospodarczej\n",
"0.00013 — Wojewodztwo_Podlaskie\n",
"0.00012 — Wymeldowania_na_wies_kobiety\n",
"0.00012 — Dochody_podatek_odrebne_ustawy\n",
"0.00011 — Ludnosc_mezczyzni\n",
"0.00009 — Gmina_miejsko_wiejska\n",
"0.00008 — Ludnosc_mezczyzni_w_wieku_produkcyjnym\n",
"0.00007 — Zameldowania_mezczyzni\n",
"0.00007 — Powierzchnia\n",
"0.00006 — Ludnosc\n",
"0.00006 — Wymeldowania_do_miast_ogolem\n",
"0.00005 — Ludnosc_w_wieku_poprodukcyjnym\n",
"0.00005 — Ludnosc_ogolem\n",
"0.00004 — Wojewodztwo_Malopolskie\n",
"0.00004 — Wymeldowania_do_miast_kobiety\n",
"0.00003 — Ludnosc_mezczyzni_w_wieku_przedprodukcyjnym\n",
"0.00003 — Wojewodztwo_Lodzkie\n",
"0.00003 — Udzialy_w_podatkach_dochodowych_od_osob_fizycznych\n",
"0.00002 — Wojewodztwo_Wielkopolskie\n",
"0.00002 — Ludnosc_mezczyzni_w_wieku_produkcyjnym_mobilnym\n",
"0.00002 — Ludnosc_kobiety_w_wieku_produkcyjnym_mobilnym\n",
"0.00002 — Wojewodztwo_Kujawsko_Pomorskie\n",
"0.00002 — Wojewodztwo_Lubuskie\n",
"0.00002 — Zameldowania_ogolem\n",
"0.00001 — Wojewodztwo_Podkarpackie\n",
"0.00001 — Ludnosc_kobiety_w_wieku_produkcyjnym\n",
"0.00001 — Wojewodztwo_Pomorskie\n",
"0.00001 — Gmina_wiejska\n",
"0.00000 — Wojewodztwo_Zachodniopomorskie\n",
"0.00000 — Wojewodztwo_Mazowieckie\n",
"0.00000 — Gmina_miejska\n",
"0.00000 — Wojewodztwo_Warminsko_Mazurskie\n",
"0.00000 — Wojewodztwo_Swietokrzyskie\n"
]
}
],
"source": [
"# Map each feature name to the importance reported by the fitted model.\n",
"feature_importance = dict(zip(feature_names, model.feature_importances_))\n",
"\n",
"# List features from most to least important; the sort is stable, so features\n",
"# with equal importance keep their feature_names order.\n",
"for feature in sorted(feature_importance, key=feature_importance.get, reverse=True):\n",
"    importance = feature_importance[feature]\n",
"    print(f'{importance:.5f} \\u2014 {feature}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}