In [1]:
import geopandas as gpd

# Remote GeoPackage that holds every map layer used in this notebook.
indiaMapsLink = 'https://github.com/FabioGM-02/Tarea-5/raw/refs/heads/main/Maps/indiaMaps_7755.gpkg'

# Show the layers stored in the file (name and geometry type of each one).
gpd.list_layers(indiaMapsLink)
Out[1]:
name | geometry_type | |
---|---|---|
0 | country | MultiPolygon |
1 | cities | Point |
2 | rivers | MultiLineString |
3 | centroid | Point |
4 | airports | Point |
5 | states | MultiPolygon |
In [2]:
# Read the 'states' layer of the GeoPackage into a GeoDataFrame.
states = gpd.read_file(
    indiaMapsLink,
    layer='states',
)
In [3]:
# The country layer supplies the target CRS for the states layer.
countries = gpd.read_file(indiaMapsLink, layer='country')

# Work on a copy: force the CRS label to EPSG:32643 (overriding whatever
# the layer declares) and then reproject into the CRS used by `countries`.
# NOTE(review): `states` is overwritten at the end of this cell, so running
# the cell a second time would apply the override + reprojection to data that
# has already been reprojected - re-run the cell that loads `states` first.
states_fixed = (
    states.copy()
    .set_crs(epsg=32643, allow_override=True)
    .to_crs(countries.crs)
)

# The two bounding boxes should now be close to each other.
print("Fixed States bounds:", states_fixed.total_bounds)
print("Countries bounds:", countries.total_bounds)

states = states_fixed
Fixed States bounds: [2819242.64544075 2177754.67403902 5679168.18314899 5260289.41141887] Countries bounds: [2814964.32964461 2176798.65900902 5676285.14421876 5260475.76432007]
In [4]:
import pandas as pd

# Excel workbook in which every variable is reported at district level.
hdi_url = 'https://github.com/alonso-mendoza/Tarea-6/raw/refs/heads/main/data/india%20HDI.xlsx'

# Load the workbook into a DataFrame.
hdi = pd.read_excel(io=hdi_url)
In [5]:
# Merging our geometry with the data
districts=hdi.merge(states,left_on='District', right_on='NAME_2')
districts.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 633 entries, 0 to 632 Data columns (total 23 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 State/Union Territory 633 non-null object 1 District 633 non-null object 2 L 633 non-null float64 3 E 633 non-null float64 4 H 633 non-null float64 5 HDIa 633 non-null float64 6 Rank in HDIa 633 non-null int64 7 Inequality in Progress 633 non-null float64 8 ID_0 633 non-null int64 9 ISO 633 non-null object 10 NAME_0 633 non-null object 11 ID_1 633 non-null int64 12 NAME_1 633 non-null object 13 ID_2 633 non-null int64 14 NAME_2 633 non-null object 15 HASC_2 600 non-null object 16 CCN_2 633 non-null int64 17 CCA_2 0 non-null object 18 TYPE_2 633 non-null object 19 ENGTYPE_2 633 non-null object 20 NL_NAME_2 0 non-null object 21 VARNAME_2 184 non-null object 22 geometry 633 non-null geometry dtypes: float64(5), geometry(1), int64(5), object(12) memory usage: 113.9+ KB
In [6]:
# Keep only the state name, the district name, the four indicator columns
# and the geometry, and wrap the result in a GeoDataFrame.
# The CRS is taken from `states` (the source of these geometries) instead of
# a hard-coded EPSG code, so it always matches the CRS the geometries carry.
districts = gpd.GeoDataFrame(
    districts[['NAME_1', 'District', 'L', 'E', 'H', 'HDIa', 'geometry']],
    geometry='geometry',
    crs=states.crs,
)

# Preview the first rows.
districts.head()
Out[6]:
NAME_1 | District | L | E | H | HDIa | geometry | |
---|---|---|---|---|---|---|---|
0 | Andaman and Nicobar | South Andaman | 0.919 | 0.952 | 0.979 | 0.950 | MULTIPOLYGON (((5503734.054 2797754.016, 55059... |
1 | Andaman and Nicobar | North and Middle Andaman | 0.526 | 0.909 | 0.993 | 0.788 | MULTIPOLYGON (((5455144.764 3082535.015, 54552... |
2 | Andhra Pradesh | West Godavari | 0.867 | 0.871 | 0.981 | 0.905 | MULTIPOLYGON (((4164885.629 3285520.16, 416540... |
3 | Andhra Pradesh | Y.S.R. | 0.864 | 0.872 | 0.928 | 0.888 | MULTIPOLYGON (((3887677.643 3040941.348, 38881... |
4 | Andhra Pradesh | Chittoor | 0.768 | 0.897 | 0.970 | 0.875 | MULTIPOLYGON (((3836090.177 2903126.194, 38367... |
In [7]:
# Give the indicator columns descriptive names.
# `rename` returns a new frame that is assigned back, rather than mutating
# the existing object in place.
districts = districts.rename(
    columns={
        'NAME_1': 'State/Union',
        'L': 'Wealth_Index_Above_2nd_Quintile',
        'E': 'Secondary_School_Attendance',
        'H': 'Prob_Survival_First5Yrs',
        'HDIa': 'HDI',
    }
)

# Confirm the new column names and dtypes.
districts.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 633 entries, 0 to 632 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 State/Union 633 non-null object 1 District 633 non-null object 2 Wealth_Index_Above_2nd_Quintile 633 non-null float64 3 Secondary_School_Attendance 633 non-null float64 4 Prob_Survival_First5Yrs 633 non-null float64 5 HDI 633 non-null float64 6 geometry 633 non-null geometry dtypes: float64(4), geometry(1), object(2) memory usage: 34.7+ KB
Exercise 6
In [9]:
!pip install mapclassify
Requirement already satisfied: mapclassify in c:\users\mendo\anaconda3\envs\dataespacial__31111\lib\site-packages (2.8.1) Requirement already satisfied: networkx>=2.7 in c:\users\mendo\anaconda3\envs\dataespacial__31111\lib\site-packages (from mapclassify) (3.4.2) Requirement already satisfied: numpy>=1.23 in c:\users\mendo\anaconda3\envs\dataespacial__31111\lib\site-packages (from mapclassify) (2.1.3) Requirement already satisfied: pandas!=1.5.0,>=1.4 in c:\users\mendo\anaconda3\envs\dataespacial__31111\lib\site-packages (from mapclassify) (2.2.3) Requirement already satisfied: scikit-learn>=1.0 in c:\users\mendo\anaconda3\envs\dataespacial__31111\lib\site-packages (from mapclassify) (1.6.1) Requirement already satisfied: scipy>=1.8 in c:\users\mendo\anaconda3\envs\dataespacial__31111\lib\site-packages (from mapclassify) (1.15.2) Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\mendo\anaconda3\envs\dataespacial__31111\lib\site-packages (from pandas!=1.5.0,>=1.4->mapclassify) (2.9.0.post0) Requirement already satisfied: pytz>=2020.1 in c:\users\mendo\anaconda3\envs\dataespacial__31111\lib\site-packages (from pandas!=1.5.0,>=1.4->mapclassify) (2024.1) Requirement already satisfied: tzdata>=2022.7 in c:\users\mendo\anaconda3\envs\dataespacial__31111\lib\site-packages (from pandas!=1.5.0,>=1.4->mapclassify) (2023.3) Requirement already satisfied: joblib>=1.2.0 in c:\users\mendo\anaconda3\envs\dataespacial__31111\lib\site-packages (from scikit-learn>=1.0->mapclassify) (1.4.2) Requirement already satisfied: threadpoolctl>=3.1.0 in c:\users\mendo\anaconda3\envs\dataespacial__31111\lib\site-packages (from scikit-learn>=1.0->mapclassify) (3.6.0) Requirement already satisfied: six>=1.5 in c:\users\mendo\anaconda3\envs\dataespacial__31111\lib\site-packages (from python-dateutil>=2.8.2->pandas!=1.5.0,>=1.4->mapclassify) (1.16.0)
In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) of the HDI column.
districts['HDI'].describe()
Out[10]:
count 633.000000 mean 0.760378 std 0.135514 min 0.429000 25% 0.658000 50% 0.773000 75% 0.879000 max 1.000000 Name: HDI, dtype: float64
In [11]:
import seaborn as sea

# Histogram of the district HDI values.
sea.histplot(data=districts['HDI'], color='yellow')
Out[11]:
<Axes: xlabel='HDI', ylabel='Count'>
In [12]:
# Interactive map of the districts, coloured by HDI using the 'fisherjenks'
# classification scheme. Hover tooltips are turned off; clicking a district
# opens a popup with its state/union and district name.
districts.explore(
    column='HDI',
    scheme='fisherjenks',
    legend=True,
    legend_kwds={'colorbar': False},
    tooltip=False,
    popup=['State/Union', 'District'],
)
Out[12]:
Make this Notebook Trusted to load map: File -> Trust Notebook