
Basic Spatial Operations on GeoDataFrames¶

We will review some important operations on GeoDataFrames (GDF). This is a basic toolkit for a social scientist, but the results depend a lot on the quality of the maps you have.

A spatial operation is a way of doing arithmetic with geometries! Our inputs, the maps (polygons, lines, or points), will be summed, differenced, filtered, dissected, and so on.

Keep in mind that these basic operations will be used later for practical applications in the coming weeks.

Getting ready¶

The links to our maps on GitHub are here:

In [ ]:
linkWorldMap="https://github.com/CienciaDeDatosEspacial/dataSets/raw/refs/heads/main/WORLD/worldMaps.gpkg"
linkBrazil="https://github.com/CienciaDeDatosEspacial/dataSets/raw/refs/heads/main/BRAZIL/brazil_5880.gpkg"
linkIndicators="https://github.com/CienciaDeDatosEspacial/dataSets/raw/refs/heads/main/WORLD/worldindicators.json"

Let me introduce another data source on seaports. Their locations in longitude/latitude are stored in the file seaports.csv, which we already have on GitHub:

In [ ]:
linkSeaPorts='https://github.com/CienciaDeDatosEspacial/dataSets/raw/refs/heads/main/WORLD/seaports.csv'

Let's get some maps:

In [ ]:
import geopandas as gpd

#world 
world_rivers=gpd.read_file(linkWorldMap,layer='rivers')
#brazil 
brazil5880=gpd.read_file(linkBrazil,layer='country')
airports_brazil5880=gpd.read_file(linkBrazil,layer='airports')
states_brazil5880=gpd.read_file(linkBrazil,layer='states')
municipalities_brazil5880=gpd.read_file(linkBrazil,layer='municipalities')
#some indicators
indicators=gpd.read_file(linkIndicators)

# the seaports
import pandas as pd 
infoseaports=pd.read_csv(linkSeaPorts)
In [ ]:
# the seaports data has too many columns:
infoseaports.columns.to_list()

Let me keep some columns, and turn the DF into a GDF:

In [ ]:
#rename
infoseaports.rename(columns={'Main Port Name':'seaport_name','Country Code':'country_name'},inplace=True)
#keep a few columns
infoseaports=infoseaports.loc[:,['seaport_name', 'country_name','Latitude', 'Longitude']]
#spatial points (unprojected)
seaports=gpd.GeoDataFrame(data=infoseaports.copy(),
                           geometry=gpd.points_from_xy(infoseaports.Longitude,
                                                       infoseaports.Latitude), 
                          crs=4326)# notice it is unprojected

# keep Brazil
seaports_bra=seaports[seaports['country_name']=='Brazil'].copy()

# reset indexes
seaports_bra.reset_index(drop=True, inplace=True)

# reprojecting
seaports_brazil5880=seaports_bra.to_crs(5880) # projected crs

Now, let's see some important spatial operations!


UNARY Operations on Geo DataFrames¶

As the name implies, everything we do here requires ONE GDF as input.

I. Filtering¶

a. Using iloc and loc¶

The condition here depends on knowing index/column positions, or index/column labels.

In [ ]:
#if we have
states_brazil5880.head()
In [ ]:
# iloc for positions (same as in pandas DF)

states_brazil5880.iloc[:10,1:]
In [ ]:
# loc for labels (same as in pandas DF)
states_brazil5880.loc[:8,'state_code':]

Keep in mind that if you do not include the geometry column, you will get a DataFrame (DF) back, not a GeoDF.

In [ ]:
# GDF
type(states_brazil5880.loc[:8,'state_code':])
In [ ]:
# df
type(states_brazil5880.loc[:8,:'state_code'])

Also remember this detail:

In [ ]:
# you lose the spatial structure when keeping ONE row!
type(states_brazil5880.loc[8,:])
In [ ]:
# you keep the spatial structure if the row index is a list
type(states_brazil5880.loc[[8],:])

b. More Pandas Filtering for GeoPandas¶

In [ ]:
# complex conditions with query
airports_brazil5880.query('elevation_ft > 5000 and airport_type=="small_airport"')
In [ ]:
# filter based on subset
choices=['large_airport','seaplane_base']
airports_brazil5880[airports_brazil5880.airport_type.isin(choices)]
In [ ]:
# filter with text - startswith / endswith
textCondition=('Presi','Depu')
airports_brazil5880[airports_brazil5880.airport_name.str.startswith(textCondition)]
In [ ]:
# filter with text - contains (more flexible)
textPattern='Presidente|Deputado'
airports_brazil5880[airports_brazil5880.airport_name.str.contains(textPattern)]
# notice 'Refinaria Presidente Bernardes Heliport'
In [ ]:
# Filter rows where specific column is NOT null
airports_brazil5880[airports_brazil5880.elevation_ft.notna()]
In [ ]:
# Filter rows where specific column IS null
airports_brazil5880[airports_brazil5880.elevation_ft.isna()]

c. Slicing with cx¶

But as a GDF, you can also filter using coordinates via cx.

Let me get Brazil's centroid to show you how this works:

In [ ]:
brazil5880_cen=brazil5880.centroid
brazil5880_cen

Here, I recover each coordinate value:

In [ ]:
mid_x,mid_y=brazil5880_cen.x[0],brazil5880_cen.y[0]
mid_x,mid_y

Let me select airports north of the centroid:

In [ ]:
airports_brazil5880.cx[:,mid_y:]

Confirming we got it right:

In [ ]:
# the viz
base=brazil5880.plot(color='yellow') # brazil on the back
airports_brazil5880.cx[:,mid_y:].plot(ax=base,markersize=1) # all the brazilian northern airports 
brazil5880.centroid.plot(color='red',ax=base) # check the centroid

When the GDF is made of points, cx gives clean results. Do not expect that when the GDF is made of polygons.

Let me use it to split the states:

In [ ]:
# the north
N_brazil=states_brazil5880.cx[:,mid_y:]
# the south
S_brazil=states_brazil5880.cx[:,:mid_y]
# the west
W_brazil=states_brazil5880.cx[:mid_x,:]
# the east
E_brazil=states_brazil5880.cx[mid_x:,:]

Notice that cx does not cut polygons at the centroid; polygons crossing the line are kept whole:

In [ ]:
base=N_brazil.plot()
brazil5880.centroid.plot(color='red',ax=base)
In [ ]:
base=W_brazil.plot()
brazil5880.centroid.plot(color='red',ax=base)

d. Filtering by attribute value¶

The GDF 'states_brazil5880' has only these columns:

In [ ]:
states_brazil5880.columns

But since it is projected, you can get the area of each polygon:

In [ ]:
# area is in m²; this filter finds states larger than 1,000,000 km² (1e12 m²)
states_brazil5880[states_brazil5880.area > 1000000000000]

II. Combining geometries¶

Let's remember these contents:

In [ ]:
#see
municipalities_brazil5880.head(20)

Then, this is Rondônia:

In [ ]:
muniRondonia=municipalities_brazil5880[municipalities_brazil5880.state_name=='Rondônia']
In [ ]:
muniRondonia.plot(edgecolor='yellow')

We just saw several polygons; combining them means they become one polygon. There is more than one way to achieve that.

II.1 Unary UNION¶

We can combine all these polygons into one:

In [ ]:
muniRondonia.union_all()

Let's save that result:

In [ ]:
Rondonia_union=muniRondonia.union_all()
In [ ]:
# what do we have?
type(Rondonia_union)

You can turn that shapely object into a GeoDF like this:

In [ ]:
gpd.GeoDataFrame(geometry=[Rondonia_union]) # the recent union

Even better:

In [ ]:
gpd.GeoDataFrame(index=[0], # one element
                 data={'state':'Rondonia'}, # the column and the value
                 crs=muniRondonia.crs, # important to avoid naive geometries
                 geometry=[Rondonia_union]) # the recent union

II.2 Dissolve¶

a. Dissolve as Union¶

Using dissolve is an alternative to UNION:

In [ ]:
muniRondonia.dissolve()

Let me save the result and see its type:

In [ ]:
Rondonia_dissolved=muniRondonia.dissolve()

# we got?
type(Rondonia_dissolved)

You got a GDF this time:

In [ ]:
## see
Rondonia_dissolved

Some minimal changes to Rondonia_dissolved:

In [ ]:
# keeping what is relevant
Rondonia_dissolved.drop(columns=['municipality_name','municipality_code'],inplace=True)

# then
Rondonia_dissolved

Notice that dissolving returns a GDF and also keeps the information of one of the lower-level units that were dissolved. The union returned just a geometry.
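
A quick sanity check as a minimal sketch, using the objects above: dissolve() keeps the CRS of the input automatically, while the GDF we built from union_all() only has a CRS because we passed crs=muniRondonia.crs explicitly.

In [ ]:
# dissolve() preserves the CRS of the input GDF; union_all() returns a bare
# shapely geometry, so any GDF built from it needs its CRS set by hand
Rondonia_dissolved.crs == muniRondonia.crs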

b. Dissolve for groups¶

Using dissolve() with no arguments returns the union of the polygons as above, and you also get a GDF. However, if you have a column that represents a grouping (as we do), you can dissolve by that column:

In [ ]:
# dissolving municipalities again!- but by state
municipalities_brazil5880.dissolve(by='state_name').plot(facecolor='lightgrey', edgecolor='black',linewidth=0.2)

Again, let me save this result:

In [ ]:
Brazil_munitoStates=municipalities_brazil5880.dissolve(by='state_name')

We know we have a GDF; let's see contents:

In [ ]:
Brazil_munitoStates.head()

Again, we can drop columns that came from the lower level:

In [ ]:
Brazil_munitoStates.drop(columns=['municipality_name','municipality_code'],inplace=True)
Brazil_munitoStates.reset_index(inplace=True)
Brazil_munitoStates.info()

c. Dissolve and aggregate¶

In pandas, you can aggregate data using some statistics. Let's see the map with indicators we created in a previous session:

In [ ]:
indicators.head()

You can compute the mean of the countries by region, using a DF approach like this:

In [ ]:
indicators.groupby('region').agg({'fragility':'mean'}) 

You do not see a "geometry" column. It got lost when using groupby().agg().

The appropriate operation to conserve spatial information is also dissolve:

In [ ]:
indicatorsByRegion=indicators.dissolve(
    by="region", #groupby()
    aggfunc={"fragility": "mean"}, #agg()
    )

## see the GeoDF
indicatorsByRegion

Without renaming, you can request a choropleth:

In [ ]:
# You may need to install if using Colab
# !pip install mapclassify
In [ ]:
indicatorsByRegion.plot(column ='fragility',edgecolor='white',
                        figsize=(15, 10))

Keep in mind that combining objects via UNION_ALL and DISSOLVE is destructive: we cannot undo it. Operations like EXPLODE work in the reverse direction (splitting), but even explode cannot recover the original lower-level geometries from the output of UNION_ALL or DISSOLVE, so always keep a copy of the original GDF (see the sketch below). Did you notice something wrong in this last plot?
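
Before moving on, here is a minimal sketch with the GDFs defined above showing what explode() can and cannot do: it splits each multi-part geometry into its pieces, but those pieces are not the original municipalities.

In [ ]:
# explode() turns each MultiPolygon row into several single-Polygon rows...
exploded = Brazil_munitoStates.explode(index_parts=False)
# ...but the municipal borders are gone: compare the row counts
exploded.shape[0], municipalities_brazil5880.shape[0]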


III. Enveloping geometries: the convex hull¶

Sometimes you need to create a polygon that serves as an envelope for a set of points.

For this example, let me use the large airports:

In [ ]:
large_airports=airports_brazil5880[airports_brazil5880.airport_type=='large_airport']
large_airports.plot()

How to create a minimum polygon that envelops those points?

In [ ]:
## you see no difference!!
large_airports.convex_hull.plot()

The objects to be enveloped need to be combined first:

In [ ]:
# hull of the union
large_airports.union_all().convex_hull

The structure we got is:

In [ ]:
# this geometry is not a GeoDF...yet
type(large_airports.union_all().convex_hull)

Let's turn this geometry into a GDF:

In [ ]:
LargeAirports_hull= gpd.GeoDataFrame(index=[0],
                                     data={'hull':'Large airports'}, # the column and the value
                                    crs=large_airports.crs,
                                    geometry=[large_airports.union_all().convex_hull])

# then

LargeAirports_hull

Let's use the GDF in plotting:

In [ ]:
base=brazil5880.plot(facecolor='yellow')
large_airports.plot(ax=base)
LargeAirports_hull.plot(ax=base,facecolor='green',
                       edgecolor='white',alpha=0.4,
                       hatch='X')

You can get a convex hull of lines or polygons:

In [ ]:
# You can use it for dissolved polygons:
Rondonia_dissolved.convex_hull.plot()

Remember that union_all and dissolve() give different outputs:

In [ ]:
# you got a series, not just a geometry 
type(Rondonia_dissolved.convex_hull)
In [ ]:
# a simple "to_frame" does the job
Rondonia_dissolved.convex_hull.to_frame()

Here, we will turn that GeoSeries into a GDF:

In [ ]:
# more details
Rondonia_hull=Rondonia_dissolved.convex_hull.to_frame()
Rondonia_hull.rename(columns={0:"geometry"},inplace=True)
Rondonia_hull.set_geometry('geometry',inplace=True)
Rondonia_hull["name"]="Rondonia"
Rondonia_hull
In [ ]:
# notice the crs was inherited
Rondonia_hull.crs

Unless you need one hull per row, you first need to union/dissolve the polygons (rows) of a GeoDF. See:

In [ ]:
#original not COMBINED:
Brazil_munitoStates.plot(edgecolor="yellow")
In [ ]:
# hull of Non combined
Brazil_munitoStates.convex_hull.plot(edgecolor="yellow")
In [ ]:
# the hull of Brazil
Brazil_munitoStates.dissolve().convex_hull.plot(edgecolor="yellow")

IV. Buffering geometries¶

A buffer creates a polygon that follows the shape of the original vector (line, polygon, or point).

Let's see the Amazon River system:

In [ ]:
AmazonSystem=world_rivers[world_rivers.SYSTEM=='Amazon']
AmazonSystem.plot()

As this is not projected...

In [ ]:
AmazonSystem.crs.is_projected

We should reproject as buffering works with distances:

In [ ]:
AmazonSystem_5880=AmazonSystem.to_crs(5880)

Now I can use the rivers to create a buffer of 50000 meters:

In [ ]:
# 50000 m on each side (the radius)
AmazonSystem_5880.buffer(50000).plot(facecolor='yellow', edgecolor='black',linewidth=0.2)

The resulting buffer is:

In [ ]:
type(AmazonSystem_5880.buffer(50000))

Then:

In [ ]:
base=AmazonSystem_5880.buffer(50000).plot(facecolor='yellow',edgecolor='black',linewidth=0.2)
AmazonSystem_5880.plot(ax=base)

Notice that buffering can be customized:

In [ ]:
riv_buf_right = AmazonSystem_5880.buffer(distance = 50000, single_sided = True)
riv_buf_left = AmazonSystem_5880.buffer(distance = -25000, single_sided = True)

base =riv_buf_right.plot(color='green')
riv_buf_left.plot(ax=base, color='purple')

Let me save the reprojected rivers to a GeoJSON file:

In [ ]:
AmazonSystem_5880.to_file("AmazonSystem_5880.geojson", driver="GeoJSON")

BINARY Operations:¶

I. Distance¶

Distance is a key binary operation as so many practical policy matters depend on knowing distances between objects in space.

Any pair of properly projected GDFs has a distance between them. Below, we can build a query using distances:

Which are the airports whose distance to Brazil's centroid is greater than 2,500,000 m (2,500 km)?

In [ ]:
# this is the centroid we have:
brazil5880_cen,brazil5880_cen.iloc[0]

Then,

In [ ]:
airports_brazil5880[airports_brazil5880.distance(brazil5880_cen.iloc[0]) > 2500000]

The results can be confirmed visually:

In [ ]:
base=airports_brazil5880[airports_brazil5880.distance(brazil5880_cen.iloc[0]) > 2500000].plot(marker='+',markersize=100)
airports_brazil5880.plot(ax=base,color='grey', markersize=0.1)
brazil5880_cen.plot(ax=base,color='red')

Let me review how distances work between different kinds of geometries:

a. Distance between points¶

We have these points:

In [ ]:
seaports_brazil5880.head()
In [ ]:
large_airports.head()
In [ ]:
large_airports.distance(seaports_brazil5880.loc[0,'geometry'])

It is easy to compute distances, but a little manipulation of the GDF will help us better understand the result:

In [ ]:
# airport names as the index instead of row numbers:
large_airports.set_index('airport_name',inplace=True)
large_airports

Let's do the same for seaports:

In [ ]:
seaports_brazil5880.set_index('seaport_name',inplace=True)

Then, this is...

The distance from each large airport to 'Dtse / Gegua Oil Terminal' in km.

In [ ]:
large_airports.distance(seaports_brazil5880.geometry.iloc[0])/1000

What about computing...

All the distances between large airports and seaports (in km)

In [ ]:
# apply creates a LOOP, computes distances from each seaport to all large airports
seaports_brazil5880.geometry.apply\
(lambda seaport: large_airports.geometry.distance(seaport)/1000)

If we save the matrix...

In [ ]:
D_Matrix_sea_air=seaports_brazil5880.geometry.apply \
                (lambda seaport: large_airports.geometry.distance(seaport)/1000)

We can compute some distance stats from there:

In [ ]:
Stat_sea_air=pd.DataFrame()
Stat_sea_air['mean']=D_Matrix_sea_air.mean(axis=1) # mean D to all airports
Stat_sea_air['min']=D_Matrix_sea_air.min(axis=1)# min D to all airports
Stat_sea_air['max']=D_Matrix_sea_air.max(axis=1)# max D to all airports

# see some
Stat_sea_air.head(10)

Of course, idxmax and idxmin could come in handy:

In [ ]:
# farthest airport to each seaport
D_Matrix_sea_air.idxmax(axis=1).head()
In [ ]:
# farthest seaport to each airport
D_Matrix_sea_air.idxmax(axis=0).head()
In [ ]:
# closest airport to each seaport
D_Matrix_sea_air.idxmin(axis=1).head()
In [ ]:
# closest seaport to each airport
D_Matrix_sea_air.idxmin(axis=0).head()

b. Distance between line and point¶

Once we understand how distance and idxmin/idxmax work, we can feel comfortable at this stage.

Let's use these rivers from before:

In [ ]:
AmazonSystem_5880

Then,

Distance from river Tapajos to Guarulhos airport

In [ ]:
indexLabel='Guarulhos - Governador André Franco Montoro International Airport'
AmazonSystem_5880[AmazonSystem_5880.RIVER=='Tapajos'].distance(large_airports.geometry.loc[indexLabel])/1000

Reformat the Amazon river GDF:

In [ ]:
AmazonSystem_5880.set_index('RIVER',inplace=True)

We can compute the distance matrix now:

In [ ]:
D_Matrix_amazRivs_air=AmazonSystem_5880.geometry.apply \
                (lambda river: large_airports.geometry.distance(river)/1000)
In [ ]:
Stat_amz_air=pd.DataFrame()
Stat_amz_air['mean']=D_Matrix_amazRivs_air.mean(axis=1) # mean D to all airports
Stat_amz_air['min']=D_Matrix_amazRivs_air.min(axis=1)# min D to all airports
Stat_amz_air['max']=D_Matrix_amazRivs_air.max(axis=1)# max D to all airports

# see some
Stat_amz_air.head(10)
In [ ]:
# closest river to each airport
D_Matrix_amazRivs_air.idxmin(axis=0).head()
In [ ]:
# farthest river to each airport
D_Matrix_amazRivs_air.idxmax(axis=0).head()

c. Between Polygon and Point¶

Let me reuse the world rivers:

In [ ]:
river_systems=world_rivers[world_rivers.SYSTEM.isin(['Amazon','Parana'])]
river_systems
In [ ]:
ama_para=river_systems.dissolve(by='SYSTEM')
ama_para.drop(columns='RIVER',inplace=True)
ama_para

We still have lines:

In [ ]:
ama_para.plot(cmap='viridis')

But we will have polygons after this:

In [ ]:
ama_para.convex_hull.plot(cmap='viridis')

Since we have a GeoSeries of two geometries...

In [ ]:
ama_para.convex_hull,type(ama_para.convex_hull)

Let's turn that into a GDF:

In [ ]:
ama_para_hulls=ama_para.convex_hull.to_frame()
ama_para_hulls.rename(columns={0:'geometry'},inplace=True)
ama_para_hulls=ama_para_hulls.set_geometry('geometry')
ama_para_hulls.crs="EPSG:5880"

#voila
ama_para_hulls

And now, the distance matrix:

In [ ]:
D_Matrix_rivsHulls_air=ama_para_hulls.geometry.apply \
                (lambda system: large_airports.geometry.distance(system)/1000)
D_Matrix_rivsHulls_air

From here, you can compute distances between other kinds of geometries.
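
For instance, here is a minimal sketch (reusing objects built above) of a polygon-to-polygon distance matrix: each river-system hull against each dissolved state, where a value of 0 means the two overlap or touch.

In [ ]:
# polygon-to-polygon sketch: distance (km) from each river-system hull to each state
states_by_name=Brazil_munitoStates.set_index('state_name')
ama_para_hulls.geometry.apply(lambda hull: states_by_name.geometry.distance(hull)/1000)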

II. Clipping¶

Clipping uses a GDF geometry as a MASK to cut another GDF, which is presumably bigger and needs to be clipped.

Pay attention to the world rivers again:

In [ ]:
world_rivers

As you see, this GDF has no country column. But since it has geometry, you can keep the rivers, or the sections of them, that run through a country:

In [ ]:
rivers_brazil5880 = gpd.clip(gdf=world_rivers.to_crs(5880),
                             mask=brazil5880)

Then, you can plot the clipped version:

In [ ]:
base = brazil5880.plot(facecolor="greenyellow", edgecolor='black', linewidth=0.4,figsize=(5,5))
rivers_brazil5880.plot(edgecolor='blue', linewidth=0.5,
                    ax=base)

We can create our own mask for clipping:

Let me get the bounding box of the map (the smallest possible rectangle that completely encloses a geometric shape or set of shapes):

In [ ]:
brazil5880.total_bounds #[minx, miny, maxx, maxy]
In [ ]:
# or
minx, miny, maxx, maxy=brazil5880.total_bounds
minx, miny, maxx, maxy

I will combine those coordinates with the centroid (mid_x, mid_y) to create a BOX for the north and the south of Brazil:

In [ ]:
north_mask = [minx, mid_y, maxx, maxy]
south_mask = [minx, miny, maxx, mid_y]

# split Brazil
states_brazil5880.clip(north_mask).plot(edgecolor="yellow")
In [ ]:
states_brazil5880.clip(south_mask).plot(edgecolor="yellow")

As you see, with clip we can cut polygons (state borders are not respected).
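
A quick check of that claim, as a minimal sketch using objects defined earlier: cx keeps whole polygons, while clip cuts them at the mask, so the total areas of the two 'northern' selections differ.

In [ ]:
# clip cuts the states exactly at the mask, so its total area is smaller than
# the cx selection (which keeps the straddling states whole)
states_brazil5880.clip(north_mask).area.sum(), N_brazil.area.sum()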

III. Spatial Joins¶

We’re familiar with merging, which joins tables using common keys. Spatial joins, by contrast, rely solely on geometry columns to perform various types of filtering.

Let me use the Brazilian large airports and states:

In [ ]:
large_airports.head()

...and:

In [ ]:
states_brazil5880.head()

a. Within¶

Let's ask:

The large airports whose geometries are within the borders of a state in Brazil.

In [ ]:
airports_within_states = gpd.sjoin(
    large_airports,         # LEFT: airports we want to filter/keep
    states_brazil5880,      # RIGHT: spatial boundaries to check against
    how='inner',            # return geometries that match in both LEFT/RIGHT (jointype)
    predicate='within'      # spatial condition: LEFT geometry within RIGHT geometry
)

# these are:
airports_within_states

We just performed a point-to-polygon spatial join. Notice that the result preserves the original geometries from the LEFT GeoDataFrame — specifically, only those features whose spatial relationship satisfied both the predicate (e.g., 'within') and the join type ('inner'). The non-geometric attributes (columns) from the RIGHT GeoDataFrame are joined to the matching rows.
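
A quick check of that description, sketched on the result above: the geometries are still the airport points, and the state attributes arrived as extra columns (plus an index_right column pointing to the matching state row).

In [ ]:
# the geometry column still holds points (they come from the LEFT GDF)...
print(airports_within_states.geom_type.value_counts())
# ...and the state columns (plus index_right) were appended from the RIGHT GDF
print(airports_within_states.columns.to_list())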

b. Contains¶

Importantly, if the LEFT GeoDataFrame contains polygons and the RIGHT contains points (a polygon-to-point join), you’ll typically need to use a different predicate — such as 'contains' — to express the spatial relationship correctly.

Brazilian states that house large airports

In [ ]:
states_containing_LargeAirports = gpd.sjoin(states_brazil5880,large_airports,how='inner',
                                            predicate='contains')

states_containing_LargeAirports

c. Intersects¶

'Contains' is literally strict: Any airport located exactly on a state boundary — whether due to data precision, snapping, or real geography — will be excluded, even if it’s “practically” inside the state. More flexibility is achieved with intersects.

Large airports whose location shares at least one point with a particular state

In [ ]:
## Intersects needs at least a common point between both GeoDFs. 
gpd.sjoin(states_brazil5880,large_airports,
          how='inner', predicate='intersects')

d. Touches¶

We also have 'touches', a more stringent predicate than 'intersects'. It returns geometries that:

  • Share a border (for polygons or lines), or
  • Contact at exactly one point (for points or endpoints).

Which states are neighbors of 'Bahia', including Bahia itself?

In [ ]:
# Neighbors of Bahia?
gpd.sjoin(N_brazil.loc[N_brazil.state_name=='Bahia',:],N_brazil,how='inner', predicate='intersects').shape

That is, Bahia seems to share borders with 5 states:

In [ ]:
base=gpd.sjoin(N_brazil,N_brazil.loc[N_brazil.state_name=='Bahia',:],
               how='inner', 
               predicate='intersects').plot(color='yellow',edgecolor='red')
N_brazil.loc[N_brazil.state_name=='Bahia',:].plot(ax=base, color='red')
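
We can also list those neighbors by name; a quick sketch that drops Bahia itself from the result:

In [ ]:
# list the intersecting states by name, excluding Bahia itself
neighbors=gpd.sjoin(N_brazil.loc[N_brazil.state_name=='Bahia',:],N_brazil,
                    how='inner', predicate='intersects')
sorted(set(neighbors.state_name_right)-{'Bahia'})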

However, because many free GeoDataFrames — especially those sourced as Shapefiles — contain topological imperfections like gaps, overlaps, or misaligned vertices, 'touches' often fails to detect what should be adjacent features. Ironically, this “failure” can be useful: 'touches' acts as a diagnostic tool — highlighting where boundaries are not perfectly aligned.

In [ ]:
gpd.sjoin(N_brazil.loc[N_brazil.state_name=='Bahia',:],N_brazil,how='inner', predicate='touches').shape

See the neighbor that disappears:

In [ ]:
base=gpd.sjoin(N_brazil,N_brazil.loc[N_brazil.state_name=='Bahia',:],
               how='inner', 
               predicate='touches').plot(color='yellow',edgecolor='red')
N_brazil.loc[N_brazil.state_name=='Bahia',:].plot(ax=base, color='red')

e. Crosses¶

When we have lines, we may need crosses. Let me subset our rivers:

In [ ]:
amazonSystem=rivers_brazil5880[rivers_brazil5880.SYSTEM=='Amazon']
amazonSystem

Then,

Which rivers from the Amazon system are intersecting states?

In [ ]:
gpd.sjoin(amazonSystem,states_brazil5880,how='inner', predicate='intersects').shape

Alternatively,

Which rivers from the Amazon system are crossing states?

In [ ]:
gpd.sjoin(amazonSystem,states_brazil5880,how='inner', predicate='crosses').shape

Again, intersects means both geometries have some 'space' in common, while crosses requires the geometry to actually cross the other object's border. From the results above, there is one river that shares space with a state but does not cross its border.

In [ ]:
# Get intersects result
intersects_result = gpd.sjoin(amazonSystem,states_brazil5880, how='inner', predicate='intersects')

# Get crosses result
crosses_result = gpd.sjoin(amazonSystem,states_brazil5880, how='inner', predicate='crosses')

# Find indexes/columns
riverIndex_notCrossing=list(set(intersects_result.index)-set(crosses_result.index))
stateIndex_notCrossed=intersects_result[intersects_result.index.isin(riverIndex_notCrossing)].index_right

# see
states_brazil5880.loc[stateIndex_notCrossed,"state_name"], amazonSystem.loc[riverIndex_notCrossing,"RIVER"]

Now we know which river is not crossing a state border, and the name of that state.

In [ ]:
base=states_brazil5880.loc[stateIndex_notCrossed,:].plot(color='w',edgecolor='k',figsize=(12, 8))
amazonSystem.plot(ax=base)
amazonSystem.loc[riverIndex_notCrossing,:].plot(color='red',ax=base)

IV. Spatial Overlay¶

As the name implies, you need two inputs. We may need to create or derive geometries from the geometries we already have. Using a set-theory approach, we will see the use of intersection, union, difference, and symmetric difference. Let's remember these results:

In [ ]:
N_brazil
In [ ]:
S_brazil

Let me plot both of them:

In [ ]:
base= N_brazil.plot(facecolor='black', edgecolor='white',linewidth=0.2, alpha=0.6)
S_brazil.plot(facecolor='white', edgecolor='black',linewidth=0.2,ax=base, alpha=0.6)

The grey area reminds you that the coordinates we used to split the states did not give us a clean cut. Here you see the states in common:

In [ ]:
set(S_brazil.state_name) & set(N_brazil.state_name)

The same happened in East vs West:

In [ ]:
set(E_brazil.state_name) & set(W_brazil.state_name)
In [ ]:
# visualizing
base= E_brazil.plot(facecolor='black', edgecolor='white',linewidth=0.2, alpha=0.6)
W_brazil.plot(facecolor='white', edgecolor='black',linewidth=0.2,ax=base, alpha=0.6)

Let's play with these GDFs; keep in mind the position of the GDFs:

GDF_left.overlay(GDF_right, ...)

a. Intersection¶

We keep what is common between left and right GDFs :

In [ ]:
NS_brazil=N_brazil.overlay(S_brazil, how="intersection",keep_geom_type=True)
# see results
NS_brazil

Notice we got more rows than when we did this operation:

set(S_brazil.state_name) & set(N_brazil.state_name)

We have three more polygons:

In [ ]:
NS_brazil[NS_brazil.state_name_1!= NS_brazil.state_name_2]

In fact, we are NOT intersecting state names, we are intersecting geometries. So the input maps have some topological issues.

This is the amount of area that is in fact a topological problem:

In [ ]:
NS_brazil[NS_brazil.state_name_1!= NS_brazil.state_name_2].geometry.area.sum()

One way to measure the share of this low-quality area:

In [ ]:
NS_brazil[NS_brazil.state_name_1!= NS_brazil.state_name_2].geometry.area.sum()/  \
NS_brazil[NS_brazil.state_name_1== NS_brazil.state_name_2].geometry.area.sum() #continues from above

So, spatial overlay operations do their best to give you true results; but since the quality of the sources is not perfect, you may get messy output. It is our job to detect these issues and make decisions. Let's keep two GDFs: one with the imperfect result and another with a 'valid' output.

In [ ]:
NS_brazil_messy=NS_brazil.copy()
NS_brazil=NS_brazil[NS_brazil.state_name_1== NS_brazil.state_name_2]

This should be what we expected to see:

In [ ]:
NS_brazil

The clean data has minor things to improve: delete redundant columns, rename columns, and reset the index so it is a consecutive sequence.

In [ ]:
# avoid redundancy
keep=['state_name_1','state_code_1','geometry']
NS_brazil=NS_brazil.loc[:,keep]
NS_brazil.rename(columns={'state_name_1':'state_name','state_code_1':'state_code'},inplace=True)

# reset for correlative sequence
NS_brazil.reset_index(drop=True, inplace=True)

Based on the previous case, we may expect a similar situation here:

In [ ]:
# keeping the overlay
WE_brazil=W_brazil.overlay(E_brazil, how="intersection",keep_geom_type=True)
WE_brazil[WE_brazil.state_name_1!= WE_brazil.state_name_2]

Let's do the same as before:

In [ ]:
WE_brazil_messy=WE_brazil.copy()
WE_brazil=WE_brazil[WE_brazil.state_name_1== WE_brazil.state_name_2]

keep=['state_name_1','state_code_1','geometry']
WE_brazil=WE_brazil.loc[:,keep]
WE_brazil.rename(columns={'state_name_1':'state_name','state_code_1':'state_code'},inplace=True)
WE_brazil.reset_index(drop=True, inplace=True)

b. Union¶

Unlike UNION_ALL (which acts like DISSOLVE), here we will combine the left and right GDFs.

In [ ]:
NS_brazil.info()
In [ ]:
WE_brazil.info()
In [ ]:
# now
NS_brazil.overlay(WE_brazil,how="union",keep_geom_type=True)

As you see, the geometries are fine, but missing values were created in the rows where no intersection exists. Notice that this operation does identify the intersection.
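
A minimal sketch to see those missing values: count the NaNs created on each side of the union overlay.

In [ ]:
# rows coming from only one side of the union have NaNs in the other side's columns
union_NSWE=NS_brazil.overlay(WE_brazil,how="union",keep_geom_type=True)
union_NSWE[['state_name_1','state_name_2']].isna().sum()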

Pandas has a concat() function, which just pastes two tables, one on top of the other. Notice this gives you one extra row (Mato Grosso appears twice) because nothing is done even though the geometries intersect.

In [ ]:
# appending
import pandas as pd

pd.concat([NS_brazil,WE_brazil],ignore_index=True)

Let me create an object to save the overlay result:

In [ ]:
MidBrazil=NS_brazil.overlay(WE_brazil,how="union",keep_geom_type=True).dissolve()
MidBrazil
In [ ]:
# some cleaning

MidBrazil['country']='Brazil'
MidBrazil['region']='center'
# reordering
MidBrazil=MidBrazil.loc[:,['country','region','geometry']]

MidBrazil
In [ ]:
# see it
base=brazil5880.plot(facecolor='yellow')
MidBrazil.plot(ax=base)

c. Difference¶

Here, you keep what belongs to the left GDF that is not in the right GDF:

In [ ]:
# we keep the northern states that are not in the southern ones
N_brazil.overlay(S_brazil, how='difference')
In [ ]:
# using set operations:
set(N_brazil.state_name)- set(S_brazil.state_name)

We got a clean result (remember we are using the non-messy GDFs). Let's plot this:

In [ ]:
base=N_brazil.plot(color='yellow', edgecolor='black',alpha=0.1)
N_brazil.overlay(S_brazil, how='difference').plot(ax=base)

Keep in mind that difference is not commutative:

In [ ]:
S_brazil.overlay(N_brazil, how='difference')

This is a totally different result:

In [ ]:
base=N_brazil.plot(color='yellow', edgecolor='black',alpha=0.1)
S_brazil.overlay(N_brazil, how='difference').plot(ax=base)

d. Symmetric Difference¶

Here, we keep what belongs to either GDF but not to their intersection. Notice that this operation is commutative!

In [ ]:
N_brazil.overlay(S_brazil, how='symmetric_difference')

This operation gave a clean result again. Let's plot it:

In [ ]:
N_brazil.overlay(S_brazil, how='symmetric_difference').plot()
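
Since symmetric difference is commutative, swapping the inputs should cover the same area; a minimal sketch to verify:

In [ ]:
# total areas should match (up to floating-point noise) regardless of the input order
N_brazil.overlay(S_brazil, how='symmetric_difference').area.sum(), \
S_brazil.overlay(N_brazil, how='symmetric_difference').area.sum()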

Validity of Geometries¶

Geometries are sometimes created with issues, especially (multi)polygons. Let's check whether our recent maps of states and municipalities are valid:

In [ ]:
# non valid
S_brazil[~S_brazil.is_valid]
In [ ]:
# see the invalid:
S_brazil[~S_brazil.is_valid].plot()

It is difficult to see what is wrong. Let's get some information:

In [ ]:
# what is wrong?

from shapely.validation import explain_validity, make_valid

explain_validity(S_brazil[~S_brazil.is_valid].geometry)

This is the report:

In [ ]:
explain_validity(S_brazil.geometry).str.split("[",expand=True)[0].value_counts()

Let's use make_valid:

In [ ]:
S_brazil_valid=S_brazil.copy()

S_brazil_valid['geometry'] = [make_valid(row)  if not row.is_valid else row for row in S_brazil['geometry'] ]

#any invalid?
S_brazil_valid[~S_brazil_valid.is_valid]

The use of make_valid may output geometry collections, which is not desirable:

In [ ]:
pd.Series([type(x) for x in S_brazil_valid.geometry]).value_counts()
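
If collections do appear, one common workaround (a sketch, not the only option) is to keep only the polygonal parts of each GeometryCollection:

In [ ]:
from shapely.ops import unary_union

def keep_polygons(geom):
    # keep only the polygonal pieces of a GeometryCollection; other geometries pass through
    if geom.geom_type=='GeometryCollection':
        polys=[g for g in geom.geoms if g.geom_type in ('Polygon','MultiPolygon')]
        return unary_union(polys) if polys else geom
    return geom

S_brazil_valid['geometry']=S_brazil_valid.geometry.apply(keep_polygons)
pd.Series([type(x) for x in S_brazil_valid.geometry]).value_counts()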

Buffers and Validity¶

The buffering process helps clean simple invalidities:

In [ ]:
S_brazil_valid=S_brazil.copy()

S_brazil_valid['geometry'] = S_brazil_valid['geometry'].buffer(0)

#any invalid?
S_brazil_valid[~S_brazil_valid.is_valid]

This 'buffer trick' may not always work:

In [ ]:
# previously
indicatorsByRegion.plot(column =indicatorsByRegion.index,
                        edgecolor='white',
                        figsize=(15, 10))

The worst cases seem to be AFRICA and EAST AND SOUTHEAST ASIA, as both show some lines that should have disappeared after the dissolving we did a while ago.

Did the dissolving process create invalid geometries?

In [ ]:
indicatorsByRegion.geometry.is_valid.value_counts()

Since we do not have invalid geometries, the problem must be tiny gaps left between the dissolved borders, so the goal is to snap the boundaries together and eliminate these microscopic gaps.

We could try the trick of buffer(0), again:

In [ ]:
indicatorsByRegion_prjd=indicatorsByRegion.to_crs("ESRI:54052").copy()
indicatorsByRegion_prjd['geometry'] = indicatorsByRegion_prjd.buffer(0)

# previously
indicatorsByRegion_prjd.plot(column =indicatorsByRegion_prjd.index,
                        edgecolor='white',
                        figsize=(15, 10))

It did not work either. We may increase the buffer:

In [ ]:
indicatorsByRegion_prjd['geometry'] = indicatorsByRegion_prjd.buffer(1)

indicatorsByRegion_prjd.plot(column =indicatorsByRegion_prjd.index,
                        edgecolor='white',
                        figsize=(15, 10))

The last version did get rid of the gaps, at least visually. Let's check the part counts in each case:

In [ ]:
[(r,len(g.geoms)) for r,g in zip(indicatorsByRegion.index,indicatorsByRegion.geometry) if g.geom_type.startswith('Multi')]
In [ ]:
[(r,len(g.geoms)) for r,g in zip(indicatorsByRegion_prjd.index,indicatorsByRegion_prjd.geometry)  if g.geom_type.startswith('Multi')]

It seems the AFRICA issue was solved, but not EAST AND SOUTHEAST ASIA. There seems to be a really big issue along those borders (Mongolia and China). Let's explore:

In [ ]:
china=indicators[indicators.Country.isin(['CHINA'])]
mongolia=indicators[indicators.Country.isin(['MONGOLIA'])]

china.overlay(mongolia, how='intersection',keep_geom_type=False).geometry

So, we have a really bad situation:

  • There is an intersection between two countries, when there should be none.
  • The intersection includes objects other than polygons: a 'GEOMETRYCOLLECTION'.

See:

In [ ]:
# Quick count of objects in the GeometryCollection
result_geom = china.overlay(mongolia, how='intersection',keep_geom_type=False).geometry.iloc[0]
if result_geom.geom_type == 'GeometryCollection':
    print(f"Objects in collection: {len(result_geom.geoms)}")
    from collections import Counter
    print(dict(Counter(g.geom_type for g in result_geom.geoms)))
In [ ]:
## see the intersection:
base=china.plot(color='lightgrey')
mongolia.plot(color='yellow',ax=base)
china.overlay(mongolia, how='intersection',keep_geom_type=False).plot(ax=base)
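
As a small diagnostic only (a sketch, assuming the indicators layer carries a geographic CRS such as EPSG:4326), we can measure how much area that spurious overlap represents by keeping only the polygonal parts of the GeometryCollection:

In [ ]:
# keep only the polygonal pieces of the overlap and measure their area in km²
overlap_polys=[g for g in result_geom.geoms if g.geom_type in ('Polygon','MultiPolygon')]
gpd.GeoSeries(overlap_polys, crs=indicators.crs).to_crs("ESRI:54052").area.sum()/1e6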

The solution to this, believe me, will not be trivial: the border is not continuous, and creating a 'new frontier' between China and Mongolia would demand more functions than the ones we have covered so far. Situations like this require smart decisions, like getting a new map of better quality.