Mobility for resilience: displacement analysis

This notebook shows how to transform raw mobility data to a displacement analsysis using mobilkit.

We start loading raw HFLB data using the mobilkit.loader module.

Then, we import a shapefile to tessellate data and dynamically analize where people spend time during night before and after a major event (Puebla 2017 earthquake in Mexico). Different stratification (spatial and socio-economic) of the displacement rate are shown.

[1]:
%config Completer.use_jedi = False
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.gridspec import GridSpec
from matplotlib.dates import DateFormatter

import glob, os
from datetime import datetime as dt
from datetime import timedelta, datetime
from datetime import timezone
import pytz
from math import sin, cos, sqrt, atan2, radians
from scipy.optimize import minimize
from scipy import stats

### import Dask library (https://dask.org/)
import dask
import dask.dataframe as dd
from dask import delayed
from dask.diagnostics import ProgressBar
from dask.distributed import Client, LocalCluster

### import geospatial libraries
import geopandas as gpd
from haversine import haversine
import contextily as ctx
import pyproj

### directory that contains dataset(s) you want to analyze
filepath = "/data/WB_Mexico/gpsdata_eq/testdata_all/"
datapath = "../../data/"
outpath  = "../../results/"
[4]:
import warnings
warnings.filterwarnings('ignore')

Import external data

Administrative boundary shapefiles

[5]:
fig,ax = plt.subplots()
adm1_f = datapath+"spatial/boundaries_shapefiles/mex_admbnda_adm1_govmex/"
adm1_shp = gpd.read_file(adm1_f)
adm1_shp.boundary.plot(ax=ax)
plt.show()
../_images/examples_M4R_02_DisplacementAnalysis_4_0.png
[33]:
fig,ax = plt.subplots()
adm2_f = datapath+"spatial/boundaries_shapefiles/mex_admbnda_adm2_govmex/"
adm2_shp = gpd.read_file(adm2_f)
adm2_shp = adm2_shp[["ADM2_PCODE","ADM2_ES","geometry"]]
adm2_shp.boundary.plot(ax=ax, linewidth=.3)
plt.show()
../_images/examples_M4R_02_DisplacementAnalysis_5_0.png
[34]:
adm2_shp.head()
[34]:
ADM2_PCODE ADM2_ES geometry
0 MX01001 Aguascalientes POLYGON ((-102.09775 22.02325, -102.09857 22.0...
1 MX01002 Asientos POLYGON ((-101.99941 22.21951, -101.99940 22.2...
2 MX01003 Calvillo POLYGON ((-102.57625 21.96778, -102.57626 21.9...
3 MX01004 Cos POLYGON ((-102.25320 22.37449, -102.25239 22.3...
4 MX01005 Jes POLYGON ((-102.31034 22.03716, -102.30653 22.0...
[38]:
# Turn into a centroid
adm2_shp["centroid"] = adm2_shp.centroid
adm2_shp.geometry = adm2_shp.centroid
[40]:
fig,ax = plt.subplots()
adm2_shp.plot(ax=ax, linewidth=.3)
[40]:
<AxesSubplot:>
../_images/examples_M4R_02_DisplacementAnalysis_8_1.png

Seismic intensity shapefile

[41]:
seismic_shp_f = datapath+"spatial/seismicdata/intensity/"
seismic_shp = gpd.read_file(seismic_shp_f)[["PARAMVALUE","geometry"]]
[42]:
seismic_shp.tail()
[42]:
PARAMVALUE geometry
20 7.0 MULTIPOLYGON (((-99.00292 19.21667, -99.00471 ...
21 7.2 MULTIPOLYGON (((-98.74806 18.85833, -98.74864 ...
22 7.4 MULTIPOLYGON (((-98.67707 18.80000, -98.67940 ...
23 7.6 MULTIPOLYGON (((-98.44767 18.70000, -98.44861 ...
24 7.8 MULTIPOLYGON (((-98.48333 18.39997, -98.48336 ...
[43]:
seismic_shp_hm = seismic_shp.to_crs(epsg=3857)
[44]:
fig,ax = plt.subplots(1,1,figsize=(6,6))
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
seismic_shp_hm.plot(ax=ax, column='PARAMVALUE', legend=True, cmap='OrRd',
                 cax=cax, legend_kwds={'label': "Seismic Intensity"},
                 zorder=2.5, alpha=0.5)

ctx.add_basemap(ax, source=ctx.providers.CartoDB.Voyager)
ax.set_axis_off()
plt.show()
../_images/examples_M4R_02_DisplacementAnalysis_13_0.png
[45]:
adm2_SI = gpd.sjoin(adm2_shp, seismic_shp, how="left", \
                    op='intersects')[["ADM2_PCODE","PARAMVALUE"]]
[46]:
adm2_SI[adm2_SI["ADM2_PCODE"]=="MX09002"]
[46]:
ADM2_PCODE PARAMVALUE
265 MX09002 6.0

Population data

[47]:
poppath = datapath+"sociodemographic/populationdata/"
df_pop = pd.DataFrame()
# Load only the states we are interested in
for es in ["09","17","21","29"]:
    pop = poppath+"resultados_ageb_urbana_"+es+"_cpv2010.csv"
    df_pop1 = pd.read_csv(pop)[["entidad","mun","loc","ageb","mza","pobtot"]]
    df_pop = df_pop.append(df_pop1, ignore_index=True)

df_pop = df_pop[(df_pop["mun"]!=0) & (df_pop["loc"]==0)][["entidad","mun","pobtot"]]

df_pop["PCODE"] = df_pop.apply(lambda row : "MX"+str(row["entidad"]).zfill(2)+str(row["mun"]).zfill(3), axis=1)
[50]:
df_pop.head()
[50]:
entidad mun pobtot PCODE
1 9 2 414711 MX09002
3097 9 3 620416 MX09003
7970 9 4 186391 MX09004
9010 9 5 1185772 MX09005
17648 9 6 384326 MX09006
[51]:
adm2_SI_pop = adm2_SI.merge(df_pop, left_on="ADM2_PCODE", right_on="PCODE")[["PCODE","PARAMVALUE","pobtot"]]
[52]:
adm2_SI_pop.head()
[52]:
PCODE PARAMVALUE pobtot
0 MX09002 6.0 414711
1 MX09003 6.8 620416
2 MX09004 6.2 186391
3 MX09005 5.4 1185772
4 MX09006 6.6 384326

Wealth index data

[53]:
wealthidx_f = datapath+"sociodemographic/wealthindex/pca_index_AGEBS_localidades.csv"
wealthidx = pd.read_csv(wealthidx_f, header=0,
                        names = ["index","code","pca","index_pca"])
wealthidx.head()
/home/ubi/Sandbox/mobilkit_dask/mobenv/lib/python3.9/site-packages/IPython/core/interactiveshell.py:3146: DtypeWarning: Columns (1) have mixed types.Specify dtype option on import or set low_memory=False.
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
[53]:
index code pca index_pca
0 0 0100100010229 -2.680167 0.147990
1 1 0100100010233 -2.701735 0.146480
2 2 0100100010286 -3.474532 0.092363
3 3 0100100010290 -3.404371 0.097277
4 4 0100100010303 -3.099987 0.118592
[54]:
wealthidx["PCODE"] = wealthidx["code"].apply(lambda x : "MX"+str(x)[:5])
wealthidx_avg = wealthidx.groupby("PCODE")["index_pca"].mean().reset_index()
[56]:
wealthidx_avg.head()
[56]:
PCODE index_pca
0 MX01001 0.206558
1 MX01002 0.232278
2 MX01003 0.233212
3 MX01004 0.216361
4 MX01005 0.216503
[57]:
plt.figure(figsize=(4,3))
plt.hist(wealthidx_avg["index_pca"].values, bins=50)
plt.show()
../_images/examples_M4R_02_DisplacementAnalysis_25_0.png
[58]:
# Merge this info in the code mapping df
adm2_SI_pop_WI = adm2_SI_pop.merge(wealthidx_avg, on="PCODE")
[59]:
adm2_SI_pop_WI.head(10)
[59]:
PCODE PARAMVALUE pobtot index_pca
0 MX09002 6.0 414711 0.124537
1 MX09003 6.8 620416 0.106024
2 MX09004 6.2 186391 0.182998
3 MX09005 5.4 1185772 0.140297
4 MX09006 6.6 384326 0.134230
5 MX09007 7.0 1815786 0.157710
6 MX09008 6.0 239086 0.181795
7 MX09009 7.0 130582 0.285495
8 MX09010 6.8 727034 0.135709
9 MX09011 7.4 360265 0.199308

Compute displacement rate

Get valid user IDs

Filter users based on statistic.

[1]:
idhome = "data/id_home_3_1.csv"
df_idhome = pd.read_csv(idhome)
df_idhome["home"] = df_idhome["home"].apply(lambda v: [e for e in v.replace("[","")
                                                       .replace("]","")
                                                        .split(" ")
                                                       if len(e)>0])
df_idhome["homelat"] = df_idhome["home"].apply(lambda v: float(v[1]))
df_idhome["homelon"] = df_idhome["home"].apply(lambda v: float(v[0]))
df_idhome = df_idhome[["uid","homelat","homelon"]].copy()
allids = set(df_idhome["uid"].values)
[7]:
df_idhome.shape
[7]:
(279541, 3)
[9]:
len(allids)
[9]:
279541

Extract data of above IDs

We just lazily load the data and then filter on the ids. We get for free the localized datetime column.

If you want to persist these data separated per user at the different steps we show how to do it.

We connect to dask and then load and filter data.

[12]:
client = Client(address="127.0.0.1:8786", )
[12]:
{'tcp://127.0.0.1:38661': {'status': 'OK'},
 'tcp://127.0.0.1:39151': {'status': 'OK'},
 'tcp://127.0.0.1:39553': {'status': 'OK'},
 'tcp://127.0.0.1:40421': {'status': 'OK'},
 'tcp://127.0.0.1:41351': {'status': 'OK'},
 'tcp://127.0.0.1:42609': {'status': 'OK'},
 'tcp://127.0.0.1:44517': {'status': 'OK'},
 'tcp://127.0.0.1:45691': {'status': 'OK'}}
[13]:
client
[13]:

Client

Cluster

  • Workers: 8
  • Cores: 8
  • Memory: 32.00 GB
[14]:
tz = pytz.timezone("America/Mexico_City")
alldataf = dd.read_parquet("/data/datiHFLBPARQUET/")
filtered_dataf = mobilkit.stats.filterUsersFromSet(alldataf, allids)
[19]:
if False:
    # Now we can persist these data as in the original example
    # I prefer to use the parquet format which is faster
    alldataf = "../../results/displacement_selectedids_all_data"
    filtered_dataf.repartition(partition_size="20M").to_parquet(alldataf)
[2]:
# Now I can quickly reload this first step of selection
alldataf = "../../results/displacement_selectedids_all_data"
filtered_dataf_reloaded = dd.read_parquet(alldataf).repartition(partition_size="200M")
if "datetime" not in filtered_dataf_reloaded.columns:
    # Add datetime column
    import pytz
    tz = pytz.timezone("America/Mexico_City")
    # Filter on dates...
    filtered_dataf_reloaded = mobilkit.loader.filterStartStopDates(filtered_dataf_reloaded,
                                                    start_date="2017-09-04",
                                                      stop_date="2017-10-08",
                                                      tz=tz,)
    filtered_dataf_reloaded = mobilkit.loader.compute_datetime_col(filtered_dataf_reloaded, selected_tz=tz)

Get daily displacement distance

All these computing times are obtained on a personal laptop local cluster with:

Client
Scheduler: tcp://127.0.0.1:8786
Dashboard: http://127.0.0.1:8787/status
Cluster
Workers: 3
Cores: 3
Memory: 28.00 GB

for limited I/O performances. These should scale better on a cluster.

[28]:
# Prepare pings adding date and filtering on hour...
df_displacement_ready = mobilkit.temporal.filter_daynight_time(
                                                filtered_dataf_reloaded,
                                                filter_to_h=9,
                                                filter_from_h=21,
                                                previous_day_until_h=4,
                                            )
[28]:
# We now compute the displacement figures in one line and save it to disk
processed_diplacement = mobilkit.displacement.calc_displacement(df_displacement_ready,
                                                                df_idhome)
[29]:
# Persist to disk
tic = datetime.now()
if False:
    processed_diplacement.to_parquet("../../results/displacement_selectedids_processed/")
else:
    processed_diplacement = dd.read_parquet("../../results/displacement_selectedids_processed/")
toc = datetime.now()
[30]:
tot_sec = (toc - tic).total_seconds()
print("Done in %d hours and %.01f minutes!" % (tot_sec//3600, (tot_sec % 3600)/60))
Done in 2 hours and 21.7 minutes!
[31]:
# Total number of users and number of pings
stats_df = filtered_dataf_reloaded.groupby("uid").agg("count").compute()
print("Users:", stats_df.shape[0])
print("Pings:", stats_df["lat"].sum())
Users: 279541
Pings: 318852179

Analyze displacement rates

Per-id home location

[60]:
# Transform the data in a geodataframe for spatial queries
idhome_gdf = gpd.GeoDataFrame(df_idhome,
                              geometry=gpd.points_from_xy(df_idhome.homelon,
                                                          df_idhome.homelat))
[121]:
adm2_f = datapath + "spatial/boundaries_shapefiles/mex_admbnda_adm2_govmex/"
adm2_shp = gpd.read_file(adm2_f)
[63]:
# Spatial join, then I can aggregate by Municipality or other features
id_homecode = gpd.sjoin(idhome_gdf,adm2_shp[["ADM2_PCODE","geometry"]])
id_homecode = id_homecode[["uid","homelon",
                           "homelat","ADM2_PCODE"]].rename(columns={"ADM2_PCODE":"PCODE"})
<ipython-input-63-ec4458acad15>:1: UserWarning: CRS mismatch between the CRS of left geometries and the CRS of right geometries.
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  id_homecode = gpd.sjoin(idhome_gdf,adm2_shp)
[3]:
id_home_feat = id_homecode.merge(adm2_SI_pop_WI, on="PCODE")
[66]:
muncode_count = id_homecode.groupby("PCODE").count().reset_index()
[3]:
muncode_rate = muncode_count.merge(adm2_SI_pop_WI, on ="PCODE")
muncode_rate["rate"] = muncode_rate["uid"]/muncode_rate["pobtot"]

Macroscopic analysis

Reload previous results and stratify by different user status.

Seismic intensity

[71]:
df_disp = dd.read_parquet("results/displacement_selectedids_processed/")
[72]:
# Now we are working on dask, I port to pandas with .compute()
df_disp2 = df_disp.merge(id_homecode, on="uid", how="left").compute()
[74]:
df_disp3 = df_disp2.merge(adm2_SI_pop_WI, on="PCODE", how="left")
[76]:
# Helper function to determine the Seismic intensity level
def categorizeSI(si):
    if si>=7:
        r = 7
    elif si>=6.5:
        r = 6.5
    elif si>=6:
        r = 6
    elif si >=5:
        r = 5
    elif si >=4:
        r = 4
    else:
        r = 0
    return r
[77]:
df_disp3["SI_cat"] = df_disp3["PARAMVALUE"].apply(lambda x : categorizeSI(x))

Compute displacement rates

[4]:
df_disp4 = df_disp3[df_disp3["lng"]!=0].copy()
[80]:
dist = "mindist"
df_disp4["500m"] = df_disp4[dist].apply(lambda x : 1 if x>0.5 else 0)
df_disp4["1km"] = df_disp4[dist].apply(lambda x : 1 if x>1 else 0)
df_disp4["3km"] = df_disp4[dist].apply(lambda x : 1 if x>3 else 0)
df_disp4["5km"] = df_disp4[dist].apply(lambda x : 1 if x>5 else 0)
df_disp4["10km"] = df_disp4[dist].apply(lambda x : 1 if x>10 else 0)
[5]:
si_count = df_disp4[df_disp4["date"]==dt(2017,9, 3)]\
                                    .groupby('SI_cat')\
                                    .agg("count").reset_index()

Displacement plot by SI

[84]:
sis = sorted(set(df_disp4["SI_cat"]))
sis = [5.0, 6.0, 6.5, 7.0]

cms = plt.get_cmap("jet",len(sis))

scale = "500m"

df_this = df_disp4[df_disp4["SI_cat"]==0]
date_disp = df_this.groupby('date').mean().reset_index()
date_disp["date_dt"] = date_disp["date"].values# apply(lambda x : dt.strptime(str(x), '%Y%m%d'))
baseline = date_disp["500m"].values
[88]:
from scipy.optimize import minimize

def fit_baseline(data,baseline):

    def axb(p,x):
        return p[0]*x

    def errortot(data, baseline):
        tot = 0
        for i in np.arange(15):
            tot = tot + (baseline[i]-data[i])**2
        return tot

    x0 = np.array([1])
    res = minimize(lambda p: errortot(axb(p, data), baseline), x0=x0, method='Powell')

    return res.x
[137]:
def plotforSI(df_disp_se,si,ax,color, category, label, ylab, colname, baseline):
    df_this = df_disp_se[df_disp_se[category]==si]
    date_count = df_this.groupby('date').count().reset_index()[["date","uid"]]
    date_std = df_this.groupby('date').std().reset_index()[["date",colname]]
    date_std = date_std.rename(columns= {colname:"std"})
    date_disp = df_this.groupby('date').mean().reset_index()
    date_disp["date_dt"] = date_disp["date"].values # .apply(lambda x : dt.strptime(str(x), '%Y%m%d'))
    date_disp["youbi"] = date_disp["date_dt"].apply(lambda x : x.weekday())
    date_disp = date_disp.merge(date_count, on="date")
    date_disp = date_disp.merge(date_std, on="date")

    data = date_disp[colname].values
    a = fit_baseline(data, baseline)
    print(a.shape, data.shape, baseline.shape)
    res = (a*data-baseline)*100
    ax.plot(date_disp["date_dt"],res, color=color, label="Estimated")
    date_disp["error"] = date_disp.apply(lambda x : 196*np.sqrt((x[colname]*(1-x[colname]))/x["uid"]), \
                                             axis=1)

    ax.fill_between(date_disp["date_dt"],res-date_disp["error"].values, \
                    res+date_disp["error"].values,
                    color=color, alpha=0.3, label="95% CI")
    ax.xaxis.set_major_formatter(DateFormatter('%b %d'))
    ax.axhline(0, color="gray")
    # ax.set_xticks(["20170905","20170915","20170925","20171005"])
    ax.set_ylim(-2.5,6)
    ax.set_ylabel(ylab[0], fontsize=12)
    ax.axvline(datetime(2017,9,19), color="red")
    ax.legend(fontsize=12, ncol=5, loc="upper left")
    ax.set_title(label, fontsize=14)
[138]:
fig=plt.figure(figsize=(7.5,9))
gs=GridSpec(5,1)

res_si = {}

category = "SI_cat"
ylabs = ["Disaster\nDisplacement (%)", "$\Delta D$"]
colors = ["blue", "skyblue", "orange", "red"]
titles = ["6.0 > Seismic Intensity "+r"$\geq$"+"5.0",
          "6.5 > Seismic Intensity "+r"$\geq$"+"6.0",
          "7.0 > Seismic Intensity "+r"$\geq$"+"6.5",
          "Seismic Intensity "+r"$\geq$"+"7.0"]
for si,i in zip(sis,np.arange(len(sis))):
    ax = fig.add_subplot(gs[i,0])
    plotforSI(df_disp4, si, ax, colors[i], category, titles[i], ylabs, scale, baseline)

plt.tight_layout()
# plt.savefig("C:/users/yabec/desktop/displacement_si.png",
#             dpi=300, bbox_inches='tight', pad_inches=0.05)
plt.show()
(1,) (35,) (35,)
(1,) (35,) (35,)
(1,) (35,) (35,)
(1,) (35,) (35,)
../_images/examples_M4R_02_DisplacementAnalysis_65_1.png

Displacement rate by wealth index

[6]:
df_disp_se_5 = df_disp4[df_disp4["SI_cat"]>=6.5]
[92]:
aaa = df_disp_se_5[df_disp_se_5["date"]==dt(2017,9,8)].groupby("index_pca")["500m"].mean().reset_index()
bbb = df_disp_se_5[df_disp_se_5["date"]==dt(2017,9,8)].groupby("index_pca")["500m"].count().reset_index()

aaa.shape, bbb.shape
[92]:
((106, 2), (106, 2))
[114]:
x = aaa["index_pca"].values
y = aaa["500m"].values
z = bbb["500m"].values
newz = []
xx = []
yy = []
for k,j,i in zip(x,y,z):
    if i > 20:
        xx.append(k)
        yy.append(j*100-baseline[15]*100)
        newz.append(np.sqrt(i)*5)
[115]:
plt.scatter(xx, yy, s=newz, edgecolor="b", facecolor="white")
c1, i1, s1, p_value, std_err = stats.linregress(xx,yy)
print(c1, i1, s1, p_value, std_err)
plt.plot([0,1],[i1,i1+c1], linestyle="-", color="gray")
plt.annotate("R="+str(s1)[:5]+"\n($p<0.05$)", xy=(0.4,0), fontsize=12)
plt.ylim(-10,10)
plt.xlim(0,0.5)
plt.xlabel("Wealth Index", fontsize=14)
plt.ylabel("Disaster\nDisplacement (%)", fontsize=14)

plt.tight_layout()
# plt.savefig("C:/users/yabec/desktop/wealth_disp.png",
#             dpi=300, bbox_inches='tight', pad_inches=0.05)
plt.show()
-25.29461166237592 4.122102681557249 -0.39968759624164824 0.01736848907416504 10.098447012463001
../_images/examples_M4R_02_DisplacementAnalysis_70_1.png

Look at high damage + sample rate areas in detail

[116]:
### merge with disaster damage data (below)
muncode_rate["idlog"] = muncode_rate["uid"].apply(lambda x : np.log10(x))
[118]:
fig=plt.figure(figsize=(8,4))
ax = fig.add_subplot(1, 1, 1)
muncode_rate.plot.scatter("PARAMVALUE","rate",c='idlog',
                               colormap='jet', edgecolor="gray", s=50,ax=ax)
ax.set_xlim(4,8)
ax.set_ylim(0,0.015)
# ax.set_xticklabels([4,5,6,7,8])
ax.set_xlabel("Seismic intensity", fontsize=14)
ax.set_ylabel("Sample rate", fontsize=14)

f = plt.gcf()
cax = f.get_axes()[1]
cax.set_ylabel('log(mobile phone IDs)', fontsize=14)

plt.tight_layout()
# plt.savefig("C:/users/yabec/desktop/samplerate_si.png",
#             dpi=300, bbox_inches='tight', pad_inches=0.05)
plt.show()
../_images/examples_M4R_02_DisplacementAnalysis_73_0.png
[4]:
target = muncode_rate[(muncode_rate["PARAMVALUE"]>=6.5) & (muncode_rate["uid"]>=30)]
[120]:
targetcodes = target["PCODE"].values
targetcodes = ['MX09015', 'MX09003', 'MX09010', 'MX09007', 'MX09014', 'MX09017']
[124]:
names = []
for t in targetcodes:
    name = adm2_shp[adm2_shp["ADM2_PCODE"]==t]["ADM2_ES"].values[0]
    SI = target[target["PCODE"]==t]["PARAMVALUE"].values[0]
    ids = target[target["PCODE"]==t]["uid"].values[0]
    if "Cua" in name:
        name = "Cuauhtemoc"
    elif "lvaro" in name:
        name = "Olvaro Obregzn"
    names.append(name+" (code: "+t+", SI="+str(SI)+", Users="+str(ids)+")")

names
[124]:
['Cuauhtemoc (code: MX09015, SI=7.0, Users=3285)',
 'Coyoacln (code: MX09003, SI=6.8, Users=2570)',
 'Olvaro Obregzn (code: MX09010, SI=6.8, Users=2774)',
 'Iztapalapa (code: MX09007, SI=7.0, Users=5016)',
 'Benito Julrez (code: MX09014, SI=6.8, Users=2512)',
 'Venustiano Carranza (code: MX09017, SI=6.6, Users=1935)']
[139]:
fig=plt.figure(figsize=(15,2.5*3))
gs=GridSpec(3,2)

res_si = {}

category = "PCODE"
ylabs = ["Disaster\nDisplacement (%)", "$\Delta D$"]
colors = ["red", "orange", "orange", "red", "orange", "orange", "orange"]
titles = names

for i,pcode in enumerate(targetcodes):
    x,y = i, 0
    if i>2:
        y = 1
        x = i - 3
#     print(x,y)
    ax = fig.add_subplot(gs[x,y])
    plotforSI(df_disp4, pcode, ax, colors[i], category, titles[i], ylabs, scale, baseline)

plt.tight_layout()
# plt.savefig("C:/users/yabec/desktop/displacement_places.png",
#             dpi=300, bbox_inches='tight', pad_inches=0.05)
plt.show()
(1,) (35,) (35,)
(1,) (35,) (35,)
(1,) (35,) (35,)
(1,) (35,) (35,)
(1,) (35,) (35,)
(1,) (35,) (35,)
../_images/examples_M4R_02_DisplacementAnalysis_77_1.png

Zoom to Areas of interest

Cuahtlemoc

[180]:
date = dt(2017,9,20)
df_disp4_cua = df_disp4[(df_disp4["PCODE"]=="MX09015") & (df_disp4["date"]==date)].copy()
df_disp4_cua["distance"] = df_disp4_cua.apply(lambda row: np.log10(max(.001,
                        haversine([row["homelat_x"],row["homelon_x"]],
                                  [row["lat"],row["lng"]]))),axis=1)

# df_disp4_cua = df_disp4_cua[(df_disp4_cua["distance"]>2)]
df_disp4_cua = df_disp4_cua[(df_disp4_cua["distance"].between(1e-3, 1000))]

# df_disp4_cua = df_disp4_cua[((df_disp4_cua["distance"]>0.5) & (df_disp4_cua["distance"]<1.5))]

# df_disp4_cua = df_disp4_cua[((df_disp4_cua["distance"]>0.5) & (df_disp4_cua["distance"]<1.5)) | (df_disp4_cua["distance"]<-1)]

df_disp4_cua_gdf = gpd.GeoDataFrame(df_disp4_cua, geometry=gpd.points_from_xy(df_disp4_cua.lng, df_disp4_cua.lat))
df_disp4_cua_gdf = df_disp4_cua_gdf[["uid","geometry"]]
[181]:
df_disp4_cua_gdf.columns
[181]:
Index(['uid', 'geometry'], dtype='object')
[182]:
df_disp4_cua_to = gpd.sjoin(df_disp4_cua_gdf,adm2_shp)
<ipython-input-182-bc7533da0982>:1: UserWarning: CRS mismatch between the CRS of left geometries and the CRS of right geometries.
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  df_disp4_cua_to = gpd.sjoin(df_disp4_cua_gdf,adm2_shp)
[183]:
targetcode_count = df_disp4_cua_to.groupby("ADM2_PCODE")["uid"].count().reset_index()
targetcode_count["idlog"] = targetcode_count["uid"].apply(lambda x : np.log10(x))
[184]:
mun_ids_pop_shp = adm2_shp.merge(targetcode_count, on="ADM2_PCODE", how="right")
mun_ids_pop_shp = mun_ids_pop_shp.to_crs(epsg=3857)
[185]:
fig,ax = plt.subplots(figsize=(20,10))
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
mun_ids_pop_shp.plot(ax=ax, column='idlog', cmap='OrRd', legend=True,
                    cax=cax, legend_kwds={'label': "Users (log10)"}, alpha=.6)
ctx.add_basemap(ax, source=ctx.providers.Stamen.TonerLite)
# plt.savefig("C:/users/yabec/desktop/displacement_10km.png",
#             dpi=300, bbox_inches='tight', pad_inches=0.05)
plt.show()
../_images/examples_M4R_02_DisplacementAnalysis_84_0.png

Distance of displacement

[186]:
df_disp4_cua["distance"] = df_disp4_cua.apply(lambda row: np.log10(haversine([row["homelat_x"],row["homelon_x"]],
                                                                             [row["lat"],row["lng"]])),axis=1)
[187]:
fig,ax = plt.subplots(figsize=(6,4))

ax.hist(df_disp4_cua["distance"].values, bins=50, color="b")
ax.set_xticks([-3,-2,-1,0,1,2,3])
ax.set_xticklabels(["$10^{-3}$","$10^{-2}$","$10^{-1}$","$10^{0}$",
                    "$10^{1}$","$10^{2}$","$10^{3}$"])
ax.set_xlabel("Displaced distance (km)", fontsize=14)
ax.set_ylabel("Users", fontsize=14)
ax.set_title("Displacement from Cuauhtemoc", fontsize=15, pad=10)

# plt.savefig("C:/users/yabec/desktop/displacement_distance.png",
#             dpi=300, bbox_inches='tight', pad_inches=0.05)
plt.show()
../_images/examples_M4R_02_DisplacementAnalysis_87_0.png
[ ]: