Title¶

Subtitle¶

Author name

First slide¶

Content of the first slide

In [2]:
import numpy as np

np.random.randint(low=1, high=7, size=15)  # simulate 15 rolls of a six-sided die
Out[2]:
array([6, 2, 6, 4, 4, 3, 6, 6, 4, 6, 5, 4, 2, 4, 1])

Second slide¶

An example of ipyleaflet displaying an interactive map

In [4]:
from ipyleaflet import Map, Marker

center = (52.204793, 0.121558)  # latitude, longitude (0.12 rather than 360.12, which is outside [-180, 180])
m = Map(center=center, zoom=15)
marker = Marker(location=center, draggable=True)
m.add_layer(marker);
display(m)

Example of a heatmap on top of a map¶

In [5]:
from ipyleaflet import Map, Heatmap
from random import uniform

m = Map(center=(0, 0), zoom=2)

heatmap = Heatmap(
    # each location is a [latitude, longitude, intensity] triple
    locations=[[uniform(-80, 80), uniform(-180, 180), uniform(0, 1000)] for _ in range(2000)],
    radius=20,
    gradient={0.4: 'blue', 0.6: 'cyan', 0.7: 'lime', 0.8: 'yellow', 1.0: 'red'}
)

m.add_layer(heatmap);
display(m)

Open Data Bristol¶

An example of Open Data Bristol air-quality data overlaid on a map of Bristol

In [6]:
air_quality_bristol_query = 'https://opendata.bristol.gov.uk/api/records/1.0/search/?dataset=air-quality-data-continuous&q=&rows=1000&sort=date_time&facet=date_time&facet=coordinates&facet=temp'

import urllib.request, json

response = urllib.request.urlopen(air_quality_bristol_query)

data = json.loads(response.read())

print('JSON root keys')
print(data.keys())
print('3 records')
data['records'][:3]
JSON root keys
dict_keys(['nhits', 'parameters', 'records', 'facet_groups'])
3 records
Out[6]:
[{'datasetid': 'air-quality-data-continuous',
  'recordid': '9e83da98d108cfea3ca5086f9deceef0a7d4b560',
  'fields': {'datestart': '2002-02-01T00:00:00+00:00',
   'temp': 6.769233,
   'date_time': '2021-02-22T09:00:00+00:00',
   'no': 77.781625,
   'geo_point_2d': [51.432675707, -2.60495665673],
   'current': 'True',
   'nox': 202.820627,
   'location': 'Parson Street School',
   'siteid': 215,
   'no2': 83.52844,
   'rh': 24.28571,
   'instrumenttype': 'Continuous (Reference)',
   'pm25': 5.7570217},
  'geometry': {'type': 'Point', 'coordinates': [-2.60495665673, 51.432675707]},
  'record_timestamp': '2021-02-22T09:15:09.027000+00:00'},
 {'datasetid': 'air-quality-data-continuous',
  'recordid': 'ca66c114dd2ab4c7ff850fa11489c2337a18b680',
  'fields': {'datestart': '2003-05-23T00:00:00+00:00',
   'date_time': '2021-02-22T09:00:00+00:00',
   'no': 54.93035,
   'geo_point_2d': [51.4278638883, -2.56374153315],
   'current': 'True',
   'nox': 149.987811,
   'location': 'Wells Road',
   'siteid': 270,
   'instrumenttype': 'Continuous (Reference)',
   'no2': 65.742188},
  'geometry': {'type': 'Point',
   'coordinates': [-2.56374153315, 51.4278638883]},
  'record_timestamp': '2021-02-22T09:15:09.027000+00:00'},
 {'datasetid': 'air-quality-data-continuous',
  'recordid': 'c3ea5efd38a31963b06dc2e60fe21eff6e9807bd',
  'fields': {'datestart': '2009-03-13T00:00:00+00:00',
   'date_time': '2021-02-22T09:00:00+00:00',
   'no': 24.721775,
   'geo_point_2d': [51.4780449714, -2.53523027459],
   'current': 'True',
   'nox': 86.158124,
   'location': 'Fishponds Road',
   'siteid': 463,
   'instrumenttype': 'Continuous (Reference)',
   'no2': 48.242812},
  'geometry': {'type': 'Point',
   'coordinates': [-2.53523027459, 51.4780449714]},
  'record_timestamp': '2021-02-22T09:15:09.027000+00:00'}]
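The cell that builds fields and record_values is not shown in this export; a plausible reconstruction, with the field names inferred from the dataframe columns used later, is:

# Hypothetical reconstruction: extract a fixed set of fields from each record,
# with None for readings a station does not report.
fields = ['date_time', 'geo_point_2d', 'temp', 'no', 'no2', 'nox']
record_values = [[record['fields'].get(field) for field in fields]
                 for record in data['records']]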
In [8]:
m = Map(center=(51.454500, -2.587900), zoom=12)
gradient = {0.4: 'blue', 0.6: 'cyan', 0.7: 'lime', 0.8: 'yellow', 1.0: 'red'}

field_map = {field: i for i, field in enumerate(fields)}
heatmap = Heatmap(
    # geo_point_2d is [latitude, longitude]; the NO concentration is the intensity
    locations=[[values[field_map['geo_point_2d']][0],
                values[field_map['geo_point_2d']][1],
                values[field_map['no']]] for values in record_values],
    radius=20,
    gradient=gradient
)

m.add_layer(heatmap);
display(m)

Pandas dataframe¶

We will use pandas to analyse the data: split geo_point_2d into separate Latitude and Longitude columns, sort by time, and keep a single reading per monitoring site

In [9]:
import pandas as pd

df_records = pd.DataFrame(record_values, columns=fields)
# geo_point_2d is [latitude, longitude], so latitude is the first element
df_records[['Latitude', 'Longitude']] = pd.DataFrame(df_records['geo_point_2d'].tolist(), index=df_records.index)
df_records = df_records.sort_values(by='date_time')
df_records = df_records.drop_duplicates(subset=['Latitude', 'Longitude'], keep='first')
df_records
Out[9]:
date_time geo_point_2d temp no no2 nox Latitude Longitude
5999 2021-02-16T07:00:00+00:00 [51.4780449714, -2.53523027459] NaN 7.762575 27.540000 39.493124 51.478045 -2.535230
5975 2021-02-16T07:00:00+00:00 [51.4278638883, -2.56374153315] NaN 42.210949 39.158438 104.040000 51.427864 -2.563742
5969 2021-02-16T07:00:00+00:00 [51.4552693825, -2.59664882861] NaN 104.529084 99.054748 260.078792 51.455269 -2.596649
5976 2021-02-16T07:00:00+00:00 [51.432675707, -2.60495665673] 9.205129 27.309300 56.562188 98.541564 51.432676 -2.604957
5963 2021-02-16T07:00:00+00:00 [51.4628294172, -2.58454081635] NaN 1.123000 13.961000 15.682000 51.462829 -2.584541
5993 2021-02-16T07:00:00+00:00 [51.4579497129, -2.58398909033] NaN 5.239000 25.436000 33.469000 51.457950 -2.583989
5987 2021-02-16T07:00:00+00:00 [51.4417471802, -2.55995583224] NaN 4.270975 19.842188 26.440313 51.441747 -2.559956

Binary probabilistic classifier¶

Simulation of a probabilistic classifier, using a linear transformation to map the raw NO values into the interval [0, 1]

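The cells that compute no_scaled and locations are not shown in this export; a reconstruction consistent with the no_scaled column displayed below (a min-max scaling of the NO readings) would be:

# Hypothetical reconstruction: min-max scale the NO readings into [0, 1] and
# build the [latitude, longitude, weight] triples the heatmap expects.
no = df_records['no']
df_records['no_scaled'] = (no - no.min()) / (no.max() - no.min())
locations = df_records[['Latitude', 'Longitude', 'no_scaled']].values.tolist()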
In [12]:
m = Map(center=(51.454500, -2.587900), zoom=12)
gradient={0.0: 'blue', 1.0: 'red'}

heatmap = Heatmap(
    locations=locations,
    radius=40,
    gradient=gradient,
    min_opacity=0.5,
)

m.add_layer(heatmap);
display(m)
In [13]:
df_records
Out[13]:
date_time geo_point_2d temp no no2 nox Latitude Longitude no_scaled
5999 2021-02-16T07:00:00+00:00 [51.4780449714, -2.53523027459] NaN 7.762575 27.540000 39.493124 51.478045 -2.535230 0.064209
5975 2021-02-16T07:00:00+00:00 [51.4278638883, -2.56374153315] NaN 42.210949 39.158438 104.040000 51.427864 -2.563742 0.397346
5969 2021-02-16T07:00:00+00:00 [51.4552693825, -2.59664882861] NaN 104.529084 99.054748 260.078792 51.455269 -2.596649 1.000000
5976 2021-02-16T07:00:00+00:00 [51.432675707, -2.60495665673] 9.205129 27.309300 56.562188 98.541564 51.432676 -2.604957 0.253238
5963 2021-02-16T07:00:00+00:00 [51.4628294172, -2.58454081635] NaN 1.123000 13.961000 15.682000 51.462829 -2.584541 0.000000
5993 2021-02-16T07:00:00+00:00 [51.4579497129, -2.58398909033] NaN 5.239000 25.436000 33.469000 51.457950 -2.583989 0.039804
5987 2021-02-16T07:00:00+00:00 [51.4417471802, -2.55995583224] NaN 4.270975 19.842188 26.440313 51.441747 -2.559956 0.030443

Simulate probabilistic linear classifier¶

We start with a uniform value across the whole map.

The points are uniformly spaced in latitude and longitude, roughly 2 degrees apart in each direction. Because of the Web Mercator projection used by the map tiles, equal steps in latitude appear more widely spaced towards the poles and more concentrated near the equator. The longitudinal spacing is not affected by this representation.
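A quick numerical check of this effect, using the standard Web Mercator formula (a sketch, not part of the original notebook):

# Web Mercator vertical coordinate: y = ln(tan(pi/4 + lat/2)).
# Equal steps in latitude map to ever larger steps in y towards the pole.
for lat_deg in (0, 30, 60, 80):
    y = np.log(np.tan(np.pi / 4 + np.radians(lat_deg) / 2))
    print('latitude {:>2} deg -> y = {:.3f}'.format(lat_deg, y))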

In [14]:
from ipyleaflet import Map, Heatmap

m = Map(center=(0, 0), zoom=0)

nlon, nlat = (180, 90)
lon = np.linspace(-180, 180, nlon)
lat = np.linspace(-90, 90, nlat)
lonv, latv = np.meshgrid(lon, lat, indexing='ij')
probs = np.ones_like(lonv) * 500  # constant weight everywhere

heatmap = Heatmap(
    locations=[[latv[i,j], lonv[i,j], probs[i,j]] for i in range(nlon) for j in range(nlat)],
    radius=10,
    gradient={0.0: 'blue', 1.0: 'red'}
)

m.add_layer(heatmap);
display(m)

North vs South¶

Simulation of a probabilistic classifier that predicts whether a point belongs to the Northern or the Southern Hemisphere.

We first generate a grid with roughly 2-degree spacing in each direction, assign probabilities that grow linearly with latitude, and visualise them as Matplotlib heatmaps.

In [15]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

nlon, nlat = (180, 90)
lon = np.linspace(-180, 180, nlon)
lat = np.linspace(-90, 90, nlat)
lonv, latv = np.meshgrid(lon, lat, indexing='ij')
probs = 100*(latv - latv.min())/(latv.max() - latv.min())

fig = plt.figure(figsize=(15, 5))
for i, (name, matrix) in enumerate({'longitude': lonv, 'latitude': latv, 'probabilities': probs}.items()):
    ax = fig.add_subplot(1, 3, i+1)
    ax.set_title(name)
    img = ax.imshow(matrix.T)
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    fig.colorbar(img, cax=cax)

If we show the probability of North on its own, locations with a low intensity are rendered almost fully transparent, which makes the South (yellow) predictions difficult to see.

In [16]:
from ipyleaflet import Map, Heatmap

m = Map(center=(0, 0), zoom=0)

nlon, nlat = (180, 90)
lon = np.linspace(-180, 180, nlon)
lat = np.linspace(-90, 90, nlat)
lonv, latv = np.meshgrid(lon, lat, indexing='ij')
probs = 100*(latv - latv.min())/(latv.max() - latv.min())

heatmap = Heatmap(
    locations=[[latv[i,j], lonv[i,j], probs[i,j]] for i in range(nlon) for j in range(nlat)],
    radius=20,
    gradient={0.0: 'yellow', 0.5: 'green', 1.0: 'blue'}
)

m.add_layer(heatmap);
display(m)
In [17]:
m = Map(center=(0, 0), zoom=0)

probs_s = 100 - probs

heatmap = Heatmap(
    locations=[[latv[i,j], lonv[i,j], probs_s[i,j]] for i in range(nlon) for j in range(nlat)],
    radius=20,
    gradient={0.0: 'blue', 0.5: 'green', 1.0: 'yellow'}
)

m.add_layer(heatmap);
display(m)

It is necessary to show the probabilities of both classes in separate layers, given that intensities near 0 are rendered with an alpha channel close to 0 and are therefore not visible.

In [18]:
from ipyleaflet import Map, Heatmap

m = Map(center=(0, 0), zoom=0)

nlon, nlat = (180, 90)
lon = np.linspace(-180, 180, nlon)
lat = np.linspace(-90, 90, nlat)
lonv, latv = np.meshgrid(lon, lat, indexing='ij')
probs = 100*(latv - latv.min())/(latv.max() - latv.min())

heatmap = Heatmap(
    locations=[[latv[i,j], lonv[i,j], probs[i,j]] for i in range(nlon) for j in range(nlat)],
    radius=20,
    gradient={0.0: 'yellow', 0.5: 'green', 1.0: 'blue'}
)

m.add_layer(heatmap);

probs = 100 - probs

heatmap = Heatmap(
    locations=[[latv[i,j], lonv[i,j], probs[i,j]] for i in range(nlon) for j in range(nlat)],
    radius=20,
    gradient={0.0: 'blue', 0.5: 'green', 1.0: 'yellow'}
)

m.add_layer(heatmap);
display(m)

Avoid projection deformation¶

To reduce the deformation introduced when projecting the sphere onto a rectangle, we can zoom into a smaller region of the map, where the deformation is less apparent. Even then, the heatmap interacts poorly with zooming: it is very difficult to find a single set of heatmap parameters that works well at both low and high zoom levels, because the radius is specified in screen pixels rather than in geographic units.

In [19]:
from ipyleaflet import Map, Heatmap

m = Map(center=(55, 0), zoom=4.3)

nlon, nlat = (100, 100)
lon = np.linspace(-10, 10, nlon)
lat = np.linspace(50, 60, nlat)
lonv, latv = np.meshgrid(lon, lat, indexing='ij')
probs = 100*(latv - latv.min())/(latv.max() - latv.min())

color_per_class = [{0.0: 'yellow', 0.5: 'green', 1.0: 'blue'},
                   {0.0: 'blue', 0.5: 'green', 1.0: 'yellow'}]

for (gradient, values) in zip(color_per_class, [probs, 100 - probs]):
    heatmap = Heatmap(
        locations=[[latv[i,j], lonv[i,j], values[i,j]] for i in range(nlon) for j in range(nlat)],
        radius=20,
        gradient=gradient
    )

    m.add_layer(heatmap);

Example of linear classifier¶

In [20]:
display(m)
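To see the zoom dependence directly, here is a minimal sketch (not part of the original notebook, reusing the grid variables from the cell above) that renders the same heatmap at two zoom levels; since the radius is defined in screen pixels, the blobs cover very different geographic areas at each zoom:

from ipywidgets import HBox

def build_map(zoom):
    m = Map(center=(55, 0), zoom=zoom)
    m.add_layer(Heatmap(
        locations=[[latv[i, j], lonv[i, j], probs[i, j]]
                   for i in range(nlon) for j in range(nlat)],
        radius=20))
    return m

HBox([build_map(3), build_map(6)])  # same data, two zoom levels side by side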

Country contours and Search Engine¶

The following is an example of country contours, with a search control for looking up country names.

In [22]:
import json
import os
import requests

from ipyleaflet import AwesomeIcon, GeoJSON, Map, Marker, LayerGroup, SearchControl

m = Map(zoom=3, center=[19.1646, 72.8493])

if not os.path.exists('countries.geo.json'):
    url = 'https://raw.githubusercontent.com/jupyter-widgets/ipyleaflet/master/examples/countries.geo.json'
    r = requests.get(url)
    with open('countries.geo.json', 'w') as f:
        f.write(r.content.decode("utf-8"))

with open("countries.geo.json") as f:
    data = json.load(f)

countries = GeoJSON(data=data)

layer_group = LayerGroup(layers=(countries,))
marker = Marker(icon=AwesomeIcon(name="check", marker_color='green', icon_color='darkred'))

m.add_control(SearchControl(
    position="topleft",
    layer=layer_group,
    zoom=4,
    property_name='name',
    marker=marker
))

m
In [23]:
from ipyleaflet import Map, ImageOverlay

m = Map(center=(25, -115), zoom=4)

image = ImageOverlay(
    # local copy of https://i.imgur.com/06Q1fSz.png with an alpha channel
    url="./06Q1fSz_alpha.png",
    bounds=((13, -130), (32, -100))
)

m.add_layer(image);
m
In [24]:
import ipyleaflet
import json
import pandas as pd
import os
import requests
from ipywidgets import link, FloatSlider
from branca.colormap import linear

def load_data(url, filename, file_type):
    r = requests.get(url)
    with open(filename, 'w') as f:
        f.write(r.content.decode("utf-8"))
    with open(filename, 'r') as f:
        return file_type(f)

geo_json_data = load_data(
    'https://raw.githubusercontent.com/jupyter-widgets/ipyleaflet/master/examples/us-states.json',
    'us-states.json',
     json.load)

unemployment = load_data(
    'https://raw.githubusercontent.com/jupyter-widgets/ipyleaflet/master/examples/US_Unemployment_Oct2012.csv',
    'US_Unemployment_Oct2012.csv',
     pd.read_csv)

unemployment = dict(zip(unemployment['State'].tolist(), unemployment['Unemployment'].tolist()))

layer = ipyleaflet.Choropleth(
    geo_data=geo_json_data,
    choro_data=unemployment,
    colormap=linear.YlOrRd_04,
    border_color='black',
    style={'fillOpacity': 0.8, 'dashArray': '5, 5'})

m = ipyleaflet.Map(center=(43, -100), zoom=4)
m.add_layer(layer)
m
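The link and FloatSlider imports above hint at an interactive opacity control. A minimal sketch of one (an assumption, not part of the original cell), using observe rather than link because the layer style is a dict trait:

from ipywidgets import FloatSlider, VBox

slider = FloatSlider(description='Opacity', min=0.0, max=1.0, value=0.8)

def update_opacity(change):
    # reassign the whole style dict so the trait change is synced to the map
    layer.style = {**layer.style, 'fillOpacity': change['new']}

slider.observe(update_opacity, names='value')
VBox([slider, m])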

The two coordinates on the map are

  • longitude: [-180, 180] (West to East)
  • latitude: [-90, 90] (South Pole to North Pole)

In writing and speech, though, it is more common to use positive values with a hemisphere suffix, where West corresponds to a negative longitude and South to a negative latitude. Longitude zero is the prime meridian, which crosses Greenwich, while latitude zero is the equator. Null Island lies at the intersection of these two lines (0°N, 0°E) in the Gulf of Guinea, and is marked by a weather buoy maintained by PIRATA (Prediction and Research Moored Array in the Atlantic).

Coordinates are always given latitude first and longitude second, e.g. Bristol city centre is at 51.4545° N, 2.5879° W. Note, however, that GeoJSON geometries (as in the next cell) use the opposite order: [longitude, latitude].
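As a small illustration (a hypothetical helper, not part of the original notebook), converting the hemisphere-suffixed form into signed decimal degrees is a one-liner:

def signed_degrees(value, hemisphere):
    # N and E are positive; S and W are negative
    return value if hemisphere in ('N', 'E') else -value

bristol = (signed_degrees(51.4545, 'N'), signed_degrees(2.5879, 'W'))
print(bristol)  # (51.4545, -2.5879): latitude first, longitude second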

In [25]:
import ipyleaflet
import json
import pandas as pd
import os
import requests
from ipywidgets import link, FloatSlider
from branca.colormap import linear

regions = {
    "type": "FeatureCollection",
    "features":[{
        "type":"Feature",
        "id":"YS",
        "properties":{"name":"Yellow Square"},
        "geometry":{
            "type":"Polygon",
            "coordinates": [[[-2,2],
                             [-2,-2],
                             [2,-2],
                             [2,2]]]
        }
    },
    {
        "type":"Feature",
        "id":"RS",
        "properties":{"name":"Red Square"},
        "geometry":{
            "type":"Polygon",
            "coordinates": [[[2,2],
                             [8,2],
                             [8,8],
                             [2,8]]]
        }
    }]
}

colors = {'YS': 0, 'RS': 1}

layer = ipyleaflet.Choropleth(
    geo_data=regions,
    choro_data=colors,
    colormap=linear.YlOrRd_04,
    border_color='black',
    style={'fillOpacity': 0.7, 'dashArray': '5, 5'})

m = ipyleaflet.Map(center=(0, 0), zoom=4)
m.add_layer(layer)
m

Classification problem¶

First we will generate a synthetic dataset

In [26]:
spain_center = (40.4637, -3.7492)   # latitude, longitude (+N, +E) or (-S, -W)
france_center = (46.2276, 2.2137)   # latitude, longitude
andorra_center = (42.5063, 1.5218)  # latitude, longitude

def latlong_to_xy(coordinates):
    '''Convert coordinates from (latitude, longitude) into (x, y) by swapping the pair order'''
    if len(coordinates) > 0 and (type(coordinates[0]) not in [float, int]):
        return [list(reversed(ll)) for ll in coordinates]
    return list(reversed(coordinates))

def xy_to_latlong(coordinates):
    '''Convert coordinates from (x, y) into (latitude, longitude) by swapping the pair order'''
    return latlong_to_xy(coordinates)
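For reference, a couple of hypothetical usage examples; both helpers simply swap the order of each pair:

latlong_to_xy([51.4545, -2.5879])                       # -> [-2.5879, 51.4545]
xy_to_latlong([[-2.5879, 51.4545], [2.2137, 46.2276]])  # -> [[51.4545, -2.5879], [46.2276, 2.2137]]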

Example with two Gaussians¶

In [27]:
# Two Gaussians
samples_c0 = 500
samples_c1 = 500
x_class_0 = np.random.multivariate_normal(mean=spain_center, cov=[[12,0],[0,12]], size=samples_c0)
x_class_1 = np.random.multivariate_normal(mean=france_center, cov=[[12,0],[0,12]], size=samples_c1)
y_class_0 = np.zeros((samples_c0, 1), dtype=int)
y_class_1 = np.ones((samples_c1, 1), dtype=int)

x = np.vstack((x_class_0, x_class_1))
y = np.vstack((y_class_0, y_class_1)).squeeze()

from sklearn.utils import shuffle

x, y = shuffle(x, y)

Example with multiple clusters per class¶

Multiple blobs per class¶

In [29]:
from sklearn.datasets import make_blobs

x, y = make_blobs(n_samples=500, n_features=2, centers=20,
                  cluster_std=2.0, center_box=(-20.0, 20.0),
                  shuffle=True, random_state=42)

x += andorra_center

y[y<10] = 0
y[y>=10] = 1
In [30]:
fig, ax = plt.subplots(1)
scatter = ax.scatter(x[:,1], x[:,0], c=y, alpha=0.7)
ax.legend(handles=scatter.legend_elements(num=[0,1])[0], labels=['spain', 'france'])
Out[30]:
<matplotlib.legend.Legend at 0x7f87f894fc10>
In [31]:
locations_c0 = np.hstack((x[y==0], np.ones((sum(y==0), 1))))
locations_c1 = np.hstack((x[y==1], np.ones((sum(y==1), 1))))
In [32]:
from ipyleaflet import Map, basemaps, basemap_to_tiles, Circle
m = Map(center=andorra_center, zoom=4)

class_color = ['blue', 'yellow']
# FIXME One layer per circle gets laggy, we show only 100 points
for sample_x, sample_y in zip(x[:100].tolist(), y[:100]):
    circle = Circle()
    circle.location = sample_x
    circle.radius = 100
    circle.color = class_color[sample_y]
    circle.fill_color = class_color[sample_y]

    m.add_layer(circle)
display(m)
In [33]:
m = Map(center=(40.4637, -3.7492), zoom=4)

for color, location in [('blue', locations_c0.tolist()),
                        ('yellow', locations_c1.tolist())]:
    heatmap = Heatmap(
        locations=location,
        radius=5,
        gradient={0.0: color, 1.0: color},
        min_opacity=1,
    )

    m.add_layer(heatmap);

display(m)

We train a classifier (here an SVC; the commented-out Logistic Regression or Random Forest could be used instead) to discriminate the two classes

In [34]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

#clf = LogisticRegression()
#clf = RandomForestClassifier()
clf = SVC(C=10000, probability=True)
clf.fit(x, y)

print('Training accuracy = {:.3f}'.format(np.mean(clf.predict(x) == y)))
Training accuracy = 0.848
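
Training accuracy is an optimistic estimate. A minimal sketch (not part of the original notebook) of a fairer evaluation on a held-out split:

from sklearn.model_selection import train_test_split

# use a separate estimator so the clf trained above is left untouched
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)
clf_eval = SVC(C=10000, probability=True)
clf_eval.fit(x_train, y_train)
print('Test accuracy = {:.3f}'.format(clf_eval.score(x_test, y_test)))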

Next we visualise the predicted probabilities over a large region around both classes

In [35]:
nlon, nlat = (200, 200)
lon = np.linspace(-180, 180, nlon)
lat = np.linspace(-90, 90, nlat)
lonv, latv = np.meshgrid(lon, lat, indexing='ij')
center = andorra_center

# predicted probabilities for every grid point (columns: class 0, class 1)
probs = clf.predict_proba(np.hstack((latv.reshape(-1,1), lonv.reshape(-1,1))))


m = Map(center=center, zoom=4)

for c, gradient in enumerate([{0.0: 'blue', 1.0: 'blue'},
                              {0.0: 'yellow', 1.0: 'yellow'}]):
    location_prob_c = probs[:,c].reshape(lonv.shape)*100  # FIXME values in [0, 1] render barely visible, so scale by 100

    heatmap = Heatmap(
        locations=[[latv[i,j], lonv[i,j], location_prob_c[i,j]] for i in range(nlon) for j in range(nlat)],
        radius=40,
        gradient=gradient,
        min_opacity=0.0,
    )

    m.add_layer(heatmap);


display(m)
In [37]:
# Obtain the contour lines of the predicted probability over the latitude/longitude grid
fig, ax = plt.subplots(1, figsize=(12, 9))
cs = ax.contour(latv, lonv, probs[:,1].reshape(lonv.shape),
                 [0, 0.000001, 0.0001, 0.1, 0.3, 0.5,
                  0.7, 0.9, 0.9999, 0.999999, 1]);
ax.scatter(x[:,0], x[:,1], c=y, edgecolor='black')
ax.set_ylim([-30, 30])
ax.set_xlim([20, 60])
Out[37]:
(20.0, 60.0)
In [38]:
from ipyleaflet import Map, Polyline

m = Map(center=andorra_center, zoom=5)

# FIXME The current code only uses one of the lines for each level (see p.get_paths()[0])
lines = [p.get_paths()[0].vertices.tolist() for p in cs.collections if p.get_paths()]
colors = [linear.viridis(i/len(lines)) for i in range(len(lines))]

for (line, color) in zip(lines, colors):
    l_layer = Polyline(
        locations=line,
        color=color,
        fill=False
    )
    m.add_layer(l_layer)
    m.add_layer(l_layer)

m
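
A sketch addressing the FIXME above (same assumptions as the previous cell): keep every path of every contour level instead of only the first one.

from ipyleaflet import Map, Polyline

m2 = Map(center=andorra_center, zoom=5)

# collect every path of every contour level, coloured by level
lines, colors = [], []
n_levels = len(cs.collections)
for level, collection in enumerate(cs.collections):
    for path in collection.get_paths():
        lines.append(path.vertices.tolist())
        colors.append(linear.viridis(level / n_levels))

for line, color in zip(lines, colors):
    m2.add_layer(Polyline(locations=line, color=color, fill=False))

m2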