Content of the first slide
import numpy as np
np.random.randint(low=1, high=7, size=15)  # simulate 15 rolls of a six-sided die
array([6, 2, 6, 4, 4, 3, 6, 6, 4, 6, 5, 4, 2, 4, 1])
Example of ipyleaflet displaying an interactive map with a draggable marker
from ipyleaflet import Map, Marker
center = (52.204793, 0.121558)  # (latitude, longitude); the original 360.121558 wraps around to 0.121558
m = Map(center=center, zoom=15)
marker = Marker(location=center, draggable=True)
m.add_layer(marker);
display(m)
from ipyleaflet import Map, Heatmap
from random import uniform
m = Map(center=(0, 0), zoom=2)
heatmap = Heatmap(
locations=[[uniform(-80, 80), uniform(-180, 180), uniform(0, 1000)] for i in range(2000)],
radius=20,
gradient={0.4: 'blue', 0.6: 'cyan', 0.7: 'lime', 0.8: 'yellow', 1.0: 'red'}
)
m.add_layer(heatmap);
display(m)
Example of Open Data Bristol air-quality data overlaid on a map of Bristol
air_quality_bristol_query = 'https://opendata.bristol.gov.uk/api/records/1.0/search/?dataset=air-quality-data-continuous&q=&rows=1000&sort=date_time&facet=date_time&facet=coordinates&facet=temp'
import json
import urllib.request
response = urllib.request.urlopen(air_quality_bristol_query)
data = json.loads(response.read())
print('Json root keys')
print(data.keys())
print('3 records')
data['records'][:3]
Json root keys
dict_keys(['nhits', 'parameters', 'records', 'facet_groups'])
3 records
[{'datasetid': 'air-quality-data-continuous',
'recordid': '9e83da98d108cfea3ca5086f9deceef0a7d4b560',
'fields': {'datestart': '2002-02-01T00:00:00+00:00',
'temp': 6.769233,
'date_time': '2021-02-22T09:00:00+00:00',
'no': 77.781625,
'geo_point_2d': [51.432675707, -2.60495665673],
'current': 'True',
'nox': 202.820627,
'location': 'Parson Street School',
'siteid': 215,
'no2': 83.52844,
'rh': 24.28571,
'instrumenttype': 'Continuous (Reference)',
'pm25': 5.7570217},
'geometry': {'type': 'Point', 'coordinates': [-2.60495665673, 51.432675707]},
'record_timestamp': '2021-02-22T09:15:09.027000+00:00'},
{'datasetid': 'air-quality-data-continuous',
'recordid': 'ca66c114dd2ab4c7ff850fa11489c2337a18b680',
'fields': {'datestart': '2003-05-23T00:00:00+00:00',
'date_time': '2021-02-22T09:00:00+00:00',
'no': 54.93035,
'geo_point_2d': [51.4278638883, -2.56374153315],
'current': 'True',
'nox': 149.987811,
'location': 'Wells Road',
'siteid': 270,
'instrumenttype': 'Continuous (Reference)',
'no2': 65.742188},
'geometry': {'type': 'Point',
'coordinates': [-2.56374153315, 51.4278638883]},
'record_timestamp': '2021-02-22T09:15:09.027000+00:00'},
{'datasetid': 'air-quality-data-continuous',
'recordid': 'c3ea5efd38a31963b06dc2e60fe21eff6e9807bd',
'fields': {'datestart': '2009-03-13T00:00:00+00:00',
'date_time': '2021-02-22T09:00:00+00:00',
'no': 24.721775,
'geo_point_2d': [51.4780449714, -2.53523027459],
'current': 'True',
'nox': 86.158124,
'location': 'Fishponds Road',
'siteid': 463,
'instrumenttype': 'Continuous (Reference)',
'no2': 48.242812},
'geometry': {'type': 'Point',
'coordinates': [-2.53523027459, 51.4780449714]},
'record_timestamp': '2021-02-22T09:15:09.027000+00:00'}]
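The next cell assumes the JSON records have already been flattened into two helper variables, `fields` (the column names) and `record_values` (one list of values per record). That preprocessing cell is not shown above; a minimal sketch, assuming the six fields used later with pandas:
# Assumed reconstruction of the elided flattening step
fields = ['date_time', 'geo_point_2d', 'temp', 'no', 'no2', 'nox']
record_values = [[record['fields'].get(field) for field in fields]
                 for record in data['records']]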
m = Map(center=(51.454500, -2.587900), zoom=12)
gradient = {0.4: 'blue', 0.6: 'cyan', 0.7: 'lime', 0.8: 'yellow', 1.0: 'red'}
field_map = {field: i for i, field in enumerate(fields)}
heatmap = Heatmap(
    locations=[[values[field_map['geo_point_2d']][0],
                values[field_map['geo_point_2d']][1],
                values[field_map['no']]]
               for values in record_values
               if values[field_map['no']] is not None],  # skip records without an NO reading
    radius=20,
    gradient=gradient
)
m.add_layer(heatmap);
display(m)
We will use pandas to analyse the data
import pandas as pd
df_records = pd.DataFrame(record_values, columns=fields)
df_records[['Latitude', 'Longitude']] = pd.DataFrame(df_records['geo_point_2d'].tolist(), index=df_records.index)  # geo_point_2d is [latitude, longitude]
df_records = df_records.sort_values(by='date_time')
df_records = df_records.drop_duplicates(subset=['Longitude', 'Latitude'], keep='first')
df_records
| | date_time | geo_point_2d | temp | no | no2 | nox | Latitude | Longitude |
|---|---|---|---|---|---|---|---|---|
| 5999 | 2021-02-16T07:00:00+00:00 | [51.4780449714, -2.53523027459] | NaN | 7.762575 | 27.540000 | 39.493124 | 51.478045 | -2.535230 |
| 5975 | 2021-02-16T07:00:00+00:00 | [51.4278638883, -2.56374153315] | NaN | 42.210949 | 39.158438 | 104.040000 | 51.427864 | -2.563742 |
| 5969 | 2021-02-16T07:00:00+00:00 | [51.4552693825, -2.59664882861] | NaN | 104.529084 | 99.054748 | 260.078792 | 51.455269 | -2.596649 |
| 5976 | 2021-02-16T07:00:00+00:00 | [51.432675707, -2.60495665673] | 9.205129 | 27.309300 | 56.562188 | 98.541564 | 51.432676 | -2.604957 |
| 5963 | 2021-02-16T07:00:00+00:00 | [51.4628294172, -2.58454081635] | NaN | 1.123000 | 13.961000 | 15.682000 | 51.462829 | -2.584541 |
| 5993 | 2021-02-16T07:00:00+00:00 | [51.4579497129, -2.58398909033] | NaN | 5.239000 | 25.436000 | 33.469000 | 51.457950 | -2.583989 |
| 5987 | 2021-02-16T07:00:00+00:00 | [51.4417471802, -2.55995583224] | NaN | 4.270975 | 19.842188 | 26.440313 | 51.441747 | -2.559956 |
Simulation of a probabilistic classifier with a linear transformation to convert raw values into the interval [0, 1]
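The `no_scaled` column shown below comes from a min-max scaling of the `no` column; the original scaling cell is not shown. A minimal sketch that reproduces it and builds the `locations` list used in the next cell:
# Assumed reconstruction of the elided scaling step: min-max scale NO into [0, 1]
no = df_records['no']
df_records['no_scaled'] = (no - no.min()) / (no.max() - no.min())
locations = df_records[['Latitude', 'Longitude', 'no_scaled']].values.tolist()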
m = Map(center=(51.454500, -2.587900), zoom=12)
gradient={0.0: 'blue', 1.0: 'red'}
heatmap = Heatmap(
locations=locations,
radius=40,
gradient=gradient,
min_opacity=0.5,
)
m.add_layer(heatmap);
display(m)
df_records
| | date_time | geo_point_2d | temp | no | no2 | nox | Latitude | Longitude | no_scaled |
|---|---|---|---|---|---|---|---|---|---|
| 5999 | 2021-02-16T07:00:00+00:00 | [51.4780449714, -2.53523027459] | NaN | 7.762575 | 27.540000 | 39.493124 | 51.478045 | -2.535230 | 0.064209 |
| 5975 | 2021-02-16T07:00:00+00:00 | [51.4278638883, -2.56374153315] | NaN | 42.210949 | 39.158438 | 104.040000 | 51.427864 | -2.563742 | 0.397346 |
| 5969 | 2021-02-16T07:00:00+00:00 | [51.4552693825, -2.59664882861] | NaN | 104.529084 | 99.054748 | 260.078792 | 51.455269 | -2.596649 | 1.000000 |
| 5976 | 2021-02-16T07:00:00+00:00 | [51.432675707, -2.60495665673] | 9.205129 | 27.309300 | 56.562188 | 98.541564 | 51.432676 | -2.604957 | 0.253238 |
| 5963 | 2021-02-16T07:00:00+00:00 | [51.4628294172, -2.58454081635] | NaN | 1.123000 | 13.961000 | 15.682000 | 51.462829 | -2.584541 | 0.000000 |
| 5993 | 2021-02-16T07:00:00+00:00 | [51.4579497129, -2.58398909033] | NaN | 5.239000 | 25.436000 | 33.469000 | 51.457950 | -2.583989 | 0.039804 |
| 5987 | 2021-02-16T07:00:00+00:00 | [51.4417471802, -2.55995583224] | NaN | 4.270975 | 19.842188 | 26.440313 | 51.441747 | -2.559956 | 0.030443 |
We start with a uniform value across the whole map.
The points are uniformly distributed in latitude and longitude, with a spacing of 2 degrees in each direction. Because of the Web Mercator projection used by the map tiles, equal latitude steps appear more widely spaced towards the poles and more concentrated near the equator. The longitudinal spacing is not affected by this projection.
from ipyleaflet import Map, Heatmap
from random import uniform
m = Map(center=(0, 0), zoom=0)
nlon, nlat = (180, 90)
lon = np.linspace(-180, 180, nlon)
lat = np.linspace(-90, 90, nlat)
lonv, latv = np.meshgrid(lon, lat, indexing='ij')
probs = np.ones_like(lonv) * 500  # constant weight everywhere; alternative: 100*(latv - latv.min())/(latv.max() - latv.min())
heatmap = Heatmap(
locations=[[latv[i,j], lonv[i,j], probs[i,j]] for i in range(nlon) for j in range(nlat)],
radius=10,
gradient={0.0: 'blue', 1.0: 'red'}
)
m.add_layer(heatmap);
display(m)
Simulation of a probabilistic classifier that predicts the probability of the Northern vs the Southern Hemisphere.
We first generate a grid with a 2-degree spacing in each direction, assign the probabilities, and visualise them as Matplotlib heatmaps.
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
nlon, nlat = (180, 90)
lon = np.linspace(-180, 180, nlon)
lat = np.linspace(-90, 90, nlat)
lonv, latv = np.meshgrid(lon, lat, indexing='ij')
probs = 100*(latv - latv.min())/(latv.max() - latv.min())
fig = plt.figure(figsize=(15, 5))
for i, (name, matrix) in enumerate({'longitude': lonv, 'latitude': latv, 'probabilities': probs}.items()):
    ax = fig.add_subplot(1, 3, i + 1)
    ax.set_title(name)
    img = ax.imshow(matrix.T)
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    fig.colorbar(img, cax=cax)
If we show only the probability of North, points with weights near 0 are rendered as almost fully transparent, making the South (yellow) predictions difficult to see.
from ipyleaflet import Map, Heatmap
from random import uniform
m = Map(center=(0, 0), zoom=0)
nlon, nlat = (180, 90)
lon = np.linspace(-180, 180, nlon)
lat = np.linspace(-90, 90, nlat)
lonv, latv = np.meshgrid(lon, lat, indexing='ij')
probs = 100*(latv - latv.min())/(latv.max() - latv.min())
heatmap = Heatmap(
locations=[[latv[i,j], lonv[i,j], probs[i,j]] for i in range(nlon) for j in range(nlat)],
radius=20,
gradient={0.0: 'yellow', 0.5: 'green', 1.0: 'blue'}
)
m.add_layer(heatmap);
display(m)
m = Map(center=(0, 0), zoom=0)
probs_s = 100 - probs
heatmap = Heatmap(
locations=[[latv[i,j], lonv[i,j], probs_s[i,j]] for i in range(nlon) for j in range(nlat)],
radius=20,
gradient={0.0: 'blue', 0.5: 'green', 1.0: 'yellow'}
)
m.add_layer(heatmap);
display(m)
It is necessary to show the probability of each class in a separate layer, because weights near 0 are rendered as almost fully transparent and would otherwise not be visible.
from ipyleaflet import Map, Heatmap
from random import uniform
m = Map(center=(0, 0), zoom=0)
nlon, nlat = (180, 90)
lon = np.linspace(-180, 180, nlon)
lat = np.linspace(-90, 90, nlat)
lonv, latv = np.meshgrid(lon, lat, indexing='ij')
probs = 100*(latv - latv.min())/(latv.max() - latv.min())
heatmap = Heatmap(
locations=[[latv[i,j], lonv[i,j], probs[i,j]] for i in range(nlon) for j in range(nlat)],
radius=20,
gradient={0.0: 'yellow', 0.5: 'green', 1.0: 'blue'}
)
m.add_layer(heatmap);
probs = 100 - probs
heatmap = Heatmap(
locations=[[latv[i,j], lonv[i,j], probs[i,j]] for i in range(nlon) for j in range(nlat)],
radius=20,
gradient={0.0: 'blue', 0.5: 'green', 1.0: 'yellow'}
)
m.add_layer(heatmap);
display(m)
To reduce the deformation of the grid caused by projecting the sphere onto a rectangle, we can zoom into a smaller region of the map, where the deformation is less apparent. However, zoom and heatmap still interact in a non-ideal way: it is very difficult to find a single set of heatmap parameters that works well at both low and high zoom levels. One possible workaround is sketched after the example below.
from ipyleaflet import Map, Heatmap
from random import uniform
m = Map(center=(55, 0), zoom=4.3)
nlon, nlat = (100, 100)
lon = np.linspace(-10, 10, nlon)
lat = np.linspace(50, 60, nlat)
lonv, latv = np.meshgrid(lon, lat, indexing='ij')
probs = 100*(latv - latv.min())/(latv.max() - latv.min())
color_per_class = [{0.0: 'yellow', 0.5: 'green', 1.0: 'blue'},
{0.0: 'blue', 0.5: 'green', 1.0: 'yellow'}]
for gradient, values in zip(color_per_class, [probs, 100 - probs]):
    heatmap = Heatmap(
        locations=[[latv[i,j], lonv[i,j], values[i,j]] for i in range(nlon) for j in range(nlat)],
        radius=20,
        gradient=gradient
    )
    m.add_layer(heatmap);
display(m)
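A possible workaround for the zoom problem described above is to observe the map's `zoom` trait and rescale the heatmap radius accordingly. This is only a sketch: the doubling heuristic and the parameter values are illustrative, not part of the original notebook.
def make_zoom_handler(heatmap, base_radius=20, base_zoom=4):
    # Illustrative heuristic: double the blob radius for each extra zoom level
    def handler(change):
        heatmap.radius = max(1.0, base_radius * 2.0 ** (change['new'] - base_zoom))
    return handler

# Usage, assuming m and heatmap from the cell above:
# m.observe(make_zoom_handler(heatmap), names='zoom')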
The following is an example of country contours together with a search control for country names.
import json
import os
import requests
from ipyleaflet import AwesomeIcon, GeoJSON, Map, Marker, LayerGroup, SearchControl
m = Map(zoom=3, center=[19.1646, 72.8493])
if not os.path.exists('countries.geo.json'):
    url = 'https://raw.githubusercontent.com/jupyter-widgets/ipyleaflet/master/examples/countries.geo.json'
    r = requests.get(url)
    with open('countries.geo.json', 'w') as f:
        f.write(r.content.decode("utf-8"))
with open("countries.geo.json") as f:
    data = json.load(f)
countries = GeoJSON(data=data)
layer_group = LayerGroup(layers=(countries,))
marker = Marker(icon=AwesomeIcon(name="check", marker_color='green', icon_color='darkred'))
m.add_control(SearchControl(
position="topleft",
layer=layer_group,
zoom=4,
property_name='name',
marker=marker
))
m
from ipyleaflet import Map, ImageOverlay
m = Map(center=(25, -115), zoom=4)
image = ImageOverlay(
    # remote original: https://i.imgur.com/06Q1fSz.png
    url="./06Q1fSz_alpha.png",  # local copy with an alpha channel
    bounds=((13, -130), (32, -100))
)
m.add_layer(image);
m
import ipyleaflet
import json
import pandas as pd
import os
import requests
from ipywidgets import link, FloatSlider
from branca.colormap import linear
def load_data(url, filename, file_type):
    r = requests.get(url)
    with open(filename, 'w') as f:
        f.write(r.content.decode("utf-8"))
    with open(filename, 'r') as f:
        return file_type(f)
geo_json_data = load_data(
'https://raw.githubusercontent.com/jupyter-widgets/ipyleaflet/master/examples/us-states.json',
'us-states.json',
json.load)
unemployment = load_data(
'https://raw.githubusercontent.com/jupyter-widgets/ipyleaflet/master/examples/US_Unemployment_Oct2012.csv',
'US_Unemployment_Oct2012.csv',
pd.read_csv)
unemployment = dict(zip(unemployment['State'].tolist(), unemployment['Unemployment'].tolist()))
layer = ipyleaflet.Choropleth(
geo_data=geo_json_data,
choro_data=unemployment,
colormap=linear.YlOrRd_04,
border_color='black',
style={'fillOpacity': 0.8, 'dashArray': '5, 5'})
m = ipyleaflet.Map(center = (43,-100), zoom = 4)
m.add_layer(layer)
m
The two coordinates in the map are the latitude and the longitude.
Although in writing and speech it is more common to use only positive values together with the hemisphere letters, numeric data usually encodes West as a negative longitude and South as a negative latitude. Longitude zero corresponds to the prime meridian, which crosses Greenwich, while latitude zero corresponds to the equator. Null Island lies at the intersection of these two lines (0° N, 0° E) in the Gulf of Guinea, and is marked by a weather buoy maintained by PIRATA (Prediction and Research Moored Array in the Atlantic).
Coordinates are always specified with latitude first and longitude second, e.g. the centre of Bristol is 51.4545° N, 2.5879° W.
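As a quick illustration of this convention, a small helper (hypothetical; not part of ipyleaflet) that formats signed decimal degrees in the written style:
def format_latlong(lat, lon):
    '''Format signed decimal degrees using hemisphere letters.'''
    ns = 'N' if lat >= 0 else 'S'
    ew = 'E' if lon >= 0 else 'W'
    return f'{abs(lat):.4f}° {ns}, {abs(lon):.4f}° {ew}'

print(format_latlong(51.4545, -2.5879))  # 51.4545° N, 2.5879° W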
import ipyleaflet
import json
import pandas as pd
import os
import requests
from ipywidgets import link, FloatSlider
from branca.colormap import linear
regions = {
"type": "FeatureCollection",
"features":[{
"type":"Feature",
"id":"YS",
"properties":{"name":"Yellow Square"},
"geometry":{
"type":"Polygon",
"coordinates": [[[-2,2],
[-2,-2],
[2,-2],
[2,2]]]
}
},
{
"type":"Feature",
"id":"RS",
"properties":{"name":"Red Square"},
"geometry":{
"type":"Polygon",
"coordinates": [[[2,2],
[8,2],
[8,8],
[2,8]]]
}
}]
}
colors = {'YS': 0, 'RS': 1}
layer = ipyleaflet.Choropleth(
geo_data=regions,
choro_data=colors,
colormap=linear.YlOrRd_04,
border_color='black',
style={'fillOpacity': 0.7, 'dashArray': '5, 5'})
m = ipyleaflet.Map(center = (0,0), zoom = 4)
m.add_layer(layer)
m
First we will generate a synthetic dataset
spain_center = (40.4637, -3.7492) # latitude longitude (+N, +E) or (-S, -W)
france_center = (46.2276, 2.2137) # latitude, longitude
andorra_center = (42.5063, 1.5218)  # latitude, longitude
def latlong_to_xy(coordinates):
    '''Convert coordinates from (latitude, longitude) order into (x, y) order.'''
    if len(coordinates) > 0 and (type(coordinates[0]) not in [float, int]):
        return [list(reversed(ll)) for ll in coordinates]
    return list(reversed(coordinates))

def xy_to_latlong(coordinates):
    '''Convert coordinates from (x, y) order into (latitude, longitude) order.'''
    return latlong_to_xy(coordinates)
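A quick sanity check of these helpers (expected output in the comments):
print(latlong_to_xy([51.4545, -2.5879]))    # [-2.5879, 51.4545]
print(latlong_to_xy([[40.4637, -3.7492],
                     [46.2276, 2.2137]]))   # [[-3.7492, 40.4637], [2.2137, 46.2276]]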
# Two Gaussians
samples_c0 = 500
samples_c1 = 500
x_class_0 = np.random.multivariate_normal(mean=spain_center, cov=[[12,0],[0,12]], size=samples_c0)
x_class_1 = np.random.multivariate_normal(mean=france_center, cov=[[12,0],[0,12]], size=samples_c1)
y_class_0 = np.zeros((samples_c0, 1), dtype=int)
y_class_1 = np.ones((samples_c1, 1), dtype=int)
x = np.vstack((x_class_0, x_class_1))
y = np.vstack((y_class_0, y_class_1)).squeeze()
from sklearn.utils import shuffle
x, y = shuffle(x, y)
from sklearn.datasets import make_blobs
x, y = make_blobs(n_samples=500, n_features=2, centers=20,
cluster_std=2.0, center_box=(-20.0, 20.0),
shuffle=True, random_state=42)
x += andorra_center
y[y<10] = 0
y[y>=10] = 1
fig, ax = plt.subplots(1)
scatter = ax.scatter(x[:,1], x[:,0], c=y, alpha=0.7)
ax.legend(handles=scatter.legend_elements(num=[0,1])[0], labels=['spain', 'france'])
<matplotlib.legend.Legend at 0x7f87f894fc10>
locations_c0 = np.hstack((x[y==0], np.ones((sum(y==0), 1))))
locations_c1 = np.hstack((x[y==1], np.ones((sum(y==1), 1))))
from ipyleaflet import Map, basemaps, basemap_to_tiles, Circle
m = Map(center=andorra_center, zoom=4)
class_color = ['blue', 'yellow']
# FIXME One layer per circle gets laggy, we show only 100 points
for sample_x, sample_y in zip(x[:100].tolist(), y[:100]):
    circle = Circle()
    circle.location = sample_x
    circle.radius = 100
    circle.color = class_color[sample_y]
    circle.fill_color = class_color[sample_y]
    m.add_layer(circle)
display(m)
m = Map(center=(40.4637, -3.7492), zoom=4)
for color, location in [('blue', locations_c0.tolist()),
                        ('yellow', locations_c1.tolist())]:
    heatmap = Heatmap(
        locations=location,
        radius=5,
        gradient={0.0: color, 1.0: color},
        min_opacity=1,
    )
    m.add_layer(heatmap);
display(m)
We train a classifier to discriminate the two classes (a Logistic Regression or a Random Forest would also work; here we use an SVC with probability estimates)
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
#clf = LogisticRegression()
#clf = RandomForestClassifier()
clf = SVC(C=10000, probability=True)
clf.fit(x, y)
print('Training accuracy = {:.3f}'.format(np.mean(clf.predict(x) == y)))
Training accuracy = 0.848
Next we inspect the predicted probabilities over a large region around both classes
nlon, nlat = (200, 200)
lon = np.linspace(-180, 180, nlon)
lat = np.linspace(-90, 90, nlat)
lonv, latv = np.meshgrid(lon, lat, indexing='ij')
center = andorra_center
# FIXME Use the predicted probabilities
probs = clf.predict_proba(np.hstack((latv.reshape(-1,1), lonv.reshape(-1,1))))
#probs = np.log((center -np.hstack((lonv.reshape(-1, 1), latv.reshape(-1,1))))**2)/100
#probs = np.hstack((probs, -1*probs))
m = Map(center=center, zoom=4)
for c, gradient in enumerate([{0.0: 'blue', 1.0: 'blue'},
                              {0.0: 'yellow', 1.0: 'yellow'}]):
    # FIXME If I select a value between 0 and 1, the color is barely visible
    location_prob_c = probs[:, c].reshape(lonv.shape) * 100
    heatmap = Heatmap(
        locations=[[latv[i,j], lonv[i,j], location_prob_c[i,j]] for i in range(nlon) for j in range(nlat)],
        radius=40,
        gradient=gradient,
        min_opacity=0.0,
    )
    m.add_layer(heatmap);
display(m)
# Obtain the contour lines for the latitude and longitude variant
fig, ax = plt.subplots(1, figsize=(12, 9))
cs = ax.contour(latv, lonv, probs[:,1].reshape(lonv.shape),
[0, 0.000001, 0.0001, 0.1, 0.3, 0.5,
0.7, 0.9, 0.9999, 0.999999, 1]);
ax.scatter(x[:,0], x[:,1], c=y, edgecolor='black')
ax.set_ylim([-30, 30])
ax.set_xlim([20, 60])
(20.0, 60.0)
from ipyleaflet import Map, Polyline
m = Map(center=andorra_center, zoom=5)
# FIXME The current code only uses one of the lines for each level (see p.get_paths()[0])
lines = [p.get_paths()[0].vertices.tolist() for p in cs.collections if p.get_paths()]
colors = [linear.viridis(i/len(lines)) for i in range(len(lines))]
for line, color in zip(lines, colors):
    l_layer = Polyline(
        locations=line,
        color=color,
        fill=False
    )
    m.add_layer(l_layer)
m
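To address the FIXME above, every path of every contour level can be drawn instead of only the first one. A sketch using the same `cs` object (the colour indexing is illustrative):
m = Map(center=andorra_center, zoom=5)
levels = [p.get_paths() for p in cs.collections]
for level_idx, paths in enumerate(levels):
    color = linear.viridis(level_idx / max(1, len(levels) - 1))
    # Draw all disconnected segments belonging to this probability level
    for path in paths:
        m.add_layer(Polyline(locations=path.vertices.tolist(), color=color, fill=False))
m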