# Make sure you have ucamcl_alg_utils.py in the current directory. It's in the
# repository with all the other notebooks. If you don't have your own copy of the
# repository, you can get the file from
# https://gitlab.developers.cam.ac.uk/djw1005/algorithms/-/blob/master/ucamcl_alg_utils.py
from ucamcl_alg_utils import DirectedGraph, UndirectedGraph, Stack, Queue, PriorityQueue
The "pseudocode" given in lecture notes is mostly actual working Python code. It is based on an Adjacency List representation of a graph, wrapped up as a nice Python object. The ucamcl_alg_utils
package contains two classes, DirectedGraph
and UndirectedGraph
, both of which allow edges to be either labelled or unlabelled. Initialize them with a list of edges, as illustrated in the code snippets below.
When you create a graph, you specify the edges by giving vertex ids; the graph will create a Vertex object for each vertex id. This allows you to attach attributes to each Vertex object, e.g. v.distance
or v.seen
. These two classes provide the following:
g.vertices
contains all the vertices, stored as Vertex
objectsg.vertex['A']
gets the Vertex object for vertex id 'A'g.edges
is a list [(v,u),...] or [(v,u,lbl),...], giving the Vertex at each end and (if provided) the edge labelv.neighbours
is a list [u,...] or [(u,lbl),...], giving the neighbouring Vertex and (if provided) the edge label# Example use: Euler's graph for Koenigsberg
# stored as a directed graph (two edges, one in each direction, per bridge)
g = DirectedGraph([('A','B'),('A','B'),('A','D'),
('B','A'),('B','A'),('B','C'),('B','C'),('B','D'),
('C','B'),('C','B'),('C','D'),
('D','A'),('D','B'),('D','C')])
for v in g.vertices:
print(f'{v} has {len(v.neighbours)} neighbours')
C has 3 neighbours A has 3 neighbours B has 5 neighbours D has 3 neighbours
# Example use: a directed graph with edge weights
g = DirectedGraph([('a','b',1), ('b','c',2), ('c','d',3), ('d','b',-6)])
for u,v,w in g.edges:
print(f'The {u}→{v} edge has weight {w}')
The a→b edge has weight 1 The b→c edge has weight 2 The c→d edge has weight 3 The d→b edge has weight -6
edges = [('A','B'),('B','E'),('B','F'),('F','G'),('A','C'),('A','D'),('D','H')]
g = UndirectedGraph(edges)
g2 = UndirectedGraph(edges + [('C','D')])
# Bad attempt 1
def visit_tree(v):
print("visiting", v)
for w in v.neighbours:
visit_tree(w)
# This next command will fail with "RecursionError: maximum recursion depth exceeded"
# visit_tree(g.vertex['B'])
# Bad attempt 2
def visit_tree(v, v_parent):
print("visiting", v)
for w in v.neighbours:
if w != v_parent:
visit_tree(w, v)
visit_tree(g.vertex['B'], None)
# This next command will fail with "RecursionError: maximum recursion depth exceeded"
# visit_tree(g2.vertex['D'], None)
visiting B visiting E visiting F visiting G visiting A visiting C visiting D visiting H
def dfs_recurse(g, s):
for v in g.vertices:
v.visited = False
visit(s)
# The plain algorithm for visit()
def visit(v):
v.visited = True
for w in v.neighbours:
if not w.visited:
visit(w)
# A fancy version of visit() that prints out its stack trace
def visit(v, prefix=''):
v.visited = True
print(f'{prefix}visit({v}): neighbours {", ".join([w.id for w in v.neighbours])}')
prefix = prefix + '| '
for w in v.neighbours:
if not w.visited:
visit(w, prefix)
else:
print(f"{prefix}don't visit {w}")
print(f'{prefix}return from visit({v})')
dfs_recurse(g2, g2.vertex['D'])
visit(D): neighbours H, A, C | visit(H): neighbours D | | don't visit D | | return from visit(H) | visit(A): neighbours B, C, D | | visit(B): neighbours E, F, A | | | visit(E): neighbours B | | | | don't visit B | | | | return from visit(E) | | | visit(F): neighbours G, B | | | | visit(G): neighbours F | | | | | don't visit F | | | | | return from visit(G) | | | | don't visit B | | | | return from visit(F) | | | don't visit A | | | return from visit(B) | | visit(C): neighbours D, A | | | don't visit D | | | don't visit A | | | return from visit(C) | | don't visit D | | return from visit(A) | don't visit C | return from visit(D)
def dfs(g, s):
for v in g.vertices:
v.seen = False
toexplore = Stack([s])
s.seen = True
while not toexplore.is_empty():
print(f'toexplore={toexplore}', end=' ... ')
v = toexplore.popright()
print(f'visiting {v}')
for w in v.neighbours:
if not w.seen:
print(f' new neighbour {w}')
toexplore.pushright(w)
w.seen = True
else:
print(f' seen neighbour {w}')
dfs(g2, g2.vertex['B'])
toexplore=[B] ... visiting B new neighbour E new neighbour F new neighbour A toexplore=[E,F,A] ... visiting A seen neighbour B new neighbour C new neighbour D toexplore=[E,F,C,D] ... visiting D new neighbour H seen neighbour A seen neighbour C toexplore=[E,F,C,H] ... visiting H seen neighbour D toexplore=[E,F,C] ... visiting C seen neighbour D seen neighbour A toexplore=[E,F] ... visiting F new neighbour G seen neighbour B toexplore=[E,G] ... visiting G seen neighbour F toexplore=[E] ... visiting E seen neighbour B
def bfs(g, s):
for v in g.vertices:
v.seen = False
toexplore = Queue([s])
s.seen = True
while not toexplore.is_empty():
print(f'toexplore {toexplore}', end='... ')
v = toexplore.popleft()
print(f'visiting {v}')
for w in v.neighbours:
if not w.seen:
toexplore.pushright(w)
w.seen = True
else:
print(f' seen neighbour {w}')
def bfs_path(g, s, t):
for v in g.vertices:
v.seen = False
v.come_from = None
s.seen = True
toexplore = Queue([s])
# Traverse the graph, visiting everything reachable from s
while not toexplore.is_empty():
v = toexplore.popleft()
for w in v.neighbours:
if not w.seen:
toexplore.pushright(w)
w.seen = True
w.come_from = v
# Reconstruct the full path from s to t, working backwards
if t.come_from is None:
return None
else:
path = [t]
while path[0].come_from != s:
path.insert(0, path[0].come_from) # i.e. prepend
path.insert(0,s)
return path
g = DirectedGraph([('a','b'),('a','d'),('b','e'),('b','d'),('b','c'),('c','a'),('d','a'),('d','c')])
bfs(g, g.vertex['a'])
toexplore [a]... visiting a toexplore [b,d]... visiting b seen neighbour d toexplore [d,e,c]... visiting d seen neighbour a seen neighbour c toexplore [e,c]... visiting e toexplore [c]... visiting c seen neighbour a
p = bfs_path(g, g.vertex['a'], g.vertex['e'])
' -> '.join(str(s) for s in p)
'a -> b -> e'
def dijkstra(g, s):
for v in g.vertices:
v.distance = float('inf')
s.distance = 0
toexplore = PriorityQueue([s], sortkey = lambda v: v.distance)
while not toexplore.is_empty():
v = toexplore.popmin()
# assert: v.distance is the true shortest distance from s to v
# assert: v is never put back into toexplore
for w,edgecost in v.neighbours:
dist_w = v.distance + edgecost
if dist_w < w.distance:
w.distance = dist_w
if w in toexplore:
toexplore.decreasekey(w)
else:
toexplore.push(w)
# Disjktra is guaranteed to work when all edge costs are >= 0.
g = DirectedGraph([('s','t',2), ('s','v',3), ('t','u',2), ('u','v',1), ('v','t',4)])
dijkstra(g, g.vertex['s'])
for v in g.vertices:
print(f"distance from s to {v} is {v.distance}")
distance from s to v is 3 distance from s to u is 4 distance from s to s is 0 distance from s to t is 2
# Let's see what it does on a graph with some edge costs < 0 ...
g = DirectedGraph([('s','t',2), ('s','v',3), ('t','u',3), ('u','v',1), ('v','t',-4)])
dijkstra(g, g.vertex['s'])
for v in g.vertices:
print(f"distance from s to {v} is {v.distance}")
distance from s to v is 3 distance from s to u is 2 distance from s to s is 0 distance from s to t is -1
def bf(g, s):
for v in g.vertices:
v.minweight = float('inf')
s.minweight = 0
for _ in range(len(g.vertices) - 1):
# relax all the edges
for (u,v,c) in g.edges:
v.minweight = min(u.minweight + c, v.minweight)
# assert v.minweight >= true minimum weight from s to v
for (u,v,c) in g.edges:
if u.minweight + c < v.minweight:
raise ValueError("Negative-weight cycle detected")
import math
g = DirectedGraph([('£','zl',-math.log(5.01)), ('£','$',-math.log(1.25)), ('zl','$',-math.log(0.27))])
bf(g, g.vertex['£'])
f"Best exchange rate £->$: £1 -> ${math.exp(- g.vertex['$'].minweight)}"
'Best exchange rate £->$: £1 -> $1.3527'
g= DirectedGraph([('a','b',1), ('b','c',2), ('c','d',3), ('d','b',-6)])
bf(g, g.vertex['a'])
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-20-e1513e254bdf> in <module> 1 g= DirectedGraph([('a','b',1), ('b','c',2), ('c','d',3), ('d','b',-6)]) ----> 2 bf(g, g.vertex['a']) <ipython-input-18-116b97dbe4b4> in bf(g, s) 12 for (u,v,c) in g.edges: 13 if u.minweight + c < v.minweight: ---> 14 raise ValueError("Negative-weight cycle detected") ValueError: Negative-weight cycle detected
g = DirectedGraph([('a','b',1), ('a','c',4), ('b','c',2), ('c','d',3), ('d','b',-4)])
dst = g.vertex['d']
# Cache computed values of F(v,t) in value[(v,t)], and computed actions in action[(v,t)].
# The function dp(v,t) looks for value[(v,t)], and if it doesn't exist in the cache
# then it computes it using the dynamic programming recursion.
value = {}
action = {}
def dp(v, t):
if (v,t) in value:
return value[(v,t)]
if t == 0:
val,act = (0 if v==dst else float('inf'), None)
else:
choices = [(dp(v,t-1), None)] + [(weight+dp(w,t-1), w) for w,weight in v.neighbours]
val,act = min(choices, key = lambda x: x[0])
value[(v,t)], action[(v,t)] = val,act
return val
for t in [1,2]:
for v in g.vertices:
print(f'minweight({v} to {dst}) using paths with <= {t} edges: {dp(v,t)}')
print()
minweight(b to d) using paths with <= 1 edges: inf minweight(c to d) using paths with <= 1 edges: 3 minweight(d to d) using paths with <= 1 edges: 0 minweight(a to d) using paths with <= 1 edges: inf minweight(b to d) using paths with <= 2 edges: 5 minweight(c to d) using paths with <= 2 edges: 3 minweight(d to d) using paths with <= 2 edges: 0 minweight(a to d) using paths with <= 2 edges: 7
# Compute the minweight path and cache it
# (Theorem says: if there are no -ve weight cycles, a time horizon of V-1 is sufficient.)
src = g.vertex['a']
V = len(g.vertices)
dp(src, V-1)
# Then retrieve it
path = [src]
for t in range(V-1,0,-1):
v = path[-1]
w = action[(v,t)]
path.append(w)
f"Minweight path from {src} to {dst} is {'→'.join(str(v) for v in path)}, minweight is {value[(src,V-1)]}"
'Minweight path from a to d is a→b→c→d, minweight is 6'
import numpy as np
n = len(g.vertices)
W = np.full((n,n), np.inf)
np.fill_diagonal(W, 0)
vertex_ids = [v.id for v in g.vertices]
vi = {v_id:i for i,v_id in enumerate(sorted(vertex_ids))}
for u,v,c in g.edges:
W[vi[u.id]][vi[v.id]] = c
def matmul(A,B):
r,i = A.shape
j,c = B.shape
assert i==j, "number of A columns must equal number of B rows"
x = np.empty((r,c), dtype=A.dtype)
for i in range(r):
for j in range(c):
x[i,j] = np.min(A[i,:] + B[:,j])
return x
M = W
for _ in range(int(np.ceil(np.log2(n-1)))):
M = matmul(M,M)
f"Minweight path from {src} to {dst} has weight {M[vi[src.id],vi[dst.id]]}"
'Minweight path from a to d has weight 6.0'
# The original graph. Let the edge weights in the original graph be w(u->v).
g = DirectedGraph([('a','b',2), ('a','c',1), ('a','d',3), ('b','c',-2), ('c','e',-1), ('d','c',-2), ('d','e',4)])
# STEP 1. Build an augmented graph, with an extra node, and compute the d_v
s = '_s'
augmented = DirectedGraph([(v.id, w.id, c) for v,w,c in g.edges] + [(s, v.id, 0) for v in g.vertices])
bf(augmented, augmented.vertex[s]) # sets v.minweight for every vertex v
print(f"d_v[e] = {augmented.vertex['e'].minweight}")
d_v[e] = -3
# STEP 2. Define a helper graph, like the original
# but with edge weight u->v given by d_u + w(u->v) - d_v
# In the helper graph, all edges should have weight >= 0.
helper = DirectedGraph([(v.id, w.id, augmented.vertex[v.id].minweight + c - augmented.vertex[w.id].minweight)
for v,w,c in g.edges])
assert min(c for v,w,c in helper.edges) >= 0, "Helper graph should have all edge weights >= 0"
weight_prime = {(u.id,v.id):w for u,v,w in helper.edges}
print(f"weight'(d→e) = {weight_prime[('d','e')]}")
weight'(d→e) = 7
# STEP 3. Run Disjktra, starting from every vertex in the helper graph, to get a matrix of distances.
import numpy as np
n = len(g.vertices)
dist = np.full((n,n), np.inf)
vertex_ids = [v.id for v in g.vertices]
vi = {vid:i for i,vid in enumerate(sorted(vertex_ids))}
for vid in vertex_ids:
dijkstra(helper, helper.vertex[vid])
for wid in vertex_ids:
dist[vi[vid], vi[wid]] = helper.vertex[wid].distance
# STEP 4. Wrap up
d = np.zeros(n)
for vid in vertex_ids:
d[vi[vid]] = augmented.vertex[vid].minweight
dist - d[:,np.newaxis] + d[np.newaxis,:]
array([[ 0., 2., 0., 3., -1.], [inf, 0., -2., inf, -3.], [inf, inf, 0., inf, -1.], [inf, inf, -2., 0., -3.], [inf, inf, inf, inf, 0.]])
# Execute a unix command to download a file (if it's not already downloaded), and show download progress
import os.path
if os.path.exists('data/osm_cam.xml'):
print("file already downloaded")
else:
!wget "https://www.cl.cam.ac.uk/teaching/2021/Algorithms/data/osm_cam.xml" -P data
file already downloaded
import collections
import lxml.etree
import numpy as np
import pandas
# A namedtuple is a record, i.e. an immutable object with variables and no methods.
# It prints out nicely too.
# It's a convenient way to store pure data.
Node = collections.namedtuple('Node', ('id','lat','lng'))
Way = collections.namedtuple('Way', ('id','nodes','highway','is_oneway','is_area'))
def make_node(el):
return Node(id=el.get('id'), lat=el.get('lat'), lng=el.get('lon'))
def get_tag_value(el, k):
tag = el.find("tag[@k='{}']".format(k))
return tag.get('v') if tag is not None else None
def make_way(el):
return Way(id=el.get('id'),
nodes=[nd.get('ref') for nd in el.findall('nd')],
highway=get_tag_value(el, 'highway'),
is_oneway=get_tag_value(el, 'oneway'),
is_area=get_tag_value(el, 'area'))
nodes,ways = [],[]
# Iterate through every element of the xml tree, leaf nodes first and working up
for _,el in lxml.etree.iterparse('data/osm_cam.xml'):
if el.tag == 'node':
nodes.append(make_node(el))
elif el.tag == 'way':
ways.append(make_way(el))
elif el.tag in ['tag', 'nd']:
continue
# Save space by deleting the element-object after it's been processed.
# Make sure we don't delete tag and nd, since we want the parent <way> to see them!
el.clear()
coords = {n.id: (float(n.lng), float(n.lat)) for n in nodes}
edges = [(n1, n2, w) for w in ways for n1,n2 in zip(w.nodes[:-1],w.nodes[1:])]
osm = pandas.DataFrame({
'n0': [n1 for n1,n2,w in edges],
'n1': [n2 for n1,n2,w in edges],
'x0': [coords[n1][0] for n1,n2,w in edges],
'y0': [coords[n1][1] for n1,n2,w in edges],
'x1': [coords[n2][0] for n1,n2,w in edges],
'y1': [coords[n2][1] for n1,n2,w in edges],
'highway': [w.highway for n1,n2,w in edges],
'is_oneway': [w.is_oneway for n1,n2,w in edges],
'is_area': [w.is_area for n1,n2,w in edges]})
osmv = pandas.DataFrame(((n.id, float(n.lng), float(n.lat)) for n in nodes), columns=['id','lng','lat'])
Here are some of the attributes that may be present in each <way>. Don't make assumptions about the possible values... OSM data is crowd-sourced and not always consistent.
ways[10]
Way(id='3217814', nodes=['14913683', '1483462366', '25285249', '3709007160', '25285243', '25285244', '3709007159', '25285245', '15695195', '6572094', '3698818604', '15695194', '1480279461', '575024329', '6572093'], highway='residential', is_oneway=None, is_area=None)
print('highways:', {w.highway for w in ways})
print('is_oneway:', {w.is_oneway for w in ways})
print('is_area:', {w.is_area for w in ways})
highways: {'footway', 'trunk_link', 'trunk', None, 'cycleway', 'primary_link', 'steps', 'primary', 'living_street', 'motorway_link', 'tertiary', 'crossing', 'unclassified', 'track', 'pedestrian', 'path', 'road', 'motorway', 'construction', 'bus_guideway', 'residential', 'secondary', 'service', 'bridleway'} is_oneway: {'-1', 'yes', 'no', None} is_area: {'yes', 'no', None}
# Selecting edges and points in a circle around Mill Lane lecture rooms.
# To account for the scale of latitude and longitude, there needs to be a
# correction factor of cos(latitude) applied to north-south distances.
cy,cx = (52.201605, 0.117172)
c = np.cos(cy/360*2*np.pi)
osm['d'] = np.sqrt(np.maximum( ((osm['x0']-cx)*c)**2 + (osm['y0']-cy)**2,
((osm['x1']-cx)*c)**2 + (osm['y1']-cy)**2)) /360*40.075e6
osmv['d'] = np.sqrt( ((osmv['lng']-cx)*c)**2 + (osmv['lat']-cy)**2) /360*40.075e6
osm['meters'] = np.sqrt(((osm['x1']-osm['x0'])*c)**2 + (osm['y1']-osm['y0'])**2) /360*40.075e6
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
r = 860
segments = osm.loc[osm['d']<r][['x0','y0','x1','y1']].values.reshape((-1,2,2))
points = osmv.loc[osmv['d']<r][['lng','lat']].values
fig,ax = plt.subplots(figsize=(12,12))
lc = LineCollection(segments)
lc.set_linewidth(0.1)
lc.set_color('0.3')
ax.add_collection(lc)
ax.scatter(points[:,0], points[:,1], s=.1, color='0.4')
rd = r / 40.075e6 * 360
ax.set_xlim((cx-rd/c,cx+rd/c))
ax.set_ylim((cy-rd,cy+rd))
ax.set_aspect(1/c, 'box')
plt.show()