PyTorch introduction

Getting started with PyTorch

This is a brief introduction to PyTorch, designed to complement the IB Data Science course. It assumes you’re familiar with the idea of maximum likelihood estimation. We’ll use PyTorch to represent a probability model for regression, and fit it.

There are many other tutorials on PyTorch, including the tutorial in the official documentation. They typically present PyTorch as a software library, and go into much more depth on tensors and GPUs and so on. But they often don’t give much guidance on how to use PyTorch for data science.

In this tutorial we’ll work with the data behind the xkcd 2048 comic on curve-fitting. First, load the dataset:

import pandas
import numpy as np
import matplotlib.pyplot as plt

url = 'https://www.cl.cam.ac.uk/teaching/current/DataSci/data/xkcd.csv'
xkcd = pandas.read_csv(url)

fig,ax = plt.subplots(figsize=(4,3))
ax.scatter(xkcd.x, xkcd.y)
plt.show()

Step 1. Fitting with scipy

Consider the probability model \[ Y_i \sim a + b x_i + c x_i^2 + N(0,\sigma^2). \] We can fit this model to the dataset using maximum likelihood estimation. The fitted function \(\hat{a}+\hat{b}x+\hat{c}x^2\) is shown below, and the fitted standard deviation \(\hat{\sigma}\) is shown as a ribbon extending \(2\hat{\sigma}\) above and below the fitted line.

EXERCISE 1. Fit this model using scipy.optimize.fmin. You will need to implement a function that computes the log likelihood; call it logPr(y, x,a,b,c,σ). To keep your code tidy, start by defining a helper function that computes the predicted value,

def μ(x, a,b,c): return a + b*x + c*x**2

Plot the fitted μ function, and add the ribbon using fill_between.

import scipy.optimize, scipy.stats

def μ(x, a,b,c):
    return a + b*x + c*x**2

def logPr(y, x,a,b,c,σ):
    return np.sum(scipy.stats.norm.logpdf(y, loc=μ(x,a,b,c), scale=np.sqrt(σ**2)))

ahat,bhat,chat,σhat = scipy.optimize.fmin(lambda θ: -logPr(xkcd.y, xkcd.x, *θ), [1,1,0,1])
# Expected answer for log likelihood: -61.575367

fig,ax = plt.subplots(figsize=(4,3))
ax.scatter(xkcd.x, xkcd.y)
xnew = np.linspace(0,10,100)
ynew = μ(xnew, ahat, bhat, chat)
ax.plot(xnew, ynew, color='black')
ax.fill_between(xnew, ynew-2*σhat, ynew+2*σhat, color='steelblue', alpha=.6)
plt.show()

Step 2. Defining functions in PyTorch

To fit our probability model, we have to maximize the log likelihood function, \[ \log\operatorname{Pr}(y;x,a,b,c,\sigma). \] Here, \(x\) and \(y\) are the data that we’re given (both of them numpy vectors of 31 elements), and the other four parameters are unknowns that we want to maximize over.

In PyTorch, we define a function with unknown parameters by creating a class that inherits from torch.nn.Module. Declare any unknowns in the constructor, wrapping them in torch.nn.Parameter to tell PyTorch that they are to be optimized over. Also, define a forward method that does the work of actually computing the value of the function; this will be invoked automatically when we call our function object. Here’s an example:

import torch
import torch.nn as nn

class MyFunction(nn.Module):
    def __init__(self):
        super().__init__()
        self.θ = nn.Parameter(torch.tensor(1.0))
    def forward(self, x):
        return (x - self.θ) ** 2

f = MyFunction()
f(torch.tensor(3.1))

Anything numerical we do in PyTorch, we do on torch.tensor objects. These are similar to numpy arrays: there are tensor methods that mimic many of the numpy array methods, and simple Python arithmetic operators like - and ** are overloaded to work on tensors. When we implement a function using only tensor operations, PyTorch can automatically compute the derivative of our function, and this is what allows it to do efficient optimization using gradient descent.
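
For example, here is a minimal illustration of automatic differentiation (the variable w is just a throwaway example, not part of the exercises):

w = torch.tensor(2.0, requires_grad=True)
f = (w - 3.0) ** 2
f.backward()        # compute df/dw and store it in w.grad
print(w.grad)       # tensor(-2.), since df/dw = 2(w-3) = -2 at w=2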

EXERCISE 2. Reimplement your logPr function as a PyTorch Module. It should have a forward(self,y,x) method. Note that x and y will need to be tensors; to convert from numpy arrays use

x = torch.tensor(xkcd.x, dtype=torch.float)
y = torch.tensor(xkcd.y, dtype=torch.float)

import torch
import torch.nn as nn

class LogPr(nn.Module):
    def __init__(self):
        super().__init__()
        self.a = nn.Parameter(torch.tensor(1.0))
        self.b = nn.Parameter(torch.tensor(1.0))
        self.c = nn.Parameter(torch.tensor(0.0))
        self.σ = nn.Parameter(torch.tensor(1.0))
    def μ(self, x):
        return self.a + self.b * x + self.c * x**2
    def forward(self, y, x):
        σ2 = self.σ ** 2
        return torch.sum(- 0.5*torch.log(2*np.pi*σ2) - torch.pow(y - self.μ(x), 2) / (2*σ2))
    
x = torch.tensor(xkcd.x, dtype=torch.float)
y = torch.tensor(xkcd.y, dtype=torch.float)
logPr = LogPr()
logPr(y, x)
# expected output: tensor(-185.3153, grad_fn=<SumBackward0>)

Step 3. Optimization in PyTorch

PyTorch is, at its core, a library to make it easy to minimize complicated functions using gradient descent.

import torch

f = MyFunction()
optimizer = torch.optim.Adam(f.parameters())

for epoch in range(5000):  # number of gradient-descent steps
    optimizer.zero_grad()
    val = f(torch.tensor(3.0))
    val.backward()         # compute the gradient 
    optimizer.step()

print(f.θ)

EXERCISE 3. Repeat the optimization from exercise 1, but using PyTorch. You should be able to get a good answer within 10,000 iterations (!).

import torch
import torch.nn as nn

class LogPr(nn.Module):
    ...  # as for Exercise 2
    
x = torch.tensor(xkcd.x, dtype=torch.float)
y = torch.tensor(xkcd.y, dtype=torch.float)
logPr = LogPr()

optimizer = torch.optim.Adam(logPr.parameters())

for epoch in range(10000):
    optimizer.zero_grad()
    loglik = logPr(y, x)
    (-loglik).backward() # we want to maximize logPr, i.e. minimize -loglik
    optimizer.step()

print(logPr(y,x))
# expected output: tensor(-61.5762, grad_fn=<SumBackward0>)

Step 4. Nested modules

To keep ourselves (reasonably) sane, it can be useful to split a Module into building blocks. If we define a module \(A\), then define another module \(B\) which uses it, as in the code below, then when we optimize over \(B\)’s unknown parameters it will automatically include \(A\)’s unknown parameters in the optimization.

class A(nn.Module):
    ...

class B(nn.Module):
    def __init__(self):
        super().__init__()
        self.a = A()

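As a quick sanity check, here is a minimal sketch (with made-up names Inner and Outer) showing that the inner module's parameters appear automatically among the outer module's parameters:

class Inner(nn.Module):
    def __init__(self):
        super().__init__()
        self.θ = nn.Parameter(torch.tensor(1.0))

class Outer(nn.Module):
    def __init__(self):
        super().__init__()
        self.inner = Inner()
        self.φ = nn.Parameter(torch.tensor(0.0))

list(Outer().parameters())   # contains both φ and the inner module's θ
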
EXERCISE 4. Refactor the code from exercises 2 and 3 as follows: create a QuadraticCurve module to implement the function \(μ(x)=a+bx+c x^2\), and a second module called RQuadratic which represents a collection of independent Normal random variables \(N(\mu(x_i),\sigma^2)\). Here is a skeleton.

class QuadraticCurve(nn.Module):
    def forward(self, x):
        # return a + b*x + c*x^2, where x is a tensor of real values

class RQuadratic(nn.Module):
    def __init__(self):
        super().__init__()
        self.μ = QuadraticCurve()
    def logPr(self, y, x):
        # return the log likelihood of [y0,...,yn]

import torch
import torch.nn as nn

class QuadraticCurve(nn.Module):
    def __init__(self):
        super().__init__()
        self.a = nn.Parameter(torch.tensor(1.0))
        self.b = nn.Parameter(torch.tensor(1.0))
        self.c = nn.Parameter(torch.tensor(0.0))
    def forward(self, x):
        return self.a + self.b * x + self.c * x**2
    
class RQuadratic(nn.Module):
    def __init__(self):
        super().__init__()
        self.μ = QuadraticCurve()
        self.σ = nn.Parameter(torch.tensor(1.0))
    def logPr(self, y, x):
        σ2 = self.σ ** 2
        return torch.sum(- 0.5*torch.log(2*np.pi*σ2) - torch.pow(y - self.μ(x), 2) / (2*σ2))

x,y = torch.tensor(xkcd.x, dtype=torch.float), torch.tensor(xkcd.y, dtype=torch.float)
mymodel = RQuadratic()
optimizer = torch.optim.Adam(mymodel.parameters())

for epoch in range(10000):
    optimizer.zero_grad()
    loglik = mymodel.logPr(y, x)
    (-loglik).backward()
    optimizer.step()

print(loglik)
# Should give same answer as before, tensor(-61.5762, grad_fn=<SumBackward0>)

Step 5. Getting answers out of PyTorch

The magic of PyTorch is that it remembers all the mathematical operations you’ve performed and stores them in a computation graph, in order to be able to compute the derivative. When we just want to pull some values out of PyTorch for plotting, we need to tell it to ignore all this cleverness.

f = MyFunction()

# Extract a scalar value, and get a Python object
f.θ.item()

# Extract a tensor, and get a numpy array
y = f(torch.tensor([1.0,2.0]))
y.detach().numpy()

# Tell it not to bother storing the computation graph
# (saves time, if we're not going to need the derivative)
with torch.no_grad():
    x = torch.tensor([1.0, 2.0])
    y = f(x)

EXERCISE 5. Generate the plot from exercise 1, but showing the output of your PyTorch optimization from exercise 4.

xnew = torch.linspace(0,10,100)
with torch.no_grad():
    ynew = mymodel.μ(xnew)
xnew = xnew.detach().numpy()
ynew = ynew.detach().numpy()
σhat = mymodel.σ.item()

fig,ax = plt.subplots(figsize=(4,3))
ax.scatter(xkcd.x, xkcd.y)
ax.plot(xnew, ynew, color='black')
ax.fill_between(xnew, ynew-2*σhat, ynew+2*σhat, color='steelblue', alpha=.6)
plt.show()

Step 6. Making it interactive

When we optimize a function using gradient descent, how many iterations should we use? It’s hard to know when we’re just starting out with a new model, with no experience and still experimenting. I like to run my optimizations interactively, in an infinite loop, showing the fit every few iterations. Every so often I interrupt, explore the plots in more detail, then resume.

I’ve written a piece of magic Python code to help with this, a class called Interruptable. Use it as follows. In Jupyter, we can interrupt the while loop using the menu option Kernel | Interrupt, or by the keyboard shortcut Esc I I. We can resume by rerunning the cell with Interruptable().... (Keep the epoch = 0 initialization in a cell of its own, so that we don’t reset the epoch counter when we resume.)

import IPython
epoch = 0

with Interruptable() as check_interrupted:
    while True:
        check_interrupted()
        ... # do some stuff
        if epoch % 200 == 0:
            IPython.display.clear_output(wait=True)
            ... # print output
        epoch += 1

This code also uses IPython.display.clear_output(wait=True), to make Jupyter overwrite its output rather than appending.
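
The Interruptable class itself isn’t reproduced in this tutorial. If you don’t have it to hand, here is one possible sketch of such a context manager (an assumption about how it might work, not the original code): it installs a SIGINT handler so that Kernel | Interrupt merely sets a flag, and check_interrupted() raises a private exception that the context manager swallows on exit.

import signal

class Interruptable():
    class Breakout(Exception):
        pass
    def __enter__(self):
        self.interrupted = False
        self.orig_handler = signal.getsignal(signal.SIGINT)
        signal.signal(signal.SIGINT, self.handle)   # intercept Kernel | Interrupt
        return self.check
    def __exit__(self, exc_type, exc_value, traceback):
        signal.signal(signal.SIGINT, self.orig_handler)  # restore the normal handler
        if exc_type == Interruptable.Breakout:
            print('interrupted')
            return True   # swallow the Breakout exception
        return False
    def handle(self, signal_number, frame):
        self.interrupted = True
    def check(self):
        if self.interrupted:
            self.interrupted = False
            raise Interruptable.Breakout()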

EXERCISE 6. Rerun the optimization as an interruptable infinite loop. Every few hundred epochs, plot the fit.

x = torch.tensor(xkcd.x, dtype=torch.float)
y = torch.tensor(xkcd.y, dtype=torch.float)
mymodel = RQuadratic()  # defined in Exercise 4
epoch = 0

optimizer = torch.optim.Adam(mymodel.parameters())

def plot_quadratic(mymodel):
    with torch.no_grad():
        xnew = torch.linspace(0,10,100)
        ynew = mymodel.μ(xnew)
        xnew = xnew.detach().numpy()
        ynew = ynew.detach().numpy()
        σ = mymodel.σ.item()
    fig,ax = plt.subplots()
    ax.fill_between(xnew, ynew-2*σ, ynew+2*σ, color='steelblue', alpha=.6)
    ax.plot(xnew, ynew, color='steelblue')
    ax.scatter(x, y, color='black', marker='+', alpha=.8)
    plt.show()

with Interruptable() as check_interrupted:
    while True:
        check_interrupted()
        optimizer.zero_grad()
        loglik = torch.sum(mymodel.logPr(y, x))
        (-loglik).backward()
        optimizer.step()
        epoch += 1

        if epoch % 200 == 0:
            IPython.display.clear_output(wait=True)
            print(f'epoch={epoch} loglik={loglik.item():.4} σ={mymodel.σ.item():.4}')
            plot_quadratic(mymodel)

Step 7. Using a neural network

A neural network is just another function! We can swap out the \(\mu(x)=a+bx+c x^2\) function and replace it by a neural network, i.e. a sequence of linear maps and nonlinear element-wise operations.

Here’s what a simple neural network looks like in PyTorch:

import torch.nn as nn

class RWiggle(nn.Module):
    def __init__(self):
        super().__init__()
        self.μ = nn.Sequential(
            nn.Linear(1,4),
            nn.LeakyReLU(),
            nn.Linear(4,20),
            nn.LeakyReLU(),
            nn.Linear(20,20),
            nn.LeakyReLU(),
            nn.Linear(20,1)
        )

One thing to watch out for: these standard building blocks are all intended to be used in a vectorized way. So nn.Linear(d,e) is actually implemented as a function \(f:\mathbb{R}^{n\times d}\to \mathbb{R}^{n\times e}\). It expects its input to be a matrix, whose rows correspond to datapoints, and it applies the linear map \(\mathbb{R}^d\to\mathbb{R}^e\) separately to each row.

When you’re getting used to PyTorch, it’s a good idea to build up your code interactively, checking x.shape for any tensor x you create, so you can double-check its size. For example,

x = torch.tensor(xkcd.x, dtype=torch.float)
print(x.shape)
xm = x[:,None]
print(xm.shape)
y = nn.Linear(1,4)(xm)
print(y.shape)

EXERCISE 7. Create a class RWiggle, like RQuadratic from exercise 4 but using a neural network for the \(\mu\) function. Fit it interactively, and plot the fit as in exercise 6.

(It’s a bit silly to use a neural network with many layers to learn an arbitrary function like this. Deep neural networks are great for learning higher-order patterns — for example, the first few layers might learn to detect straight lines and corners in an image, and later layers might learn to recognize shapes like squares and triangles. In this toy example, there’s no such higher-order structure to learn.)

class RWiggle(nn.Module):
    def __init__(self):
        super().__init__()
        # self.μ maps R^(n×1) to R^(n×1)
        self.μ = nn.Sequential(
            nn.Linear(1,4),
            nn.LeakyReLU(),
            nn.Linear(4,20),
            nn.LeakyReLU(),
            nn.Linear(20,20),
            nn.LeakyReLU(),
            nn.Linear(20,1)
        )
        self.σ = nn.Parameter(torch.tensor(1.0))
    def logPr(self, y, x):
        # x and y are tensors of shape (n,)
        # Reshape x to be (n,1), apply μ, then drop the last dimension
        m = self.μ(x[:,None])[:,0]  
        σ2 = self.σ ** 2
        return torch.sum(- 0.5*torch.log(2*np.pi*σ2) - torch.pow(y - m, 2) / (2*σ2))

def plot_wiggle(mymodel):
    with torch.no_grad():
        xnew = torch.linspace(0,10,100)[:,None] # array dim 100×1
        ynew = mymodel.μ(xnew)                  # array dim 100×1
        xnew = xnew.detach().numpy()[:,0]       # vector length 100
        ynew = ynew.detach().numpy()[:,0]       # vector length 100
        σ = mymodel.σ.item()
    fig,ax = plt.subplots()
    ax.fill_between(xnew, ynew-2*σ, ynew+2*σ, color='steelblue', alpha=.6)
    ax.plot(xnew, ynew, color='steelblue')
    ax.scatter(x, y, color='black', marker='+', alpha=.8)
    plt.show()

x = torch.tensor(xkcd.x, dtype=torch.float)
y = torch.tensor(xkcd.y, dtype=torch.float)
mymodel = RWiggle()
epoch = 0
optimizer = torch.optim.Adam(mymodel.parameters())

with Interruptable() as check_interrupted:
    while True:
        check_interrupted()
        optimizer.zero_grad()
        loglik = mymodel.logPr(y, x)
        (-loglik).backward()
        optimizer.step()
        epoch += 1    
        if epoch % 200 == 0:
            IPython.display.clear_output(wait=True)
            print(f'epoch={epoch} loglik={loglik.item():.4} σ={mymodel.σ.item():.4}')
            plot_wiggle(mymodel)

Step 8. Batched gradient descent

For large datasets, it’s not practical to compute the full log likelihood of the entire dataset at every iteration. It’s better to split the dataset into batches, shuffle them, and then for each batch in turn compute the log likelihood and take a gradient-descent step. This also injects some randomness into the gradient descent, which helps to avoid getting stuck at local optima.

In neural network terminology, we use the word ‘epoch’ to mean ‘a pass through the entire dataset’.

PyTorch has a handy utility called DataLoader for taking an iterable object and splitting it into batches. If we give it a matrix, it splits it into groups of rows.

import torch
data = torch.tensor(np.column_stack([xkcd.x, xkcd.y]), dtype=torch.float)
data_batched = torch.utils.data.DataLoader(data, batch_size=5, shuffle=True)
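
For example, we can loop over data_batched to see what the batches look like (xb and yb are just throwaway names here). With 31 datapoints and batch_size=5, most batches have 5 rows and the final one has a single row:

for b in data_batched:
    xb, yb = b[:,0], b[:,1]   # each batch is a matrix with one row per datapoint
    print(b.shape)            # torch.Size([5, 2]) for most batches, torch.Size([1, 2]) for the last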

EXERCISE 8. Modify the code from exercise 7 to use batched gradient descent.

data = torch.tensor(np.column_stack([xkcd.x, xkcd.y]), dtype=torch.float)
data_batched = torch.utils.data.DataLoader(data, batch_size=5, shuffle=True)

mymodel = RWiggle() # from exercise 7
optimizer = torch.optim.Adam(mymodel.parameters())
epoch = 0

with Interruptable() as check_interrupted:
    while True:
        check_interrupted()
        for b in data_batched:
            optimizer.zero_grad()
            loglik = mymodel.logPr(b[:,1], b[:,0])
            (-loglik).backward()
            optimizer.step()
        epoch = epoch + 1

        if epoch % 200 == 0:
            IPython.display.clear_output(wait=True)
            print(f'epoch={epoch} loglik={loglik.item():.4} σ={mymodel.σ.item():.4}')
            plot_wiggle(mymodel) # from exercise 7

Step 9. Challenge

Now try it yourself!

The PyTorch challenge gives you a dataset consisting of \((x,y)\) pairs, where \(x\in[0,1]\) and \(y\in\mathbb{R}^2\), and asks you to learn a smooth line through these \(y\) points by fitting the model \[ \begin{bmatrix} Y_{1}\\Y_{2} \end{bmatrix} \sim \mu(x) + \begin{bmatrix} N(0,\sigma^2)\\N(0,\sigma^2) \end{bmatrix} \]
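
As a possible starting point (a sketch only, with a made-up class name RSmooth and made-up layer sizes, not a prescribed solution), the main structural change from RWiggle in exercise 7 is that μ must now produce two outputs per datapoint, and the log likelihood sums over both components of y:

class RSmooth(nn.Module):
    def __init__(self):
        super().__init__()
        # μ maps R^(n×1) to R^(n×2)
        self.μ = nn.Sequential(
            nn.Linear(1,20),
            nn.LeakyReLU(),
            nn.Linear(20,20),
            nn.LeakyReLU(),
            nn.Linear(20,2)
        )
        self.σ = nn.Parameter(torch.tensor(1.0))
    def logPr(self, y, x):
        # x has shape (n,1) with entries in [0,1]; y has shape (n,2)
        m = self.μ(x)
        σ2 = self.σ ** 2
        return torch.sum(- 0.5*torch.log(2*np.pi*σ2) - torch.pow(y - m, 2) / (2*σ2))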