# **Deep Learning Lab: TensorFlow and PyTorch**

__author__:Sehyun Park  
__email__:ps_hyen@snu.ac.kr

In this notebook, we will construct a simple neural network, first with TensorFlow and then with PyTorch.

In [1]:
import numpy as np
import tensorflow as tf
import torch
# Compute device for PyTorch: GPU 0 when CUDA is available, otherwise the CPU.
# Building the torch.device from the chosen string keeps `device` the same type
# on both branches (the CPU branch used to be a plain str).
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# **With TensorFlow**

## Efficient parameter updates using `assign`

In [2]:
# Sizes and step size for plain gradient descent.
N = 10000     # rows in x and y
D_x = 10      # columns of x / rows of w1
D_h1 = 32     # columns of w1 / rows of w2
lr = 1e-3     # learning rate

# Fix the global TensorFlow seed, then draw the data and the initial weights
# in a fixed order: x, y, w1, w2.
tf.random.set_seed(1004)
x = tf.random.normal(shape=[N, D_x])
y = tf.random.normal(shape=[N, 1])
w1 = tf.Variable(initial_value=tf.random.normal(shape=[D_x, D_h1]), name="w1")
w2 = tf.Variable(initial_value=tf.random.normal(shape=[D_h1, 1]), name="w2")

# define model framework
def forward(x):
  """Two-layer network: max(x @ w1, 0) @ w2, using the notebook-level w1 and w2."""
  hidden = tf.maximum(tf.matmul(x, w1), 0)  # element-wise max with 0 (ReLU)
  return tf.matmul(hidden, w2)

for step in range(5):
  # Record the forward pass and the loss so the tape can differentiate them.
  with tf.GradientTape() as tape:
    y_pred = forward(x)
    squared_error = (y - y_pred) ** 2
    loss = tf.reduce_mean(tf.reduce_sum(squared_error, axis=1))

  # Manual gradient-descent step: write the updated value back into each
  # variable with `assign`.
  grads = tape.gradient(loss, [w1, w2])
  for weight, grad in zip((w1, w2), grads):
    weight.assign(weight - lr * grad)

  # Loss computed before this step's update.
  print(loss)

tf.Tensor(79.70336, shape=(), dtype=float32)
tf.Tensor(72.691284, shape=(), dtype=float32)
tf.Tensor(66.84087, shape=(), dtype=float32)
tf.Tensor(61.905945, shape=(), dtype=float32)
tf.Tensor(57.69779, shape=(), dtype=float32)


## More efficient parameter updates using an optimizer

---



In [3]:
# Hyperparameters for gradient descent (learning rate 1e-3).
N, D_x, D_h1 = 10000, 10, 32
lr = 1e-3

# Re-seed, then redraw the data (x, then y) and the initial weights (w1, then w2).
tf.random.set_seed(1004)
x, y = tf.random.normal((N, D_x)), tf.random.normal((N, 1))
w1 = tf.Variable(tf.random.normal((D_x, D_h1)), name="w1")
w2 = tf.Variable(tf.random.normal((D_h1, 1)), name="w2")

# define model framework
def forward(x):
  """Return max(x @ w1, 0) @ w2, reading w1 and w2 from the notebook namespace."""
  pre_activation = tf.matmul(x, w1)
  activated = tf.maximum(pre_activation, 0)  # clip negatives to zero
  return tf.matmul(activated, w2)

# Set the optimizer: plain SGD using the learning rate `lr` defined at the top
# of this cell, so the step size is specified in exactly one place.
optimizer = tf.keras.optimizers.SGD(learning_rate=lr)

for step in range(5):
  # The tape only needs to cover the forward pass and the loss.
  with tf.GradientTape() as tape:
    y_pred = forward(x)
    loss = tf.reduce_mean(tf.reduce_sum((y - y_pred) ** 2, axis=1))

  # Differentiate and update after leaving the tape context, matching the
  # manual-update loop earlier in the notebook. The optimizer applies the
  # update to w1 and w2 in place of the hand-written `assign` calls.
  gradients = tape.gradient(loss, [w1, w2])
  optimizer.apply_gradients(zip(gradients, [w1, w2]))

  # Loss computed before this step's update.
  print(loss)

tf.Tensor(79.70336, shape=(), dtype=float32)
tf.Tensor(72.691284, shape=(), dtype=float32)
tf.Tensor(66.84087, shape=(), dtype=float32)
tf.Tensor(61.905945, shape=(), dtype=float32)
tf.Tensor(57.69779, shape=(), dtype=float32)


# **With PyTorch**

In [4]:
import torch
# Compute device: GPU 0 when CUDA is available, otherwise the CPU. Always a
# torch.device, so `device` has the same type on both branches.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Seed PyTorch's global random number generator so the random draws below are
# repeatable. As the cell's last expression, the returned Generator is displayed.
torch.manual_seed(1004)

<torch._C.Generator at 0x7b950a11fcd0>

In [5]:
# Synthetic data: x is (N, D_x) and y is (N, 1), both standard normal.
N = 10000
D_x = 10
D_h1 = 32
x = torch.randn((N, D_x))  # requires_grad is False by default
y = torch.randn((N, 1))

# Build the model with nn.Sequential: Linear -> ReLU -> Linear.
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=D_x, out_features=D_h1),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=D_h1, out_features=1),
)

# Mean-squared-error loss, minimised with plain SGD.
learning_rates = 1e-3
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rates)

for step in range(5):
  # Clear the gradients accumulated by the previous iteration.
  optimizer.zero_grad()

  # Forward pass on all of x at once.
  y_pred = model(x)
  loss = criterion(y_pred, y)
  print(loss)  # loss before this iteration's update

  loss.backward()   # back-propagate
  optimizer.step()  # apply the SGD update

tensor(1.0414, grad_fn=<MseLossBackward0>)
tensor(1.0414, grad_fn=<MseLossBackward0>)
tensor(1.0413, grad_fn=<MseLossBackward0>)
tensor(1.0413, grad_fn=<MseLossBackward0>)
tensor(1.0412, grad_fn=<MseLossBackward0>)


In [6]:
# Construct model by class!

"""
model = torch.nn.Sequential(
        torch.nn.Linear(D_x, D_h1),
        torch.nn.ReLU(),
        torch.nn.Linear(D_h1, 1))
"""

# 'nn.Module' makes your life happier.
class SimpleNet(torch.nn.Module):
  """Two fully connected layers with a ReLU in between and a single output.

  Args:
    D_x: number of input features. The default is the notebook-level D_x,
      captured when this class is defined.
    D_h1: width of the hidden layer. The default is the notebook-level D_h1,
      captured when this class is defined.
  """

  def __init__(self, D_x=D_x, D_h1=D_h1):
    super().__init__()
    self.linear1 = torch.nn.Linear(D_x, D_h1)  # input -> hidden
    self.linear2 = torch.nn.Linear(D_h1, 1)    # hidden -> single output

  def forward(self, x):
    """Return linear2(relu(linear1(x))); the ReLU is written as clamp(min=0)."""
    hidden = self.linear1(x)
    return self.linear2(hidden.clamp(min=0))


# Fresh model with the default layer sizes, trained with MSE loss and plain SGD.
learning_rates = 1e-3
model = SimpleNet()
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rates)


# Gradient descent
for _ in range(5):
  # Start each iteration from zeroed gradients.
  optimizer.zero_grad()

  # Forward pass and loss on the tensors x and y.
  y_pred = model(x)
  loss = criterion(y_pred, y)
  print(loss)  # loss before this iteration's update

  loss.backward()   # back-propagate
  optimizer.step()  # apply the SGD update


tensor(1.0400, grad_fn=<MseLossBackward0>)
tensor(1.0399, grad_fn=<MseLossBackward0>)
tensor(1.0399, grad_fn=<MseLossBackward0>)
tensor(1.0399, grad_fn=<MseLossBackward0>)
tensor(1.0398, grad_fn=<MseLossBackward0>)


In [7]:
from torch.utils.data import TensorDataset, DataLoader

# Problem sizes (same values as in the earlier cells).
N = 10000
D_x = 10
D_h1 = 32

# Loader style: wrap x and y in a TensorDataset and serve them in batches of
# 100. `shuffle` is left at its default.
loader = DataLoader(dataset=TensorDataset(x, y), batch_size=100)

# Mini-batch gradient descent: one parameter update per batch from the loader.
for epoch in range(5):
  for x_batch, y_batch in loader:
    # Use the batch tensors directly. Rebinding the notebook-level x and y here
    # would replace the full data set with the last mini-batch, so re-running
    # this cell would build the loader from that single batch.
    y_pred = model(x_batch)
    loss = criterion(y_pred, y_batch)

    optimizer.zero_grad()
    loss.backward() # back-prop
    optimizer.step() # update

  # Loss of the last mini-batch processed in this epoch (not an epoch average).
  print(loss)


tensor(0.9929, grad_fn=<MseLossBackward0>)
tensor(0.9929, grad_fn=<MseLossBackward0>)
tensor(0.9925, grad_fn=<MseLossBackward0>)
tensor(0.9921, grad_fn=<MseLossBackward0>)
tensor(0.9916, grad_fn=<MseLossBackward0>)
