# **Deep Learning Lab: Computational graph**

__author__:Sehyun Park  
__email__:ps_hyen@snu.ac.kr

In this notebook, we will construct simple computational graphs with numpy, tensorflow, and pytorch.

#**With numpy**

In [1]:
import numpy as np
np.random.seed(0)

In [2]:
N, D = 3, 4
# Define variables
x = np.random.randn(N, D)
y = np.random.randn(N, D)
z = np.random.randn(N, D)

In [3]:
print('x:\n',x)
print('shape of x:\n',x.shape)

x:
 [[ 1.76405235  0.40015721  0.97873798  2.2408932 ]
 [ 1.86755799 -0.97727788  0.95008842 -0.15135721]
 [-0.10321885  0.4105985   0.14404357  1.45427351]]
shape of x:
 (3, 4)


In [4]:
# Define operations
a = x * y
b = a + z
c = np.sum(b)

In [5]:
grad_c = 1.0
grad_b = grad_c * np.ones((N,D))
grad_a = grad_b.copy()
grad_z = grad_b.copy()
grad_x = grad_a * y
grad_y = grad_a * x

In [6]:
print(grad_x)

[[ 0.76103773  0.12167502  0.44386323  0.33367433]
 [ 1.49407907 -0.20515826  0.3130677  -0.85409574]
 [-2.55298982  0.6536186   0.8644362  -0.74216502]]


# **With Tensorflow**

In [7]:
import tensorflow as tf

In [8]:
print('GPUs are available: ', tf.test.is_gpu_available())

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


GPUs are available:  True


In [9]:
x_tf = tf.Variable(x, name = "x")
y_tf = tf.Variable(y, name = "y")
z_tf = tf.Variable(z, name = "z")

with tf.GradientTape(persistent = True) as tape:
  a = x_tf * y_tf
  b = a + z_tf
  c = tf.reduce_sum(b)

grad_x, grad_y, grad_z = tape.gradient(c, [x_tf, y_tf, z_tf])

In [10]:
grad_x

<tf.Tensor: shape=(3, 4), dtype=float64, numpy=
array([[ 0.76103773,  0.12167502,  0.44386323,  0.33367433],
       [ 1.49407907, -0.20515826,  0.3130677 , -0.85409574],
       [-2.55298982,  0.6536186 ,  0.8644362 , -0.74216502]])>

#**With Pytorch**

In [11]:
import torch

In [12]:
print('GPUs are available: ', torch.cuda.is_available())

GPUs are available:  True


In [13]:
import torch

device = torch.device('cuda:0') if torch.cuda.is_available() is True else None

N, D = 3, 4

# Define variables
x_to = torch.tensor(x, requires_grad=True, device=device)
y_to= torch.tensor(y, requires_grad=True, device=device)
z_to = torch.tensor(z, requires_grad=True, device=device)

# Define operations
a = x_to * y_to
b = a + z_to
c = torch.sum(b)

c.backward()

In [14]:
print('x.grad: ',x_to.grad)

x.grad:  tensor([[ 0.7610,  0.1217,  0.4439,  0.3337],
        [ 1.4941, -0.2052,  0.3131, -0.8541],
        [-2.5530,  0.6536,  0.8644, -0.7422]], device='cuda:0',
       dtype=torch.float64)


\

# **Simple perceptron**

## **With Tensorflow**

In [15]:
N, D_x, D_h1 = 10000, 10, 32

x = tf.Variable(tf.random.normal((N, D_x)), name='x')
y = tf.Variable(tf.random.normal((N, 1)), name='y')
w1 = tf.Variable(tf.random.normal((D_x, D_h1)), name='w1')
w2 = tf.Variable(tf.random.normal((D_h1, 1)), name='w2')

with tf.GradientTape() as tape:
  h1 = tf.maximum(tf.matmul(x,w1), 0)
  y_pred = tf.matmul(h1,w2)
  loss = tf.reduce_mean(tf.reduce_sum((y-y_pred) **2, axis=1))

grad_w1, grad_w2 = tape.gradient(loss, [w1, w2])

In [16]:
grad_w1, grad_w2

(<tf.Tensor: shape=(10, 32), dtype=float32, numpy=
 array([[-2.4804134 ,  0.7629787 , -3.0890114 ,  1.6935879 ,  0.49326903,
          4.141502  , -1.0826458 ,  0.47780025,  0.15680213,  0.18337539,
         -0.88825274, -0.2593753 ,  2.1260767 ,  0.5827554 , -0.98992574,
          0.20804149,  0.93529135,  0.6160516 , -0.5802758 ,  0.15672639,
          1.3419573 , -1.2930797 , -0.15163049,  1.5235788 ,  0.53456587,
          3.0382314 ,  0.05519387, -1.4916612 ,  0.3462555 , -0.429146  ,
          1.2346029 ,  0.73626333],
        [-1.8874203 , -0.2230596 , -2.8029366 , -0.47598794, -0.14632006,
          1.3070706 , -0.28864563,  0.5341218 ,  0.5892554 ,  0.27382424,
          0.45685416,  0.2707192 ,  0.87038684,  0.39864078, -0.52221483,
          0.30646166, -0.08501669,  0.07729276, -1.0291512 ,  0.3993472 ,
          3.2461135 , -1.0670245 ,  0.67403704,  1.0686061 ,  2.3333626 ,
          0.45037714,  0.04201885, -0.6802182 , -0.80219775, -0.7416532 ,
          0.07225764, -0.

##**With Pytorch**

In [17]:
device = torch.device('cuda:0') if torch.cuda.is_available() is True else None

N, D_x, D_h1 = 10000, 10, 32

x = torch.randn((N, D_x), requires_grad=False, device = device)
y = torch.randn((N, 1), requires_grad=False, device = device)
w1 = torch.randn((D_x, D_h1), requires_grad=True, device = device)
w2 = torch.randn((D_h1, 1), requires_grad=True, device = device)

h1 = torch.max(torch.matmul(x, w1),0)
y_pred = torch.matmul(h1.values, w2)
loss = torch.mean(torch.sum((y - y_pred)**2, axis = 1))

loss.backward()

In [18]:
w1.grad, w2.grad

(tensor([[ 2.6533e+01,  2.9154e+01,  2.3209e+02, -1.8531e+02,  7.6745e+01,
          -3.9262e+02, -3.0547e+01,  2.1023e+01,  1.3174e+02, -1.8753e+02,
           2.7668e+01, -4.6347e+01, -5.5038e+01,  1.0353e+02,  1.0028e+02,
          -1.6998e+02,  2.2380e+02, -1.3566e+02, -7.9261e+01, -2.5503e+02,
           3.3681e+00,  9.6904e+01,  2.8491e+02,  4.4402e+01,  2.2089e+02,
           1.4606e+02,  2.1138e+02,  2.7057e+01,  8.6026e+01,  8.3876e+01,
           5.9292e+02, -7.8925e-01],
         [-3.0205e+02,  1.0732e+01, -8.4715e+02,  1.7116e+02, -9.2541e+01,
          -8.8902e+02, -1.5738e+01,  1.2161e+02,  2.4310e+01,  2.9219e+01,
           5.5164e+02,  1.5435e+02,  6.6366e+01, -7.1103e+01, -1.3163e+01,
           3.8246e+01, -2.5223e+02,  9.3170e+01,  7.6384e+00, -4.6667e+01,
           1.9044e+00,  4.6951e+01,  9.6103e+02, -3.4285e+01,  1.8268e+02,
           1.0864e+02,  3.2070e+02, -9.3990e+01,  4.4304e+01,  6.2995e+01,
          -5.7139e+01,  2.2173e+01],
         [ 7.2391e+01,  3.