In [1]:
import numpy as np
x = np.linspace(-np.pi, np.pi, 200).reshape(-1, 1)
y = np.sin(x)
In [2]:
np.random.seed(0)
In [3]:
input_size = 1
hidden_size = 30
output_size = 1
In [4]:
w1 = np.random.randn(input_size, hidden_size) * 0.1
b1 = np.zeros((1, hidden_size))
w2 = np.random.randn(hidden_size, output_size) * 0.1
b2 = np.zeros((1, output_size))
In [5]:
def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1 - np.tanh(x) ** 2
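As a quick sanity check (a hedged sketch, not one of the original cells), tanh_derivative can be compared against a central finite difference at an arbitrary point:

# compare the analytic derivative 1 - tanh(x)**2 to a central finite difference
pt, eps = 0.5, 1e-6
fd = (np.tanh(pt + eps) - np.tanh(pt - eps)) / (2 * eps)
print(tanh_derivative(pt), fd)  # the two values should agree to many decimal places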
In [6]:
learning_rate = 0.01
epochs = 100000
In [7]:
for epoch in range(epochs):
    # forward pass
    z1 = np.dot(x, w1) + b1
    a1 = tanh(z1)
    z2 = np.dot(a1, w2) + b2
    a2 = tanh(z2)
    # backward pass: gradient of the mean squared error, chained through each layer
    dl_wrt_a2 = 2 / x.shape[0] * (a2 - y)
    dl_wrt_z2 = dl_wrt_a2 * tanh_derivative(z2)
    dl_wrt_w2 = np.dot(a1.T, dl_wrt_z2)
    dl_wrt_b2 = np.sum(dl_wrt_z2, axis=0, keepdims=True)
    dl_wrt_a1 = np.dot(dl_wrt_z2, w2.T)
    dl_wrt_z1 = dl_wrt_a1 * tanh_derivative(z1)
    dl_wrt_w1 = np.dot(x.T, dl_wrt_z1)
    dl_wrt_b1 = np.sum(dl_wrt_z1, axis=0, keepdims=True)
    # gradient descent update
    w1 -= learning_rate * dl_wrt_w1
    b1 -= learning_rate * dl_wrt_b1
    w2 -= learning_rate * dl_wrt_w2
    b2 -= learning_rate * dl_wrt_b2
    if epoch % 10000 == 0:
        print(f"epoch: {epoch} dl_wrt_a2: {dl_wrt_a2[0]}")
epoch: 0 dl_wrt_a2: [0.00286213]
epoch: 10000 dl_wrt_a2: [-0.00192854]
epoch: 20000 dl_wrt_a2: [-0.00100441]
epoch: 30000 dl_wrt_a2: [-0.00061229]
epoch: 40000 dl_wrt_a2: [-0.00039218]
epoch: 50000 dl_wrt_a2: [-0.00024361]
epoch: 60000 dl_wrt_a2: [-0.00013242]
epoch: 70000 dl_wrt_a2: [-4.42860678e-05]
epoch: 80000 dl_wrt_a2: [2.81320997e-05]
epoch: 90000 dl_wrt_a2: [8.91257671e-05]
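The loop only prints the first entry of dl_wrt_a2. To watch the quantity that is actually being minimized, a minimal follow-up sketch (reusing x, y and the trained w1, b1, w2, b2 from above; not an original cell) could report the mean squared error:

# hedged sketch: evaluate the training MSE after the loop has finished
a1_final = tanh(np.dot(x, w1) + b1)
a2_final = tanh(np.dot(a1_final, w2) + b2)
print("final training MSE:", np.mean((a2_final - y) ** 2))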
In [8]:
import matplotlib.pyplot as plt
x_test = np.linspace(-np.pi, np.pi, 100).reshape(-1, 1)
y_test = np.sin(x_test)
z1_test = np.dot(x_test, w1) + b1
a1_test = tanh(z1_test)
z2_test = np.dot(a1_test, w2) + b2
a2_test = tanh(z2_test)
plt.plot(x_test, y_test, label="True sin(x)", color="blue")
plt.plot(x_test, a2_test, label="MLP Prediction", color="red", linestyle="dashed")
plt.legend()
plt.show()
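Beyond the visual overlay, a one-line check (a hedged addition reusing a2_test and y_test from the cell above) gives a quantitative sense of the fit:

# maximum absolute error of the MLP prediction on the test grid
print("max |prediction - sin|:", np.max(np.abs(a2_test - y_test)))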
In [9]:
import math
# manual forward pass of the trained MLP at x = pi; note the biases b1 and b2 are not applied here
MLPsine90 = tanh(np.dot(tanh(np.dot(math.pi, w1)), w2))
out = float(MLPsine90[0][0])
expt = math.sin(math.pi)
print(f"expected {expt} got {out}")
expected 1.2246467991473532e-16 got -0.014145429544487284
In [10]:
# expt = sin(pi) is ~1e-16, so expt / out * 100 is negligible and diff is effectively just out
diff = out - expt / out * 100
print(f"off by {math.fabs(diff)}%")
off by 0.01414542954362153%
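Since the target sin(pi) is essentially zero, a percentage error against it is not very meaningful; an absolute error (a hedged sketch using the out and expt values above) is more informative here:

# absolute error at x = pi; the relative error is ill-defined because expt is ~0
print("absolute error at pi:", abs(out - expt))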
In [11]:
def mlpsin(x):
    # forward pass through the trained MLP (b1 and b2 are omitted, so mlpsin(0) is exactly 0)
    return tanh(np.dot(tanh(np.dot(x, w1)), w2))
In [12]:
mlpsin(0)
Out[12]:
array([[0.]])
In [13]:
def mlpcos(x, epsilon=1e-5):
    # forward-difference approximation of the derivative of mlpsin
    return (mlpsin(x + epsilon) - mlpsin(x)) / epsilon
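A central-difference variant (a hypothetical alternative, not used in the cells below) typically has smaller truncation error than the forward difference above:

def mlpcos_central(x, epsilon=1e-5):
    # central difference: O(epsilon**2) truncation error vs O(epsilon) for the forward difference
    return (mlpsin(x + epsilon) - mlpsin(x - epsilon)) / (2 * epsilon)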
In [14]:
x_test = np.linspace(-np.pi, np.pi, 100).reshape(-1, 1)
y_pred_sin = np.array([mlpsin(x) for x in x_test])
y_pred_cos = np.array([mlpcos(x) for x in x_test])
In [15]:
plt.plot(x_test, y_pred_sin, label="mlpsin(x)", color="blue", linestyle="dashed")
plt.plot(x_test, y_pred_cos, label="mlpcos(x)", color="red", linestyle="dashed")
plt.legend()
plt.show()
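For a numerical rather than visual comparison, a hedged one-liner (reusing y_pred_cos and x_test from above) measures how far the finite-difference mlpcos is from the true cosine:

# maximum deviation of the finite-difference derivative from cos(x) on the grid
print("max |mlpcos - cos|:", np.max(np.abs(y_pred_cos - np.cos(x_test))))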
In [16]:
def np_cos_fake(x, epsilon=1e-5):
    # same forward-difference trick applied to the true sin, for comparison
    return (np.sin(x + epsilon) - np.sin(x)) / epsilon
x_test = np.linspace(-np.pi, np.pi, 100).reshape(-1, 1)
y_pred_sin = np.array([np.sin(x) for x in x_test])
y_pred_cos = np.array([np_cos_fake(x) for x in x_test])
In [17]:
plt.plot(x_test, y_pred_sin, label="true sin(x)", color="blue", linestyle="dashed")
plt.plot(x_test, y_pred_cos, label="true cos(x)", color="red", linestyle="dashed")
plt.legend()
plt.show()
In [18]:
def mlpsin2dif(x, epsilon=1e-5):
    # second-order finite difference: approximates the second derivative of mlpsin, i.e. roughly -sin(x)
    return (mlpcos(x + epsilon) - mlpcos(x)) / epsilon
In [19]:
x_test = np.linspace(-np.pi, np.pi, 100).reshape(-1, 1)
y_pred_sin = np.array([mlpsin2dif(x) for x in x_test])
plt.plot(x_test, y_pred_sin, label="mlpsin2dif(x)", color="blue", linestyle="dashed")
plt.legend()
plt.show()
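Chaining two forward differences divides by epsilon**2, so floating-point noise can creep in for very small steps; a central second difference with a slightly larger step (a hypothetical variant, not part of the notebook above) is one common way to keep that in check:

def mlpsin2dif_central(x, epsilon=1e-3):
    # central second difference: (f(x+h) - 2*f(x) + f(x-h)) / h**2, roughly approximating -sin(x)
    return (mlpsin(x + epsilon) - 2 * mlpsin(x) + mlpsin(x - epsilon)) / epsilon ** 2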