Today I will share how to implement a deep neural network from scratch using NumPy, a task commonly asked about in machine learning interviews. The implementation covers:
- Initialization
- Activation Function
- Single Layer Forward and Full Layer Forward
- Cost Function and Metric Evaluation
- Single Layer Backward and Full Layer Backward
- Training and Test
Code Implementation
import numpy as np
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
# Initialization
def init_layers(nn_structure, seed=42):
    np.random.seed(seed)
    param_values = {}
    for i, layer in enumerate(nn_structure):
        layer_idx = i + 1
        layer_input_size = layer["input_dim"]
        layer_output_size = layer["output_dim"]
        # W has shape (output_dim, input_dim); small random values break symmetry
        param_values["W_" + str(layer_idx)] = np.random.randn(layer_output_size, layer_input_size) * 0.1
        param_values["b_" + str(layer_idx)] = np.random.randn(layer_output_size, 1) * 0.1
    return param_values
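As a quick sanity check (a hypothetical two-layer structure of my own, not part of the network used later), each W_i should come back with shape (output_dim, input_dim) and each b_i with shape (output_dim, 1):

demo_structure = [
    {"input_dim": 2, "output_dim": 4, "activation": "relu"},
    {"input_dim": 4, "output_dim": 1, "activation": "sigmoid"},
]
demo_params = init_layers(demo_structure)
print(demo_params["W_1"].shape, demo_params["b_1"].shape)  # (4, 2) (4, 1)
print(demo_params["W_2"].shape, demo_params["b_2"].shape)  # (1, 4) (1, 1)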
# Activation Function
def sigmoid(Z):
    return 1 / (1 + np.exp(-Z))

def relu(Z):
    return np.maximum(0, Z)

def sigmoid_backward(dA, Z):
    # chain rule with the sigmoid derivative: sigmoid(Z) * (1 - sigmoid(Z))
    sig = sigmoid(Z)
    return dA * sig * (1 - sig)

def relu_backward(dA, Z):
    # ReLU passes the gradient through where Z > 0 and blocks it elsewhere
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ
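A small numerical check (my own addition, assuming a toy input Z_demo) shows that the analytic derivative used in sigmoid_backward matches a finite-difference estimate:

Z_demo = np.array([[-1.0, 0.0, 2.0]])
eps = 1e-6
numeric = (sigmoid(Z_demo + eps) - sigmoid(Z_demo - eps)) / (2 * eps)
analytic = sigmoid_backward(np.ones_like(Z_demo), Z_demo)
print(np.max(np.abs(numeric - analytic)))  # should be close to 0 (around 1e-10)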
# Single Layer Forward and Full Layer Forward
def single_layer_forward(A_prev, W, b, activation):
    # linear step followed by the layer's non-linearity
    Z = np.dot(W, A_prev) + b
    if activation == "sigmoid":
        A = sigmoid(Z)
    elif activation == "relu":
        A = relu(Z)
    else:
        raise Exception("Non-supported activation functions")
    return A, Z
def full_layer_forward(X, param_values, nn_structure):
    cache = {}
    A = X
    for i, layer in enumerate(nn_structure):
        layer_idx = i + 1
        A_prev = A
        activation = layer["activation"]
        W = param_values["W_" + str(layer_idx)]
        b = param_values["b_" + str(layer_idx)]
        A, Z = single_layer_forward(A_prev, W, b, activation)
        # cache the layer input A_i and pre-activation Z_{i+1} for backpropagation
        cache["A_" + str(i)] = A_prev
        cache["Z_" + str(layer_idx)] = Z
    return A, cache
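A quick shape trace (reusing the hypothetical demo_structure and demo_params from the sketch above): columns are samples and rows are features, so the input has shape (input_dim, n_samples).

X_demo = np.random.randn(2, 5)
A_out, demo_cache = full_layer_forward(X_demo, demo_params, demo_structure)
print(A_out.shape)         # (1, 5): one sigmoid output per sample
print(sorted(demo_cache))  # ['A_0', 'A_1', 'Z_1', 'Z_2']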
# Cost Function and Metric Evaluation
def cost_func(Y_hat, Y):
    # binary cross-entropy averaged over the m samples (columns)
    m = Y_hat.shape[1]
    cost = -(np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T)) / m
    return np.squeeze(cost)

def acc_func(Y_hat, Y, threshold=0.5):
    # threshold the predicted probabilities and compare with the labels
    probs = np.copy(Y_hat)
    probs[probs > threshold] = 1
    probs[probs <= threshold] = 0
    return (probs == Y).all(axis=0).mean()
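As a small cross-check (my addition, with made-up toy labels and probabilities), cost_func should agree with scikit-learn's log_loss when both classes are present:

from sklearn.metrics import log_loss
Y_demo = np.array([[1, 0, 1, 0]])
Y_hat_demo = np.array([[0.9, 0.2, 0.7, 0.4]])
print(cost_func(Y_hat_demo, Y_demo))                 # ~0.299
print(log_loss(Y_demo.ravel(), Y_hat_demo.ravel()))  # same value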
# Single Layer Backward and Full Layer Backward
def single_layer_backward(dA, W, b, Z, A_prev, activation):
    m = A_prev.shape[1]
    if activation == "sigmoid":
        dZ = sigmoid_backward(dA, Z)
    elif activation == "relu":
        dZ = relu_backward(dA, Z)
    else:
        raise Exception("Non-supported activation functions")
    # gradients are averaged over the m samples in the batch
    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db
def full_layer_backward(Y_hat, Y, cache, param_values, nn_structure):
    grads_values = {}
    Y = Y.reshape(Y_hat.shape)
    # derivative of the binary cross-entropy cost with respect to Y_hat
    dA_prev = -(np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))
    for layer_idx_prev in range(len(nn_structure) - 1, -1, -1):
        layer_idx = layer_idx_prev + 1
        layer = nn_structure[layer_idx_prev]
        activation = layer["activation"]
        dA = dA_prev
        A_prev = cache["A_" + str(layer_idx_prev)]
        Z = cache["Z_" + str(layer_idx)]
        W = param_values["W_" + str(layer_idx)]
        b = param_values["b_" + str(layer_idx)]
        dA_prev, dW, db = single_layer_backward(dA, W, b, Z, A_prev, activation)
        grads_values["dW_" + str(layer_idx)] = dW
        grads_values["db_" + str(layer_idx)] = db
    return grads_values
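A numerical gradient check is a handy optional way to convince yourself the backward pass is correct. The helper below is my own sketch built on the functions above (not part of the original implementation): it perturbs each parameter of a tiny network and compares the finite-difference gradient of cost_func with the analytic gradient.

def gradient_check(X, Y, nn_structure, eps=1e-6):
    # Compare analytic gradients from full_layer_backward with numerical
    # finite differences of cost_func; the max difference is typically ~1e-7 or smaller.
    param_values = init_layers(nn_structure)
    Y_hat, cache = full_layer_forward(X, param_values, nn_structure)
    grads = full_layer_backward(Y_hat, Y, cache, param_values, nn_structure)
    max_diff = 0.0
    for name, P in param_values.items():
        analytic = grads["d" + name]
        for idx in np.ndindex(P.shape):
            original = P[idx]
            P[idx] = original + eps
            cost_plus = cost_func(full_layer_forward(X, param_values, nn_structure)[0], Y)
            P[idx] = original - eps
            cost_minus = cost_func(full_layer_forward(X, param_values, nn_structure)[0], Y)
            P[idx] = original
            numeric = (cost_plus - cost_minus) / (2 * eps)
            max_diff = max(max_diff, abs(numeric - analytic[idx]))
    return max_diff

# Example on a hypothetical tiny network and batch:
tiny_structure = [
    {"input_dim": 2, "output_dim": 3, "activation": "relu"},
    {"input_dim": 3, "output_dim": 1, "activation": "sigmoid"},
]
print(gradient_check(np.random.randn(2, 4), np.random.randint(0, 2, (1, 4)), tiny_structure))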
# Training and Test
def update(param_values, grads_values, nn_structure, learning_rate):
    # plain gradient descent step; the range must cover every layer, 1..len(nn_structure)
    for i in range(1, len(nn_structure) + 1):
        param_values["W_" + str(i)] -= learning_rate * grads_values["dW_" + str(i)]
        param_values["b_" + str(i)] -= learning_rate * grads_values["db_" + str(i)]
    return param_values
def train(X, Y, nn_structure, epochs, learning_rate):
    param_values = init_layers(nn_structure)
    cost_history = []
    acc_history = []
    for i in range(epochs):
        # forward pass, metrics, backward pass, parameter update
        Y_hat, cache = full_layer_forward(X, param_values, nn_structure)
        cost = cost_func(Y_hat, Y)
        acc = acc_func(Y_hat, Y)
        cost_history.append(cost)
        acc_history.append(acc)
        if i % 1000 == 0:
            print("epoch %d - cost: %f, accuracy: %f" % (i, cost, acc))
        grads_values = full_layer_backward(Y_hat, Y, cache, param_values, nn_structure)
        param_values = update(param_values, grads_values, nn_structure, learning_rate)
    return param_values
# number of samples in the data set
N_SAMPLES = 1000
# ratio between training and test sets
TEST_SIZE = 0.2
# NN structure
NN_STRUCTURE = [
{"input_dim": 2, "output_dim": 25, "activation": "relu"},
{"input_dim": 25, "output_dim": 50, "activation": "relu"},
{"input_dim": 50, "output_dim": 50, "activation": "relu"},
{"input_dim": 50, "output_dim": 25, "activation": "relu"},
{"input_dim": 25, "output_dim": 1, "activation": "sigmoid"},
]
X, y = make_moons(n_samples = N_SAMPLES, noise=0.2, random_state=100)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)
# Training
params_values = train(np.transpose(X_train), np.transpose(y_train.reshape((y_train.shape[0], 1))), NN_STRUCTURE, 10000, 0.01)
# Prediction
Y_test_hat, _ = full_layer_forward(np.transpose(X_test), params_values, NN_STRUCTURE)
# Accuracy achieved on the test set
acc_test = acc_func(Y_test_hat, np.transpose(y_test.reshape((y_test.shape[0], 1))))
print("Test set accuracy: {:.2f}".format(acc_test))