A1) Add mini-batch stochastic gradient descent for both networks, and add a function to calculate prediction accuracy for both networks.
A2) Plot a histogram of the activations (activation-function outputs) of each layer in network 1 after training is done.
A3) Add two more hidden layers to the first network and define backpropagation accordingly.
A4) Add 1, 2 and 3 more hidden layers to network 2 and plot the cost for each epoch as a line in a line plot. The color of each line should be unique, based on the number of hidden layers. Your line plot should have 4 lines: the original (2-layer), 3-layer, 4-layer and 5-layer networks.
Please see the code in the attached document and modify it according to the questions above. Please do this in an .ipynb file (Jupyter) or Google Colab.

[1] Do it in an .ipynb file
[2] from sklearn import datasets
from sklearn import preprocessing
import torch
import numpy as np
# [3] Load the Iris data set. Each sample's four features are scaled to unit
# L2 norm (the default of preprocessing.normalize) and the class labels are
# reshaped into a column vector so they line up with the network output.
iris = datasets.load_iris()
samples = preprocessing.normalize(iris.data[:, :4])
labels = iris.target.reshape(-1, 1)

# [4]
print(samples.shape, labels.shape)

# [5] For binary classification, drop every sample of class 2 so that only
# classes 0 and 1 remain.
# `labels` is 2-D, so np.where returns (row indices, column indices); only the
# row indices are needed to delete whole samples.
rows_to_drop, _ = np.where(labels == 2)
samples = np.delete(samples, rows_to_drop, axis=0)
labels = np.delete(labels, rows_to_drop, axis=0)

# [6]
print(samples.shape, labels.shape)

# [7] Load the data into float torch tensors (inputs and BCE targets).
samples = torch.tensor(samples, dtype=torch.float)
labels = torch.tensor(labels, dtype=torch.float)
# [8]
class TwoLayerNN(torch.nn.Module):
    """Two-layer binary classifier trained with hand-written backpropagation.

    Architecture: input (4) -> ReLU hidden layer (32) -> sigmoid output (1),
    without bias terms. The weights are plain tensors (not
    ``torch.nn.Parameter``), so autograd is not involved: ``forward`` caches
    the intermediate activations on the instance and ``backward`` uses them
    to compute the gradients and update the weights in place.
    """

    def __init__(self):
        # Needed so the torch.nn.Module machinery behind ``model(x)`` is set up.
        super().__init__()
        self.input_dim = 4
        self.hidden_dim = 32
        self.output_dim = 1
        self.learningRate = 0.01
        self.w1 = torch.randn(self.input_dim, self.hidden_dim)
        self.w2 = torch.randn(self.hidden_dim, self.output_dim)

    ### activation functions
    def sigmoid(self, z):
        """Return the element-wise logistic sigmoid of ``z``."""
        return torch.sigmoid(z)

    ### derivative of activation functions
    def reluPrime(self, z):
        """Return d/dz ReLU(z): 1 where ``z > 0`` and 0 elsewhere."""
        return (z > 0).to(z.dtype)

    def sigmoidPrime(self, x):
        """Return the sigmoid derivative given ``x``, the sigmoid *output*."""
        return x * (1 - x)

    # Forward propagation
    def forward(self, X):
        """Compute the network output for ``X`` and cache z1, a1, z2, a2.

        ``X`` is multiplied by ``w1`` (input_dim x hidden_dim), so its last
        dimension must equal ``input_dim``.
        """
        self.z1 = torch.matmul(X, self.w1)  # (N, input_dim) @ (input_dim, hidden_dim)
        self.a1 = torch.nn.functional.relu(self.z1)
        self.z2 = torch.matmul(self.a1, self.w2)  # (N, hidden_dim) @ (hidden_dim, output_dim)
        self.a2 = self.sigmoid(self.z2)
        return self.a2

    #backpropagation
    def backward(self, samples, labels, y_hat):
        """Run one gradient-descent step using the values cached by ``forward``.

        Must be called right after ``forward(samples)``. ``y_hat`` is accepted
        for interface compatibility; the cached ``self.a2`` is what is used.
        The gradients are those of the binary cross-entropy *summed* over the
        batch (they are not divided by the batch size).
        """
        # For a sigmoid output with cross-entropy loss, dL/dz2 = a2 - y.
        self.dz2 = self.a2 - labels
        self.dw2 = self.a1.t().mm(self.dz2)
        # Propagate the error back through w2 to the hidden activations.
        self.da1 = self.dz2.mm(self.w2.t())
        self.dz1 = self.da1 * self.reluPrime(self.z1)
        self.dw1 = samples.t().mm(self.dz1)
        self.w1 -= self.learningRate * self.dw1
        self.w2 -= self.learningRate * self.dw2

# [9] Train the hand-written-backprop network with full-batch gradient descent.
model = TwoLayerNN()
num_epochs = 50
cost = torch.nn.BCELoss()
for epoch in range(num_epochs):
    # Forward pass over the whole data set; the model caches its activations.
    y_hat = model(samples)
    # The loss is only used for reporting; the update comes from model.backward.
    epoch_cost = cost(y_hat, labels)
    if epoch % 5 == 0:
        print('Epoch {} | Loss: {}'.format(epoch, epoch_cost))
    # Manual gradient step based on the activations cached by the forward pass.
    model.backward(samples, labels, y_hat)

# [10] Implementing the same neural network using PyTorch's built-in (autograd) backpropagation
class TwoLayerNN(torch.nn.Module):
    """Binary classifier 4 -> 32 (ReLU) -> 1 (sigmoid) built from torch.nn layers.

    All weights and biases live in ``torch.nn.Linear`` modules, so gradients
    are obtained through autograd rather than computed by hand.
    """

    def __init__(self):
        super().__init__()
        # Sub-modules are registered in the order the forward pass uses them.
        self.fc1 = torch.nn.Linear(in_features=4, out_features=32)
        self.fc2 = torch.nn.Linear(in_features=32, out_features=1)
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid output for input ``x`` (last dimension 4)."""
        hidden = self.relu(self.fc1(x))
        logits = self.fc2(hidden)
        return self.sigmoid(logits)

# [11] Train the torch.nn version with full-batch SGD and autograd.
model = TwoLayerNN()
cost = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
num_epochs = 50
for epoch in range(num_epochs):
    # Clear the gradients left over from the previous iteration.
    optimizer.zero_grad()
    y_hat = model(samples)
    epoch_cost = cost(y_hat, labels)
    # Autograd computes the gradients; the optimizer applies the update.
    epoch_cost.backward()
    optimizer.step()
    if epoch % 5 == 0:
        print('Epoch {} | Loss: {}'.format(epoch, epoch_cost))

