This lecture will focus on neural networks. Neural networks can be seen as an extension of logistic regression. Let us start then by fitting logistic regression to the Mroz dataset from the last few lectures.
Let us first import the necessary Python libraries and load the Mroz dataset.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
#Loading the mroz dataset:
mroz = pd.read_csv("MROZ.csv")
Y = mroz['inlf'] # This is a binary variable
X = mroz[['kidslt6', 'age', 'educ', 'huswage', 'exper', 'expersq']]
We split the full dataset into training and test datasets, fit the logistic regression model on the training dataset and examine its prediction accuracy on the test dataset. We shall do all this using functions from the scikit-learn library.
#We split the data into training and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=503, test_size=250, random_state=1)
#Logistic Regression and its accuracy:
from sklearn.linear_model import LogisticRegression
logimodel_train = LogisticRegression(penalty = None, max_iter = 1000)
logimodel_train.fit(X_train, Y_train)
print("Intercept:", logimodel_train.intercept_)
print("Coefficients:", logimodel_train.coef_)
Y_pred = logimodel_train.predict(X_test)
accuracy_logimodel = np.mean(Y_test == Y_pred)
print(f"Accuracy of Logistic Regression on test set: {accuracy_logimodel}")
contingency_table = pd.crosstab(Y_test, Y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True)
print(contingency_table)
Intercept: [0.80948004]
Coefficients: [[-1.42568874 -0.08661254 0.19075556 -0.05338824 0.22426735 -0.00391921]]
Accuracy of Logistic Regression on test set: 0.728
Predicted   0    1  All
Actual
0          64   49  113
1          19  118  137
All        83  167  250
The parameters fitted by the logistic regression model are given by the 'Intercept' and 'Coefficients' above. The test accuracy is 72.8% (the agreement between the actual test values and the predicted values is broken down further in the confusion matrix, or contingency table, above).
Let us briefly recall how the parameter estimates are obtained in logistic regression. The idea is to maximize the log-likelihood: \begin{align*} \sum_{i=1}^n \left\{ y_i \log \frac{e^{\beta_0 + \beta_1 x_{i1} + \dots + \beta_m x_{im}}}{1 + e^{\beta_0 + \beta_1 x_{i1} + \dots + \beta_m x_{im}}} + (1-y_i) \log \left[1 - \frac{e^{\beta_0 + \beta_1 x_{i1} + \dots + \beta_m x_{im}}}{1 + e^{\beta_0 + \beta_1 x_{i1} + \dots + \beta_m x_{im}}} \right] \right\} \end{align*} over the parameters $\beta_0, \dots, \beta_m$. Equivalently, the goal is to minimize (note the minus sign below in $L$): \begin{align*} L := - \sum_{i=1}^n \left(y_i \log p_i + (1-y_i) \log (1 - p_i) \right) ~~~ \text{ where } p_i = \sigma \left(\beta_0 + \beta_1 x_{i1} + \dots + \beta_m x_{im} \right) \end{align*} where $\sigma(\cdot)$ is the sigmoid function defined by \begin{align*} \sigma(z) = \frac{e^z}{1 + e^z} = \frac{1}{1 + e^{-z}}. \end{align*} The classical algorithm for minimizing $L$ is Newton's method, an iterative scheme given by: \begin{align*} \beta^{(k+1)} = \beta^{(k)} - \left(H L(\beta^{(k)}) \right)^{-1} \nabla L(\beta^{(k)}) \end{align*} where $k$ indexes the iterations, and $\nabla L$ and $H L$ are the gradient and Hessian of $L$ (as a function of $\beta = (\beta_0, \dots, \beta_m)$) respectively. The gradient and Hessian can be calculated in closed form by simply differentiating the expression for $L$. Newton's algorithm converges quite quickly for logistic regression.
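To make the closed-form gradient and Hessian concrete, here is a minimal Newton's-method sketch written directly with NumPy (our own illustrative code, not what scikit-learn runs internally). For logistic regression the gradient is $X^T(p - y)$ and the Hessian is $X^T \mathrm{diag}(p_i(1-p_i)) X$, where $X$ includes a column of ones for the intercept. This simple version does no safeguarding, but it should land close to the intercept and coefficients printed above.
#A minimal Newton's method for logistic regression (illustrative sketch):
#Gradient: X^T (p - y);  Hessian: X^T diag(p * (1 - p)) X, with p = sigmoid(X beta)
Xmat = np.column_stack([np.ones(len(X_train)), X_train.values])  # prepend an intercept column
yvec = Y_train.values.astype(float)
beta = np.zeros(Xmat.shape[1])
for it in range(25):  # Newton's method typically needs far fewer iterations than this
    p = 1 / (1 + np.exp(-Xmat @ beta))
    grad = Xmat.T @ (p - yvec)
    hess = Xmat.T @ (Xmat * (p * (1 - p))[:, None])
    beta = beta - np.linalg.solve(hess, grad)
print(beta)  # first entry approximates the intercept, the rest the coefficients above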
The quantity \begin{align*} \sum_{i=1}^n \left(-y_i \log p_i - (1-y_i) \log (1 - p_i) \right) = \sum_{i=1}^n \left( y_i \log \frac{1}{p_i} + (1-y_i) \log \frac{1}{1-p_i} \right) \end{align*} is often called the Binary Cross Entropy Loss between $(y_1, \dots, y_n)$ and $(p_1, \dots, p_n)$ (see e.g., https://en.wikipedia.org/wiki/Cross-entropy). It is always nonnegative (because $p_i$ and $1-p_i$ are between 0 and 1 so that $\log (1/p_i)$ and $\log(1/(1-p_i))$ are nonnegative), and it is minimized when $p_i = y_i$ for each $i$.
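As a quick numerical illustration of this definition (with made-up values of $y_i$ and $p_i$; this toy snippet is ours, not part of the analysis):
#Binary cross entropy computed directly from its definition (toy values):
y_toy = np.array([1, 0, 1, 1])
p_toy = np.array([0.9, 0.2, 0.6, 0.99])
bce = np.sum(-y_toy * np.log(p_toy) - (1 - y_toy) * np.log(1 - p_toy))
print(bce)  # nonnegative; it gets smaller as each p_i moves closer to the corresponding y_i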
In the logistic regression model, the response $y_i$ is predicted by $\sigma(\beta_0 + \beta_1 x_{i1} + \dots + \beta_m x_{im})$. The parameters $\beta_0, \dots, \beta_m$ are estimated in such a way that the Binary Cross Entropy Loss between the model predictions and the observed responses is minimized.
The sigmoid function $\sigma(z)$ is nonlinear and strictly increasing, takes values between 0 and 1, approaches 0 as $z \rightarrow -\infty$, and approaches 1 as $z \rightarrow \infty$. We can plot this function in Python as follows:
#Plotting the sigmoid function:
z = np.linspace(-10, 10, 100)
y = np.exp(z)/(1 + np.exp(z))
plt.figure(figsize = (8, 6))
plt.plot(z, y, label = "Sigmoid Function")
plt.title("Sigmoid Function")
plt.xlabel("z")
plt.ylabel("Sigmoid(z)")
plt.grid(True)
plt.legend()
plt.show()
The function determining the model predictions in logistic regression is:
\begin{align*}
(x_1, \dots, x_m) \mapsto \sigma(\beta_0 + \beta_1 x_{1} + \dots + \beta_m x_{m})
\end{align*}
This function, which applies the simple nonlinear map $\sigma(z) = \frac{e^z}{1+e^z} = \frac{1}{1 + e^{-z}}$ to the linear function $\beta_0 + \beta_1 x_1 + \dots + \beta_m x_m$, is a special case of an (artificial) neural network. More generally, a neural network is a function obtained by repeated composition of linear functions and simple nonlinearities such as the sigmoid. Neural networks are typically represented by pictures involving "nodes" and directed edges. The function $\sigma(\beta_0 + \beta_1 x_1 + \dots + \beta_m x_m)$ can be represented by the following picture (we took $m = 4$ for the picture).
Here the node '$+$' computes the quantity $\beta_0 + \beta_1 x_1 + \dots + \beta_m x_m$ and then the next node '$\sigma$' applies the sigmoid function to the output of the first node thereby computing the final output $\sigma(\beta_0 + \beta_1 x_1 + \dots + \beta_m x_m)$.
Because logistic regression is a special case of neural networks, we can fit logistic regression in standard Python libraries implementing neural networks. The following code illustrates this using the library PyTorch.
import torch
import torch.nn as nn
import torch.optim as optim
The first step to using PyTorch is to convert the data into PyTorch tensors. This step is needed because PyTorch neural network models expect input data in the form of PyTorch tensors to utilize the library's capabilities for automatic differentiation, GPU acceleration etc.
#Convert data to PyTorch Tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
print(X_train_tensor.shape)
Y_train_tensor = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1) # Make Y a 2D tensor
print(Y_train_tensor.shape)
torch.Size([503, 6])
torch.Size([503, 1])
Note that the 'unsqueeze(1)' makes Y a 2D tensor (or matrix) just like X. It is necessary that both X and Y are 2D tensors.
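As a quick illustration of what 'unsqueeze(1)' does (a toy tensor of our own, not part of the dataset):
v = torch.tensor([1.0, 0.0, 1.0])
print(v.shape)               # torch.Size([3]): a 1D tensor
print(v.unsqueeze(1).shape)  # torch.Size([3, 1]): now a 2D tensor with one column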
Next let us specify the input and output sizes of the neural network. The input size is equal to 6 (the number of explanatory variables in this regression) and the output size is equal to 1 (because the response variable is a scalar).
input_size = X_train_tensor.shape[1]
output_size = Y_train_tensor.shape[1]
display(input_size, output_size)
6
1
Now we are ready to specify the neural network. From the picture above, it is clear that it has two layers. The first is the linear layer, which corresponds to the '+' node (and computes $\beta_0 + \beta_1 x_1 + \dots + \beta_m x_m$). The second is the nonlinear layer, which applies the sigmoid function. We can specify both layers in sequence using the 'Sequential' function:
# Define the Neural Network corresponding to Logistic Regression
model_1 = nn.Sequential(
    nn.Linear(input_size, output_size),
    nn.Sigmoid()
)
#We are calling this model_1 because there will be other models coming up.
'nn.Sequential' specifies the neural network in sequence. The first layer is the linear layer which maps the input $(x_1, \dots, x_m)$ to a single scalar $\beta_0 + \beta_1 x_1 + \dots + \beta_m x_m$. The coefficients $\beta_1, \dots, \beta_m$ are known as the 'weights' associated with the first layer, and the intercept $\beta_0$ is known as the 'bias' of the first layer. Collectively, the weights and bias are known as the parameters of the first layer. The second layer simply applies the sigmoid function, and it has no parameters.
display(model_1[0], model_1[1])
Linear(in_features=6, out_features=1, bias=True)
Sigmoid()
linear, sigm = model_1.children()
print(list(linear.parameters()))
print(list(sigm.parameters())) #this has no parameters so empty list ([]) is printed
[Parameter containing:
tensor([[ 0.2821, -0.3516, 0.0119, 0.1230, 0.3326, 0.3583]], requires_grad=True), Parameter containing:
tensor([0.2763], requires_grad=True)]
[]
The displayed weights and bias above for the first layer are random values. Every time a neural network is created, its parameters are initialized by random numbers unless otherwise specified (you can go back and re-run the code and check that the parameters change).
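If reproducible initial parameters are desired, PyTorch's random seed can be fixed before creating the network (a small sketch; the seed value 0 and the name model_repro are our own arbitrary choices):
torch.manual_seed(0)  # fix the seed so that the random initialization is reproducible
model_repro = nn.Sequential(nn.Linear(input_size, output_size), nn.Sigmoid())
print(model_repro[0].weight.data, model_repro[0].bias.data)  # same values on every run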
#Another way of printing the weights and bias of the first layer:
print(model_1[0].weight.data)
print(model_1[0].bias.data)
tensor([[ 0.2821, -0.3516, 0.0119, 0.1230, 0.3326, 0.3583]])
tensor([0.2763])
#This neural network is just a function mapping (x1,...,x6) to sigma(b0 + b1 x1 + ... + b6 x6)
#We can provide specific input values of x1,...,x6 and obtain the output of the neural network:
#For example, for x1 = 0.5, x2 = 1, x3 = 0.44, x4 = -4, x5 = -1.5, x6 = 3:
xvals = torch.tensor([0.5, 1, 0.44, -4, -1.5, 3], dtype = torch.float32)
youtput = model_1(xvals)
print(youtput)
#we can verify the correctness of this value by directly computing sigma(b0 + b1 x1 + ... + b6 x6):
print(1/(1 + np.exp(-model_1[0].bias.data - torch.dot(xvals, model_1[0].weight.data.view(-1)))))
tensor([0.5387], grad_fn=<SigmoidBackward0>)
tensor([0.5387])
Now we shall train this neural network on the observed data. Training just means that the parameters will be estimated from the data. For this, we first need to specify the loss function (the Binary Cross Entropy Loss) and the optimization algorithm to be employed. The algorithms employed in PyTorch are variants of gradient descent, which is the iterative scheme: \begin{align*} \beta^{(k+1)} = \beta^{(k)} - \alpha \nabla L(\beta^{(k)}) \end{align*} This should be compared with the iterative scheme for Newton's method: \begin{align*} \beta^{(k+1)} = \beta^{(k)} - \left(H L(\beta^{(k)}) \right)^{-1} \nabla L(\beta^{(k)}) \end{align*} Based on the above two equations, it is clear that gradient descent is obtained by replacing the Hessian in Newton's method with the identity matrix multiplied by $1/\alpha$. The quantity $\alpha$ is called the learning rate, and it determines the size of the step taken in the direction opposite to the gradient $\nabla L(\beta^{(k)})$. A popular variant of gradient descent used in PyTorch is Adam (Adaptive Moment Estimation). The following code specifies the binary cross entropy loss to be minimized and the Adam algorithm to be used for optimization.
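Before using the built-in optimizer, it may help to see what a gradient-descent update looks like when written out by hand. The following sketch (our own illustrative code, using plain gradient descent rather than Adam) computes the gradient of the binary cross entropy loss with 'loss.backward()' and then applies $\beta^{(k+1)} = \beta^{(k)} - \alpha \nabla L(\beta^{(k)})$ directly. Without feature scaling, plain gradient descent converges slowly on this dataset, so the point is only to illustrate the mechanics that 'optimizer.step()' automates below.
#Hand-rolled gradient descent on the binary cross entropy loss (illustrative sketch):
beta_w = torch.zeros(input_size, 1, requires_grad=True)  # weights
beta_b = torch.zeros(1, requires_grad=True)              # bias
alpha = 0.01                                             # learning rate
for step in range(1000):
    p = torch.sigmoid(X_train_tensor @ beta_w + beta_b)  # model predictions
    loss = nn.functional.binary_cross_entropy(p, Y_train_tensor)
    loss.backward()                                       # compute the gradient
    with torch.no_grad():
        beta_w -= alpha * beta_w.grad                     # gradient step for the weights
        beta_b -= alpha * beta_b.grad                     # gradient step for the bias
        beta_w.grad.zero_()                               # reset gradients for the next step
        beta_b.grad.zero_()
print(beta_w.view(-1), beta_b)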
# Cross entropy loss function and Adam algorithm
criterion = nn.BCELoss() # Binary Cross Entropy Loss
optimizer = optim.Adam(model_1.parameters(), lr=0.001)
# Train the model
num_epochs = 5000 #this is the number of iterations for the optimization algorithm
for epoch in range(num_epochs):
    optimizer.zero_grad()  #reset the gradients
    predictions = model_1(X_train_tensor)
    loss = criterion(predictions, Y_train_tensor)
    loss.backward()  #in this step, the gradient is calculated
    optimizer.step()  #here the next iterate of the parameters is calculated according to the gradients
nn_weights = model_1[0].weight.data
nn_biases = model_1[0].bias.data
lr_coefficients = logimodel_train.coef_
lr_intercept = logimodel_train.intercept_
# Printing side by side
print(f"Neural Network Weights:\n{nn_weights}\nLogistic Regression Coefficients:\n{lr_coefficients}")
print(f"Neural Network Biases:\n{nn_biases}\nLogistic Regression Intercept:\n{lr_intercept}")
Neural Network Weights:
tensor([[-1.4191, -0.0858, 0.1927, -0.0534, 0.2245, -0.0039]])
Logistic Regression Coefficients:
[[-1.42568874 -0.08661254 0.19075556 -0.05338824 0.22426735 -0.00391921]]
Neural Network Biases:
tensor([0.7464])
Logistic Regression Intercept:
[0.80948004]
There is some discrepancy between the weights (coefficients) and bias (intercept) returned by PyTorch and logistic regression. This is due to the use of different algorithms: Adam in PyTorch and a Newton-like algorithm in Scikit-learn. Actually, the algorithm used in scikit-learn is called L-BFGS which is a variant of Newton's algorithm (instead of the exact Hessian used in Newton's method, L-BFGS uses an approximation to the inverse Hessian matrix).
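We can also check this claim directly by handing the same negative log-likelihood to SciPy's implementation of L-BFGS (a sketch assuming scipy is installed; the helper 'nll' below is our own):
#Minimizing the negative log-likelihood with SciPy's L-BFGS (illustrative check):
from scipy.optimize import minimize
Xmat = np.column_stack([np.ones(len(X_train)), X_train.values])  # intercept column plus features
yvec = Y_train.values
def nll(beta):
    z = Xmat @ beta
    return np.sum(np.logaddexp(0, z) - yvec * z)  # equals the binary cross entropy loss L
res = minimize(nll, np.zeros(Xmat.shape[1]), method="L-BFGS-B")
print(res.x)  # should be close to the scikit-learn intercept and coefficients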
predictions = model_1(X_train_tensor)
loss_adam = criterion(predictions,Y_train_tensor)
print(loss_adam)
model_1[0].weight.data = torch.tensor(lr_coefficients, dtype=torch.float32)
model_1[0].bias.data = torch.tensor(lr_intercept, dtype=torch.float32)
# Compute predictions and loss using the modified model
predictions = model_1(X_train_tensor)
loss_scikit_learn = criterion(predictions, Y_train_tensor)
print(loss_scikit_learn)
tensor(0.5381, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.5381, grad_fn=<BinaryCrossEntropyBackward0>)
In the above code, we compare the loss function evaluated at the model parameters obtained by Adam to the loss function evaluated at the parameters given by the logistic regression intercept and coefficients. The two loss values are quite close (with the logistic regression weights/bias giving a slightly smaller loss). This indicates that Adam is working decently even though there is some discrepancy between its fitted parameters and those given by scikit-learn. The fact that different-looking parameter estimates give similar loss values also means that the loss function is somewhat flat around the actual minimizer.
#Let us reset the parameters of model_1 to those output by adam:
model_1[0].weight.data = nn_weights
model_1[0].bias.data = nn_biases
The L-BFGS algorithm is also available as an optimization method in PyTorch. The following code uses this algorithm.
#With the LBFGS Optimization Scheme:
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train.values, dtype=torch.float32).unsqueeze(1)
# Define the Neural Network Architecture
input_size = X_train.shape[1]
output_size = 1 # Binary classification
model_2 = nn.Sequential(
    nn.Linear(input_size, output_size),
    nn.Sigmoid()
)
# Define the loss function and optimizer
criterion = nn.BCELoss() # Binary Cross Entropy Loss
optimizer = optim.LBFGS(model_2.parameters(), lr=0.001)
# Train the model
num_epochs = 4000
for epoch in range(num_epochs):
    def closure():
        optimizer.zero_grad()
        predictions = model_2(X_train_tensor)
        loss = criterion(predictions, Y_train_tensor)
        loss.backward()
        return loss
    optimizer.step(closure)
nn_weights = model_2[0].weight.data
nn_biases = model_2[0].bias.data
lr_coefficients = logimodel_train.coef_
lr_intercept = logimodel_train.intercept_
# Printing side by side
print(f"Neural Network Weights:\n{nn_weights}\nLogistic Regression Coefficients:\n{lr_coefficients}")
print(f"Neural Network Biases:\n{nn_biases}\nLogistic Regression Intercept:\n{lr_intercept}")
Neural Network Weights:
tensor([[-1.4252, -0.0866, 0.1907, -0.0534, 0.2243, -0.0039]])
Logistic Regression Coefficients:
[[-1.42568874 -0.08661254 0.19075556 -0.05338824 0.22426735 -0.00391921]]
Neural Network Biases:
tensor([0.8100])
Logistic Regression Intercept:
[0.80948004]
This algorithm gives results that are very close to those given by scikit-learn.
Below we look at the test accuracy of the neural network models.
#Predictions on Test Data:
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test.values, dtype=torch.float32).squeeze()
with torch.no_grad():  # Ensure you're in "evaluation" mode which won't compute gradients
    raw_predictions_1 = model_1(X_test_tensor)
    Y_pred_1 = (raw_predictions_1 > 0.5).float().squeeze()  # Thresholding at 0.5 for binary classification
    accuracy_1 = (Y_pred_1 == Y_test_tensor).float().mean().item()  # Compute accuracy
print(f"Accuracy of Adam Optimized NN on test data: {accuracy_1 * 100:.2f}%")
with torch.no_grad():  # Ensure you're in "evaluation" mode which won't compute gradients
    raw_predictions_2 = model_2(X_test_tensor)
    Y_pred_2 = (raw_predictions_2 > 0.5).float().squeeze()  # Thresholding at 0.5 for binary classification
    accuracy_2 = (Y_pred_2 == Y_test_tensor).float().mean().item()  # Compute accuracy
print(f"Accuracy of LBFGS Optimized NN on test data: {accuracy_2 * 100:.2f}%")
Accuracy of Adam Optimized NN on test data: 72.40%
Accuracy of LBFGS Optimized NN on test data: 72.80%
In the last lecture, we also considered some toy datasets the simplest of which is given below.
n = 750
n_train = 500
n_test = 250
np.random.seed(3) #setting seed to ensure reproducibility
x1 = np.random.uniform(-1, 1, n)
x2 = np.random.uniform(-1, 1, n)
X = np.vstack([x1, x2]).transpose()
Y = (x1 * x2 > 0).astype(np.int64)
#Split the dataset into training and test datasets of sizes 500 and 250 respectively
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=500, test_size=250, random_state=1)
x1_train = X_train[:,0]
x2_train = X_train[:,1]
x1_test = X_test[:,0]
x2_test = X_test[:,1]
#Plotting this dataset
def draw_results(x1, x2, color, plot_title=''):
    plt.figure()
    plt.scatter(x1, x2, c=color, cmap='viridis', alpha=0.7, s=10)
    plt.colorbar()
    plt.title(plot_title)
    plt.axis('equal')
    plt.xlabel('$x_1$')
    plt.ylabel('$x_2$')
    plt.tight_layout()
draw_results(x1_train, x2_train, color=Y_train, plot_title='Training data')
draw_results(x1_test, x2_test, color=Y_test, plot_title='Test data (ground truth)')
In the last lecture, we used logistic regression on this dataset and noted that it gave very poor accuracy.
#In the last lecture, we used logistic regression on this dataset:
from sklearn.linear_model import LogisticRegression
logimodel_train = LogisticRegression(penalty = None, max_iter = 1000)
logimodel_train.fit(X_train, Y_train)
print("Intercept:", logimodel_train.intercept_)
print("Coefficients:", logimodel_train.coef_)
#Prediction for the test data
Y_pred = logimodel_train.predict(X_test)
accuracy_logimodel = np.mean(Y_test == Y_pred)
print(f"Accuracy of Logistic Regression on test set: {accuracy_logimodel}")
contingency_table = pd.crosstab(Y_test, Y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True)
print(contingency_table)
Intercept: [-0.06385385]
Coefficients: [[ 0.13228111 -0.12995264]]
Accuracy of Logistic Regression on test set: 0.332
Predicted    0   1  All
Actual
0           66  61  127
1          106  17  123
All        172  78  250
We can re-fit this logistic regression with PyTorch and verify that it gives the same results:
#Let us re-do this logistic regression using PyTorch:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32).unsqueeze(1)
# Define the Neural Network Architecture
input_size = X_train.shape[1]
output_size = 1 # Binary classification
model = nn.Sequential(
    nn.Linear(input_size, output_size),
    nn.Sigmoid()  # Sigmoid activation for binary classification
)
# Define the loss function and optimizer
criterion = nn.BCELoss() # Binary Cross Entropy Loss
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Train the model
num_epochs = 1000 # You can adjust this
for epoch in range(num_epochs):
    optimizer.zero_grad()  #reset the gradients
    predictions = model(X_train_tensor)
    loss = criterion(predictions, Y_train_tensor)
    loss.backward()
    optimizer.step()
nn_weights = model[0].weight.data
nn_biases = model[0].bias.data
lr_coefficients = logimodel_train.coef_
lr_intercept = logimodel_train.intercept_
# Printing side by side
print(f"Neural Network Weights:\n{nn_weights}\nLogistic Regression Coefficients:\n{lr_coefficients}")
print(f"Neural Network Biases:\n{nn_biases}\nLogistic Regression Intercept:\n{lr_intercept}")
Neural Network Weights:
tensor([[ 0.1532, -0.1311]])
Logistic Regression Coefficients:
[[ 0.13228111 -0.12995264]]
Neural Network Biases:
tensor([-0.0591])
Logistic Regression Intercept:
[-0.06385385]
#Accuracy of Neural Networks:
#Predictions on Test Data:
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32).squeeze()
with torch.no_grad():
    raw_predictions = model(X_test_tensor)
    Y_pred = (raw_predictions > 0.5).float().squeeze()  # Thresholding at 0.5 for binary classification
    accuracy = (Y_pred == Y_test_tensor).float().mean().item()  # Compute accuracy
print(f"Accuracy of Adam Optimized NN on test data: {accuracy * 100:.2f}%")
Accuracy of Adam Optimized NN on test data: 34.00%
This neural network in PyTorch gives essentially the same estimates as scikit-learn's logistic regression, and essentially the same (poor) accuracy. In the last lecture, we also saw that logistic regression with feature engineering, where we add the additional feature $x_1 x_2$, works very well, as demonstrated below.
def add_mult_feature(X):
    """Returns an array like X, but with a new feature that's X1 * X2"""
    new_feature = X[:, 0] * X[:, 1]
    return np.hstack([X, new_feature[:, None]])
X_train_feat = add_mult_feature(X_train)
X_test_feat = add_mult_feature(X_test)
logimodel_train_feat = LogisticRegression(penalty = None, max_iter = 2000)
logimodel_train_feat.fit(X_train_feat, Y_train)
print("Intercept:", logimodel_train_feat.intercept_)
print("Coefficients:", logimodel_train_feat.coef_)
#Prediction for the test data
Y_pred_feat = logimodel_train_feat.predict(X_test_feat)
accuracy_logimodel_feat = np.mean(Y_test == Y_pred_feat)
print(f"Accuracy of Logistic Regression (with new feature x1*x2) on test set: {accuracy_logimodel_feat}")
contingency_table_feat = pd.crosstab(Y_test, Y_pred_feat, rownames=['Actual'], colnames=['Predicted'], margins=True)
print(contingency_table_feat)
Intercept: [-7.05612094]
Coefficients: [[ 23.07451326 13.61764772 7783.51393629]]
Accuracy of Logistic Regression (with new feature x1*x2) on test set: 1.0
Predicted    0    1  All
Actual
0          127    0  127
1            0  123  123
All        127  123  250
For neural networks, there is a different fix that improves accuracy without adding the feature $x_1 x_2$ by hand: adding a hidden layer to the neural network.
The neural network obtained by adding a hidden layer to logistic regression is given by the following picture.
Here the hidden nodes are $h_1, \dots, h_p$ (in the picture, we took $p = 6$), each of which is a linear combination of $x_1, \dots, x_m$ plus a bias term. $\sigma$ is a nonlinear function (like the sigmoid) that is first applied to $h_1, \dots, h_p$; we define $s_i = \sigma(h_i)$ (it is common to alternatively think of $s_1, \dots, s_p$ as the hidden nodes). The logistic regression model is then applied to these modified features $s_1, \dots, s_p$.
The mathematical equation underlying this neural network is the following: \begin{align*} f(x_1, \dots, x_m) = \sigma(\beta_0 + \beta_1 s_1 + \dots + \beta_p s_p) ~~ \text{ where } s_i = \sigma(h_i) \text{ and } h_i = \sum_{j} w_{ij} x_j + b_i. \end{align*} More succinctly, using vector-matrix notation, this can be written as \begin{align*} f(x_1, \dots, x_m) = \sigma\left(\beta_0 + \beta^T \sigma(W x + b) \right) \end{align*}
The nonlinear function $\sigma$ is applied twice: first on $h_1, \dots, h_p$ to obtain $s_1, \dots, s_p$, and then on $\beta_0 + \beta_1 s_1 + \dots + \beta_p s_p$. Actually, there is no reason to apply the same nonlinear function $\sigma$ in both these applications. It is common to use the ReLU nonlinear function in the first step (i.e., in $s_i = \sigma(h_i)$) and the sigmoid function in the second step. The ReLU activation function is given by: \begin{align*} \sigma_{\text{ReLU}}(z) = z_+ := \max(z, 0) \end{align*} With these two different nonlinear functions, the formula for the overall function computed by this neural network becomes: \begin{align*} f(x_1, \dots, x_m) = \sigma\left(\beta_0 + \beta^T \sigma_{\text{ReLU}}(W x + b) \right) \end{align*} Here $W$ is a $p \times m$ matrix, $b$ is a $p \times 1$ vector, $\beta$ is a $p \times 1$ vector and $\beta_0$ is a scalar. Note also that $\sigma_{\text{ReLU}}$ is being applied to the vector $Wx + b$ (as opposed to a scalar $z$), which means that it is applied separately to each component of the vector (and the results are stored in a vector of the same size): $\sigma_{\text{ReLU}}(z_1, z_2, z_3) = (\sigma_{\text{ReLU}}(z_1), \sigma_{\text{ReLU}}(z_2), \sigma_{\text{ReLU}}(z_3))$.
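To see that this formula is exactly what such a network computes, the following sketch (our own check, with an arbitrary input and the choices $p = 6$, $m = 2$) evaluates $\sigma(\beta_0 + \beta^T \sigma_{\text{ReLU}}(Wx + b))$ by hand and compares it with the forward pass of a PyTorch network of the same shape:
#Checking the formula sigma(beta0 + beta^T relu(W x + b)) against a PyTorch forward pass:
p_check, m_check = 6, 2
check_model = nn.Sequential(nn.Linear(m_check, p_check), nn.ReLU(), nn.Linear(p_check, 1), nn.Sigmoid())
x_check = torch.tensor([0.3, -0.7])
W = check_model[0].weight.data     # p x m weight matrix of the hidden layer
b = check_model[0].bias.data       # length-p bias vector of the hidden layer
beta = check_model[2].weight.data  # 1 x p weights of the output layer
beta0 = check_model[2].bias.data   # scalar bias of the output layer
manual = torch.sigmoid(beta0 + beta @ torch.clamp(W @ x_check + b, min=0))  # clamp at 0 = ReLU
print(manual, check_model(x_check))  # the two values should agree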
This hidden layer neural network can be fit using PyTorch by a minor modification of the previous code: we just need to change the specification of the network by adding the new hidden layer. The code is given below.
n_hidden = 6 #number of hidden units (the quantity p in our notation above)
model = nn.Sequential(
    nn.Linear(input_size, n_hidden),   # First linear layer with n_hidden nodes (hidden layer)
    nn.ReLU(),                         # Activation function after hidden layer
    nn.Linear(n_hidden, output_size),  # Output layer
    nn.Sigmoid()                       # Sigmoid activation for binary classification
)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 1000
for epoch in range(num_epochs):
    optimizer.zero_grad()  #reset the gradients
    predictions = model(X_train_tensor)
    loss = criterion(predictions, Y_train_tensor)
    loss.backward()
    optimizer.step()
The accuracy of the above neural network model on the test dataset is computed below.
with torch.no_grad():  # Ensures you're in "evaluation" mode which won't compute gradients
    raw_predictions = model(X_test_tensor)
    Y_pred = (raw_predictions > 0.5).float().squeeze()  # Thresholding at 0.5 for binary classification
    accuracy = (Y_pred == Y_test_tensor).float().mean().item()
print(f"Accuracy on test data: {accuracy * 100:.2f}%")
contingency_table = pd.crosstab(Y_test, Y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True)
print(contingency_table)
Accuracy on test data: 88.40%
Predicted  0.0  1.0  All
Actual
0          126    1  127
1           28   95  123
All        154   96  250
The accuracy for $p = 6$ is decent but it gets much better as $p$ is increased (go back to the code above and set $p = 60, 200, 500$). The success of this model means that the neural network with a single hidden layer is able to extract the relevant feature $x_1 x_2$ within its hidden layer. Some intuition for why this works can be obtained as follows. We use the elementary formula: \begin{align*} x^2 = 8 \int_0^1 (x - 4t + 2)_+ dt - 4x - 4 ~~ \text{ for } -2 \leq x \leq 2. \end{align*} To verify this formula, start with the right hand side and note that $(x - 4t + 2)_+ = 0$ unless $x - 4t + 2 \geq 0$, which is equivalent to $t \leq (x+2)/4$ (because $-2 \leq x \leq 2$, we have $0 \leq (x+2)/4 \leq 1$). Thus \begin{align*} 8 \int_0^1 (x - 4t + 2)_+ dt - 4x - 4 &= 8 \int_0^{(x+2)/4} (x - 4t + 2) dt - 4 x - 4 \end{align*} which can be easily checked to equal $x^2$.
Because of this formula (applied with $x_1 + x_2$ and $x_1 - x_2$, both of which lie in $[-2, 2]$), we can write \begin{align*} x_1 x_2 &= \frac{1}{4} (x_1 + x_2)^2 - \frac{1}{4} (x_1 - x_2)^2 \\ &= \left[2 \int_0^1 (x_1 + x_2 - 4t + 2)_+ dt - (x_1 + x_2) - 1\right] - \left[2 \int_0^1 (x_1 - x_2 - 4t + 2)_+ dt - (x_1 - x_2) - 1 \right] \\ &= 2 \int_0^1 \sigma_{\text{ReLU}}(x_1 + x_2 - 4t + 2) dt - 2 \int_0^1 \sigma_{\text{ReLU}}(x_1 - x_2 - 4t + 2) dt - 2 x_2. \end{align*} By approximating the integrals above by discrete sums over $t$, we can write \begin{align*} x_1 x_2 \approx \sum_{j=1}^N w_j \sigma_{\text{ReLU}}(x_1 + x_2 - 4 t_j + 2) - \sum_{j=1}^N w_j \sigma_{\text{ReLU}}(x_1 - x_2 - 4 t_j + 2) - 2 x_2 \end{align*} for suitable grid points $t_j$ and weights $w_j$ (the leftover linear term $-2 x_2$ is itself a combination of ReLUs, since $z = \sigma_{\text{ReLU}}(z) - \sigma_{\text{ReLU}}(-z)$). This means that $x_1 x_2$ can be closely approximated by a linear combination of a large number of $\sigma_{\text{ReLU}}$ terms applied to linear combinations of $x_1$ and $x_2$. In other words, it should be possible to find $W$ ($p \times 2$), $b$ ($p \times 1$), and coefficients $\beta_0$, $\beta$ so that $x_1 x_2 \approx \beta_0 + \beta^T \sigma_{\text{ReLU}}(Wx + b)$ when $p$ is large. This is the reason why the hidden layer neural network works well here when $p$ is large.
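This approximation can be checked numerically. The sketch below (our own; the grid of $N$ midpoints and the equal weights $w_j = 2/N$ are one simple choice) discretizes the two integrals and compares the resulting combination of ReLUs with $x_1 x_2$ at random points in $[-1, 1]^2$:
#Numerical check: x1*x2 is well approximated by a combination of ReLUs of linear functions of (x1, x2):
def relu(z):
    return np.maximum(z, 0)
N = 200
t_grid = (np.arange(N) + 0.5) / N  # midpoints of N subintervals of [0, 1]
x1_chk = np.random.uniform(-1, 1, 1000)
x2_chk = np.random.uniform(-1, 1, 1000)
approx = (2 / N) * (relu(x1_chk[:, None] + x2_chk[:, None] - 4 * t_grid + 2).sum(axis=1)
                    - relu(x1_chk[:, None] - x2_chk[:, None] - 4 * t_grid + 2).sum(axis=1)) - 2 * x2_chk
print(np.max(np.abs(approx - x1_chk * x2_chk)))  # maximum error; small, and it shrinks as N grows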
One can also check (see the code below) that this method continues to work when a small proportion of the datapoints is corrupted.
#With noisy data:
np.random.seed(3)
Y_train_noisy = Y_train.copy()
pts_to_flip = np.random.random(n_train) < 0.1 #we are selecting 10% of the datapoints
Y_train_noisy[pts_to_flip] = 1 - Y_train_noisy[pts_to_flip] #we are corrupting the responses of these datapoints
draw_results(x1_train, x2_train, color=Y_train_noisy, plot_title='Training data with noise')
Y_train_noisy_tensor = torch.tensor(Y_train_noisy, dtype=torch.float32).unsqueeze(1) # Make Y a 2D tensor