import os
import kagglehub
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch import nn, optim


class NeuralNetwork(nn.Module):
    """
    Feedforward Neural Network with one hidden layer.

    Architecture:
    - Input layer: n_features neurons
    - Hidden layer: n_hidden neurons (ReLU activation)
    - Output layer: n_classes neurons (logits - no softmax, CrossEntropyLoss does this)
    """

    def __init__(self, n_features, n_hidden, n_classes):
        """
        Args:
            n_features: number of input features.
            n_hidden: number of neurons in the hidden layer.
            n_classes: number of output classes (logit dimension).
        """
        super().__init__()
        # Define layers
        self.fc1 = nn.Linear(n_features, n_hidden)  # Input to hidden
        self.relu = nn.ReLU()                       # ReLU activation
        self.fc2 = nn.Linear(n_hidden, n_classes)   # Hidden to output
        # Custom weight initialization (He initialization for ReLU).
        # NOTE(review): fc2 feeds the softmax/logits, not a ReLU — He init on it
        # is harmless but not strictly appropriate; confirm this is intentional.
        nn.init.kaiming_normal_(self.fc1.weight, nonlinearity="relu")
        nn.init.kaiming_normal_(self.fc2.weight, nonlinearity="relu")

    def forward(self, x):
        """Forward pass: returns raw class logits of shape (batch, n_classes)."""
        x = self.fc1(x)   # Linear transformation
        x = self.relu(x)  # ReLU activation
        x = self.fc2(x)   # Linear transformation (logits)
        return x          # No softmax - CrossEntropyLoss handles it!


# All computation runs on CPU in this notebook.
device = "cpu"

# Download data from kaggle
# Download the dataset via kagglehub (cached locally after first download).
path = kagglehub.dataset_download("yashdevladdha/uber-ride-analytics-dashboard")

# Load data into Pandas DataFrame
csv_file = os.path.join(path, "ncr_ride_bookings.csv")
df = pd.read_csv(csv_file)
print("✅ Data loaded successfully!")
Data loaded successfully!
Target variabele: Payment Method
Features: Avg VTAT, Avg CTAT, Booking Value, Ride Distance
# Keep only the target column and the four feature columns, drop rows with
# missing values, and renumber the index. Column labels are passed as a list:
# a tuple key in .loc is reserved for MultiIndex lookups in modern pandas.
df = (
    df.loc[:, ["Payment Method", "Avg VTAT", "Avg CTAT", "Booking Value", "Ride Distance"]]
    .dropna()
    .reset_index(drop=True)
)
print(df["Payment Method"].unique())
✍️
Maak een NumPy array aan voor de target `y`.
# Extract the target column as a NumPy array of strings.
y = df["Payment Method"].values
# Make a map to convert payment methods to integers (0..n_classes-1,
# in order of first appearance in the data).
payment_method_map = {method: idx for idx, method in enumerate(df["Payment Method"].unique())}
# Encode each payment method as its integer class label.
y = np.array([payment_method_map[method] for method in y])
print(np.unique(y))
✍️
Maak een NumPy array aan voor de featurematrix `X` mèt standaardschaling.
# Standard-scale the feature matrix: zero mean, unit variance per column.
scaler = StandardScaler()
X = df[["Avg VTAT", "Avg CTAT", "Booking Value", "Ride Distance"]].values
X_scaled = scaler.fit_transform(X)
Maak een random 80/20% train/test split van `X_scaled` en `y`.
# Random 80/20 train/test split; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")
print(f"Training set shape: X={X_train.shape}, y={y_train.shape}")
print(f"Test set shape: X={X_test.shape}, y={y_test.shape}")
Test set size: 20400
Training set shape: X=(81600, 4), y=(81600,)
Test set shape: X=(20400, 4), y=(20400,)
✍️
Converteer `X_train`, `X_test`, `y_train` en `y_test` naar PyTorch formaat.
# Convert the NumPy arrays to PyTorch tensors on the target device.
# torch.tensor(..., dtype=...) replaces the legacy FloatTensor/LongTensor
# type constructors, which the PyTorch docs discourage.
# Features as float32; class labels as int64 (required by CrossEntropyLoss).
X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long, device=device)
✍️
Train een NeuralNetwork model (zie hierboven) met 10 hidden neurons.
# Create model instance
n_features = X_train.shape[1]        # 4 features
n_hidden = 10                        # hidden-layer width
n_classes = len(payment_method_map)  # one logit per payment method
model = NeuralNetwork(n_features, n_hidden, n_classes).to(device)

print("✅ PyTorch Neural Network Model Created!")
print("\nModel Architecture:")
print(model)
print(f"\nTotal parameters: {sum(p.numel() for p in model.parameters())}")
print("\nParameter details:")
for name, param in model.named_parameters():
    print(f"  {name}: {param.shape} ({param.numel()} parameters)")
Model Architecture:
NeuralNetwork(
(fc1): Linear(in_features=4, out_features=10, bias=True)
(relu): ReLU()
(fc2): Linear(in_features=10, out_features=5, bias=True)
)
Total parameters: 105
Parameter details:
fc1.weight: torch.Size([10, 4]) (40 parameters)
fc1.bias: torch.Size([10]) (10 parameters)
fc2.weight: torch.Size([5, 10]) (50 parameters)
fc2.bias: torch.Size([5]) (5 parameters)
✍️
Definieer de Cross Entropy Loss functie en Stochastic Gradient Descent als optimalisatiealgoritme (learning rate = 0.02).
# Cross-entropy loss expects raw logits (the model emits no softmax).
criterion = nn.CrossEntropyLoss()
# Plain SGD, learning rate 0.02, no momentum.
optimizer = optim.SGD(model.parameters(), lr=0.02)

print("✅ Loss function and optimizer configured!")
print(f"\nLoss function: {criterion}")
print(f"Optimizer: {optimizer}")
print(f"Learning rate: {optimizer.param_groups[0]['lr']}")
Loss function: CrossEntropyLoss()
Optimizer: SGD (
Parameter Group 0
dampening: 0
differentiable: False
foreach: None
fused: None
lr: 0.02
maximize: False
momentum: 0
nesterov: False
weight_decay: 0
)
Learning rate: 0.02
✍️
Train the model for 2000 epochs and plot the test/training loss and accuracy.
# Training configuration
n_epochs = 2000

# Training history (one entry per epoch, for plotting afterwards)
train_losses = []
test_losses = []
train_accuracies = []
test_accuracies = []

print("Training Neural Network with PyTorch...")
print(f"Architecture: {n_features} → {n_hidden} (ReLU) → {n_classes}")
print(f"Learning rate: {optimizer.param_groups[0]['lr']}")
print(f"Epochs: {n_epochs}")
print("\nTraining progress:")
print("-" * 70)

# Full-batch gradient descent: every epoch uses the entire training set.
for epoch in range(n_epochs):
    # ============ TRAINING MODE ============
    model.train()  # Set model to training mode

    # Forward pass
    train_outputs = model(X_train_tensor)
    train_loss = criterion(train_outputs, y_train_tensor)

    # Backward pass and optimization
    optimizer.zero_grad()  # Clear previous gradients
    train_loss.backward()  # Compute gradients (backpropagation!)
    optimizer.step()       # Update weights

    # ============ EVALUATION MODE ============
    model.eval()  # Set model to evaluation mode
    with torch.no_grad():  # Disable gradient computation for evaluation
        # Training accuracy (reuses the forward pass from above)
        _, train_predicted = torch.max(train_outputs, 1)
        train_acc = (train_predicted == y_train_tensor).float().mean()

        # Test loss and accuracy
        test_outputs = model(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor)
        _, test_predicted = torch.max(test_outputs, 1)
        test_acc = (test_predicted == y_test_tensor).float().mean()

    # Store history
    train_losses.append(train_loss.item())
    test_losses.append(test_loss.item())
    train_accuracies.append(train_acc.item())
    test_accuracies.append(test_acc.item())

    # Print progress every 200 epochs (and after the first epoch)
    if (epoch + 1) % 200 == 0 or epoch == 0:
        print(
            f"Epoch {epoch + 1:4d} | "
            f"Train Loss: {train_loss.item():.4f} | "
            f"Test Loss: {test_loss.item():.4f} | "
            f"Train Acc: {train_acc.item():.2%} | "
            f"Test Acc: {test_acc.item():.2%}"
        )

print("-" * 70)
print("\n✅ Training completed!")
print("\nFinal Results:")
print(f"  Training Loss:     {train_losses[-1]:.4f}")
print(f"  Test Loss:         {test_losses[-1]:.4f}")
print(f"  Training Accuracy: {train_accuracies[-1]:.2%}")
print(f"  Test Accuracy:     {test_accuracies[-1]:.2%}")
Architecture: 4 โ 10 (ReLU) โ 5
Learning rate: 0.02
Epochs: 2000
Training progress:
----------------------------------------------------------------------
Epoch 1 | Train Loss: 2.1648 | Test Loss: 2.1529 | Train Acc: 16.45% | Test Acc: 16.88%
Epoch 200 | Train Loss: 1.4613 | Test Loss: 1.4589 | Train Acc: 43.11% | Test Acc: 43.03%
Epoch 400 | Train Loss: 1.4191 | Test Loss: 1.4152 | Train Acc: 44.52% | Test Acc: 44.53%
Epoch 600 | Train Loss: 1.4101 | Test Loss: 1.4062 | Train Acc: 44.86% | Test Acc: 44.89%
Epoch 800 | Train Loss: 1.4059 | Test Loss: 1.4021 | Train Acc: 44.99% | Test Acc: 44.97%
Epoch 1000 | Train Loss: 1.4034 | Test Loss: 1.3996 | Train Acc: 45.01% | Test Acc: 45.00%
Epoch 1200 | Train Loss: 1.4017 | Test Loss: 1.3979 | Train Acc: 45.01% | Test Acc: 45.00%
Epoch 1400 | Train Loss: 1.4005 | Test Loss: 1.3968 | Train Acc: 45.01% | Test Acc: 45.00%
Epoch 1600 | Train Loss: 1.3996 | Test Loss: 1.3959 | Train Acc: 45.01% | Test Acc: 45.00%
Epoch 1800 | Train Loss: 1.3989 | Test Loss: 1.3953 | Train Acc: 45.01% | Test Acc: 45.00%
Epoch 2000 | Train Loss: 1.3984 | Test Loss: 1.3947 | Train Acc: 45.01% | Test Acc: 45.00%
----------------------------------------------------------------------
โ Training completed!
Final Results:
Training Loss: 1.3984
Test Loss: 1.3947
Training Accuracy: 45.01%
Test Accuracy: 45.00%