Leerdoelen¶
Feedforward neural network implementeren met PyTorch
PyTorch nn.Module gebruiken voor model definitie
ReLU activatie en Softmax output voor multi-class classificatie
Cross-entropy loss gebruiken met PyTorch
Model trainen met PyTorch optimizer (SGD)
Model performance evalueren op 3-class classificatie
PyTorch's ingebouwde tools gebruiken voor visualisatie en debugging
import numpy as np
import pandas as pd
import plotly.express as px
import torch
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch import nn, optim
# Pick the GPU when one is available; everything else runs on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Reproducibility: seed NumPy's generator and PyTorch's global RNG.
rng = np.random.default_rng(67)
torch.manual_seed(67)

print(f"Using device: {device}")
1. Data Loading en Exploratie¶
We gebruiken de volledige Iris dataset met alle 3 klassen:
Setosa (klasse 0)
Versicolor (klasse 1)
Virginica (klasse 2)
# Load the complete Iris dataset (150 samples, 3 balanced classes).
iris = load_iris()

# Build a tidy DataFrame with numeric targets plus readable species names.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df["species"] = iris.target
df["species_name"] = df["species"].map({0: "setosa", 1: "versicolor", 2: "virginica"})

print(f"Total samples: {len(df)}")
print("\nClass distribution:")
for i, name in enumerate(["setosa", "versicolor", "virginica"]):
    count = (df["species"] == i).sum()
    print(f"  {name.capitalize()} ({i}): {count} samples ({100 * count / len(df):.1f}%)")

# BUG FIX: the original used a plain string literal here, so the placeholder
# text "{iris.feature_names}" was printed verbatim (visible in the cell
# output). An f-string is required to interpolate the feature names.
print(f"\nFeatures: {iris.feature_names}")
print("\nFirst few rows from each class:")
df.groupby("species_name").head(3)
Class distribution:
Setosa (0): 50 samples (33.3%)
Versicolor (1): 50 samples (33.3%)
Virginica (2): 50 samples (33.3%)
Features: {iris.feature_names}
First few rows from each class:
# Scatter plot of the two most discriminative features (petal dimensions),
# colored by species, to show how well the three classes separate.
species_colors = {"setosa": "#1f77b4", "versicolor": "#e377c2", "virginica": "#17becf"}
axis_labels = {"petal length (cm)": "Petal Length (cm)", "petal width (cm)": "Petal Width (cm)"}

fig = px.scatter(
    df,
    x="petal length (cm)",
    y="petal width (cm)",
    color="species_name",
    color_discrete_map=species_colors,
    title="Iris Dataset: All 3 Classes (Petal Dimensions)",
    labels=axis_labels,
    width=800,
    height=600,
)
fig.update_traces(marker={"size": 10, "line": {"width": 1, "color": "white"}})
fig.show()

2. Data Voorbereiding¶
We bereiden de data voor door:
Train/test split (80/20)
Feature standardization (mean=0, std=1)
Labels als integer klasse-indices (PyTorch's CrossEntropyLoss verwerkt softmax en one-hot encoding intern, dus expliciete one-hot encoding is niet nodig)
# Raw features and integer targets straight from the sklearn Bunch.
X = iris.data
y = iris.target

# Stratified 80/20 split keeps the 3-class balance identical in both subsets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on training data only, then apply to both subsets,
# so no test-set statistics leak into the model.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Move everything to the chosen device. CrossEntropyLoss takes integer
# class indices as targets, so no one-hot encoding is required.
X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
X_test_tensor = torch.FloatTensor(X_test_scaled).to(device)
y_train_tensor = torch.LongTensor(y_train).to(device)
y_test_tensor = torch.LongTensor(y_test).to(device)

print(f"Training set: {X_train_tensor.shape[0]} samples")
print(f"Test set: {X_test_tensor.shape[0]} samples")
print(f"Number of features: {X_train_tensor.shape[1]}")
print(f"Number of classes: {len(np.unique(y))}")
print(f"\nData is on device: {X_train_tensor.device}")

print("\nClass distribution in training set:")
for label, name in enumerate(("Setosa", "Versicolor", "Virginica")):
    count = (y_train == label).sum()
    print(f"  {name} ({label}): {count} samples ({100 * count / len(y_train):.1f}%)")
print("\nNote: PyTorch's CrossEntropyLoss handles softmax and one-hot encoding internally!")
Test set: 30 samples
Number of features: 4
Number of classes: 3
Data is on device: cpu
Class distribution in training set:
Setosa (0): 40 samples (33.3%)
Versicolor (1): 40 samples (33.3%)
Virginica (2): 40 samples (33.3%)
Note: PyTorch's CrossEntropyLoss handles softmax and one-hot encoding internally!
3. Neural Network Architectuur met PyTorch¶
We bouwen een feedforward neural network met PyTorch's nn.Module:
Input Layer → Hidden Layer → Output Layer
Input: 4 features (sepal length, sepal width, petal length, petal width)
Hidden Layer: 8 neurons met ReLU activatie
Output Layer: 3 neurons (voor 3 klassen)
Loss Functie¶
CrossEntropyLoss in PyTorch (incl. Softmax):
# Feedforward classifier built on PyTorch's nn.Module.
class NeuralNetwork(nn.Module):
    """Fully connected feedforward classifier with one hidden layer.

    Layout:
        input (n_features) -> Linear -> ReLU -> Linear -> logits (n_classes)

    The forward pass returns raw logits; the softmax is left to
    ``nn.CrossEntropyLoss`` during training.
    """

    def __init__(self, n_features, n_hidden, n_classes):
        super().__init__()
        self.fc1 = nn.Linear(n_features, n_hidden)  # input -> hidden
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(n_hidden, n_classes)  # hidden -> logits
        # He (Kaiming) initialization is suited to ReLU activations.
        for layer in (self.fc1, self.fc2):
            nn.init.kaiming_normal_(layer.weight, nonlinearity="relu")

    def forward(self, x):
        """Map a batch of feature vectors to unnormalized class scores."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)  # logits; CrossEntropyLoss applies softmax
# Instantiate the network: 4 inputs -> 8 hidden (ReLU) -> 3 class logits.
n_features = X_train_scaled.shape[1]
n_hidden = 8
n_classes = 3

model = NeuralNetwork(n_features, n_hidden, n_classes).to(device)

print("โ PyTorch Neural Network Model Created!")
print("\nModel Architecture:")
print(model)

total_params = sum(p.numel() for p in model.parameters())
print(f"\nTotal parameters: {total_params}")
print("\nParameter details:")
for name, param in model.named_parameters():
    print(f"  {name}: {param.shape} ({param.numel()} parameters)")
Model Architecture:
NeuralNetwork(
(fc1): Linear(in_features=4, out_features=8, bias=True)
(relu): ReLU()
(fc2): Linear(in_features=8, out_features=3, bias=True)
)
Total parameters: 67
Parameter details:
fc1.weight: torch.Size([8, 4]) (32 parameters)
fc1.bias: torch.Size([8]) (8 parameters)
fc2.weight: torch.Size([3, 8]) (24 parameters)
fc2.bias: torch.Size([3]) (3 parameters)
4. Loss Functie en Optimizer¶
PyTorch maakt training veel eenvoudiger:
Loss functie: nn.CrossEntropyLoss() combineert Softmax + Cross-Entropy
Optimizer: optim.SGD() implementeert gradient descent met momentum optie
Backpropagation: gebeurt automatisch met loss.backward()
# CrossEntropyLoss fuses LogSoftmax + NLLLoss, so the model emits raw logits.
criterion = nn.CrossEntropyLoss()

# Plain SGD (no momentum) over all trainable parameters.
learning_rate = 0.1
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

print("โ Loss function and optimizer configured!")
print(f"\nLoss function: {criterion}")
print(f"Optimizer: {optimizer}")
print(f"Learning rate: {optimizer.param_groups[0]['lr']}")
Loss function: CrossEntropyLoss()
Optimizer: SGD (
Parameter Group 0
dampening: 0
differentiable: False
foreach: None
fused: None
lr: 0.1
maximize: False
momentum: 0
nesterov: False
weight_decay: 0
)
Learning rate: 0.1
5. Training het Neural Network met PyTorch¶
De training loop is veel eenvoudiger met PyTorch:
Forward pass: outputs = model(inputs)
Compute loss: loss = criterion(outputs, targets)
Backward pass: loss.backward() (automatische backpropagation!)
Update weights: optimizer.step()
# ---- Training configuration ----
n_epochs = 2000

# Per-epoch history, consumed later by the loss/accuracy plots.
train_losses, test_losses = [], []
train_accuracies, test_accuracies = [], []

print("Training Neural Network with PyTorch...")
print(f"Architecture: {n_features} โ {n_hidden} (ReLU) โ {n_classes}")
print(f"Learning rate: {optimizer.param_groups[0]['lr']}")
print(f"Epochs: {n_epochs}")
print("\nTraining progress:")
print("-" * 70)

for epoch in range(n_epochs):
    # -- Training step: forward, loss, backprop, parameter update --
    model.train()
    train_outputs = model(X_train_tensor)
    train_loss = criterion(train_outputs, y_train_tensor)

    optimizer.zero_grad()  # clear gradients from the previous step
    train_loss.backward()  # backpropagation
    optimizer.step()  # apply the SGD update

    # -- Evaluation: accuracies and test loss; no gradients needed --
    model.eval()
    with torch.no_grad():
        train_predicted = train_outputs.argmax(dim=1)
        train_acc = (train_predicted == y_train_tensor).float().mean()

        test_outputs = model(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor)
        test_predicted = test_outputs.argmax(dim=1)
        test_acc = (test_predicted == y_test_tensor).float().mean()

    train_losses.append(train_loss.item())
    test_losses.append(test_loss.item())
    train_accuracies.append(train_acc.item())
    test_accuracies.append(test_acc.item())

    # Progress report on the first epoch and every 200th epoch thereafter.
    if epoch == 0 or (epoch + 1) % 200 == 0:
        print(
            f"Epoch {epoch + 1:4d} | "
            f"Train Loss: {train_loss.item():.4f} | "
            f"Test Loss: {test_loss.item():.4f} | "
            f"Train Acc: {train_acc.item():.2%} | "
            f"Test Acc: {test_acc.item():.2%}"
        )

print("-" * 70)
print("\nโ Training completed!")
print("\nFinal Results:")
print(f"  Training Loss: {train_losses[-1]:.4f}")
print(f"  Test Loss: {test_losses[-1]:.4f}")
print(f"  Training Accuracy: {train_accuracies[-1]:.2%}")
print(f"  Test Accuracy: {test_accuracies[-1]:.2%}")
Architecture: 4 โ 8 (ReLU) โ 3
Learning rate: 0.1
Epochs: 2000
Training progress:
----------------------------------------------------------------------
Epoch 1 | Train Loss: 1.1086 | Test Loss: 0.8888 | Train Acc: 43.33% | Test Acc: 63.33%
Epoch 200 | Train Loss: 0.1244 | Test Loss: 0.1427 | Train Acc: 96.67% | Test Acc: 96.67%
Epoch 200 | Train Loss: 0.1244 | Test Loss: 0.1427 | Train Acc: 96.67% | Test Acc: 96.67%
Epoch 400 | Train Loss: 0.0727 | Test Loss: 0.0897 | Train Acc: 97.50% | Test Acc: 96.67%
Epoch 400 | Train Loss: 0.0727 | Test Loss: 0.0897 | Train Acc: 97.50% | Test Acc: 96.67%
Epoch 600 | Train Loss: 0.0565 | Test Loss: 0.0759 | Train Acc: 97.50% | Test Acc: 96.67%
Epoch 600 | Train Loss: 0.0565 | Test Loss: 0.0759 | Train Acc: 97.50% | Test Acc: 96.67%
Epoch 800 | Train Loss: 0.0488 | Test Loss: 0.0707 | Train Acc: 98.33% | Test Acc: 96.67%
Epoch 800 | Train Loss: 0.0488 | Test Loss: 0.0707 | Train Acc: 98.33% | Test Acc: 96.67%
Epoch 1000 | Train Loss: 0.0442 | Test Loss: 0.0692 | Train Acc: 98.33% | Test Acc: 96.67%
Epoch 1000 | Train Loss: 0.0442 | Test Loss: 0.0692 | Train Acc: 98.33% | Test Acc: 96.67%
Epoch 1200 | Train Loss: 0.0411 | Test Loss: 0.0695 | Train Acc: 98.33% | Test Acc: 96.67%
Epoch 1200 | Train Loss: 0.0411 | Test Loss: 0.0695 | Train Acc: 98.33% | Test Acc: 96.67%
Epoch 1400 | Train Loss: 0.0388 | Test Loss: 0.0713 | Train Acc: 98.33% | Test Acc: 96.67%
Epoch 1400 | Train Loss: 0.0388 | Test Loss: 0.0713 | Train Acc: 98.33% | Test Acc: 96.67%
Epoch 1600 | Train Loss: 0.0369 | Test Loss: 0.0742 | Train Acc: 98.33% | Test Acc: 96.67%
Epoch 1600 | Train Loss: 0.0369 | Test Loss: 0.0742 | Train Acc: 98.33% | Test Acc: 96.67%
Epoch 1800 | Train Loss: 0.0354 | Test Loss: 0.0766 | Train Acc: 98.33% | Test Acc: 96.67%
Epoch 1800 | Train Loss: 0.0354 | Test Loss: 0.0766 | Train Acc: 98.33% | Test Acc: 96.67%
Epoch 2000 | Train Loss: 0.0341 | Test Loss: 0.0790 | Train Acc: 98.33% | Test Acc: 96.67%
----------------------------------------------------------------------
โ Training completed!
Final Results:
Training Loss: 0.0341
Test Loss: 0.0790
Training Accuracy: 98.33%
Test Accuracy: 96.67%
Epoch 2000 | Train Loss: 0.0341 | Test Loss: 0.0790 | Train Acc: 98.33% | Test Acc: 96.67%
----------------------------------------------------------------------
โ Training completed!
Final Results:
Training Loss: 0.0341
Test Loss: 0.0790
Training Accuracy: 98.33%
Test Accuracy: 96.67%
6. Visualisatie van Training Procesยถ
We visualiseren hoe de loss en accuracy evolueren tijdens training.
# Shared x-axis for both training-history plots.
epochs = np.arange(1, n_epochs + 1)


def _history_frame(column, train_values, test_values):
    """Stack train/test per-epoch series into one long-format DataFrame."""
    return pd.DataFrame(
        {
            "Epoch": np.tile(epochs, 2),
            column: train_values + test_values,
            "Dataset": ["Training"] * n_epochs + ["Test"] * n_epochs,
        }
    )


# Loss curves (training vs. test).
loss_df = _history_frame("Loss", train_losses, test_losses)
fig_loss = px.line(
    loss_df,
    x="Epoch",
    y="Loss",
    color="Dataset",
    title="Categorical Cross-Entropy Loss Over Training",
    labels={"Loss": "Cross-Entropy Loss"},
    color_discrete_map={"Training": "blue", "Test": "red"},
    width=900,
    height=500,
)
fig_loss.show()

# Accuracy curves, same layout.
acc_df = _history_frame("Accuracy", train_accuracies, test_accuracies)
fig_acc = px.line(
    acc_df,
    x="Epoch",
    y="Accuracy",
    color="Dataset",
    title="Classification Accuracy Over Training",
    color_discrete_map={"Training": "blue", "Test": "red"},
    width=900,
    height=500,
)
fig_acc.update_yaxes(tickformat=".0%", range=[0, 1.05])
fig_acc.show()

7. Model Evaluatie¶
Laten we de voorspellingen analyseren en een confusion matrix maken om te zien hoe goed het model elke klasse herkent.
# Final predictions on the held-out test set.
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    test_probs = torch.softmax(test_outputs, dim=1)  # logits -> probabilities
    test_pred_labels = test_probs.argmax(dim=1)

# NumPy copies for pandas and plotting.
test_probs_np = test_probs.cpu().numpy()
test_pred_labels_np = test_pred_labels.cpu().numpy()

species_names = ["Setosa", "Versicolor", "Virginica"]

# One row per test sample: truth, prediction, correctness, class probabilities.
results_df = pd.DataFrame(
    {
        "True Label": y_test,
        "True Species": [species_names[i] for i in y_test],
        "Predicted Label": test_pred_labels_np,
        "Predicted Species": [species_names[i] for i in test_pred_labels_np],
        "Correct": y_test == test_pred_labels_np,
    }
)
for class_idx, class_name in enumerate(species_names):
    results_df[f"P({class_name})"] = test_probs_np[:, class_idx]

print("Test Set Predictions:")
print(results_df)
print(f"\nTest Accuracy: {(y_test == test_pred_labels_np).mean():.2%}")
print(f"Correct predictions: {(y_test == test_pred_labels_np).sum()} / {len(y_test)}")
True Label True Species Predicted Label Predicted Species Correct \
0 0 Setosa 0 Setosa True
1 2 Virginica 2 Virginica True
2 1 Versicolor 1 Versicolor True
3 1 Versicolor 1 Versicolor True
4 0 Setosa 0 Setosa True
5 1 Versicolor 1 Versicolor True
6 0 Setosa 0 Setosa True
7 0 Setosa 0 Setosa True
8 2 Virginica 2 Virginica True
9 1 Versicolor 1 Versicolor True
10 2 Virginica 2 Virginica True
11 2 Virginica 2 Virginica True
12 2 Virginica 2 Virginica True
13 1 Versicolor 1 Versicolor True
14 0 Setosa 0 Setosa True
15 0 Setosa 0 Setosa True
16 0 Setosa 0 Setosa True
17 1 Versicolor 1 Versicolor True
18 1 Versicolor 1 Versicolor True
19 2 Virginica 2 Virginica True
20 0 Setosa 0 Setosa True
21 2 Virginica 2 Virginica True
22 1 Versicolor 1 Versicolor True
23 2 Virginica 2 Virginica True
24 2 Virginica 2 Virginica True
25 1 Versicolor 2 Virginica False
26 1 Versicolor 1 Versicolor True
27 0 Setosa 0 Setosa True
28 2 Virginica 2 Virginica True
29 0 Setosa 0 Setosa True
P(Setosa) P(Versicolor) P(Virginica)
0 9.999610e-01 0.000039 1.757449e-10
1 9.504844e-05 0.239251 7.606544e-01
2 1.875296e-02 0.980820 4.270269e-04
3 5.445493e-03 0.994315 2.393856e-04
4 9.999787e-01 0.000021 3.070104e-11
5 2.603072e-03 0.989533 7.863755e-03
6 9.999881e-01 0.000012 9.195798e-13
7 9.981330e-01 0.001867 1.980528e-11
8 1.961295e-07 0.002403 9.975963e-01
9 6.045294e-03 0.986548 7.407081e-03
10 2.199855e-11 0.000032 9.999676e-01
11 5.100688e-09 0.000081 9.999187e-01
12 3.367440e-10 0.000007 9.999931e-01
13 9.462506e-04 0.997271 1.782788e-03
14 9.997646e-01 0.000235 1.141330e-11
15 9.997123e-01 0.000288 2.536426e-11
16 9.972774e-01 0.002723 2.407829e-13
17 2.418574e-04 0.999639 1.188727e-04
18 1.700104e-04 0.991062 8.768411e-03
19 2.315869e-04 0.371975 6.277933e-01
20 9.993082e-01 0.000692 1.394408e-12
21 1.453819e-10 0.000002 9.999981e-01
22 3.425601e-05 0.998091 1.874920e-03
23 1.270035e-06 0.188950 8.110488e-01
24 1.833154e-10 0.000009 9.999906e-01
25 1.128598e-05 0.264268 7.357206e-01
26 9.528662e-05 0.998217 1.687202e-03
27 9.934990e-01 0.006501 4.232325e-12
28 4.847093e-07 0.009062 9.909378e-01
29 9.994584e-01 0.000542 1.297925e-11
Test Accuracy: 96.67%
Correct predictions: 29 / 30
# Confusion matrix of the test-set predictions.
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, test_pred_labels_np)

# Labeled frame so the heatmap axes read as species names.
class_labels = ["Setosa (0)", "Versicolor (1)", "Virginica (2)"]
cm_df = pd.DataFrame(cm, index=class_labels, columns=class_labels)

fig = px.imshow(
    cm_df,
    text_auto=True,
    color_continuous_scale="Blues",
    title="Confusion Matrix: PyTorch Neural Network Classification",
    labels={"x": "Predicted Label", "y": "True Label", "color": "Count"},
    width=700,
    height=600,
)
fig.update_traces(textfont_size=18)
fig.show()

print("\nConfusion Matrix Analysis:")
print(f"Diagonal (correct predictions): {np.diag(cm)}")
print(f"Total correct: {np.trace(cm)} / {len(y_test)}")
print("\nPer-class accuracy:")
for idx, name in enumerate(species_names):
    row_total = cm[idx].sum()  # all samples whose true class is `idx`
    print(f"  {name}: {cm[idx, idx]}/{row_total} = {cm[idx, idx] / row_total:.2%}")
Confusion Matrix Analysis:
Diagonal (correct predictions): [10 9 10]
Total correct: 29 / 30
Per-class accuracy:
Setosa: 10/10 = 100.00%
Versicolor: 9/10 = 90.00%
Virginica: 10/10 = 100.00%
8. Decision Boundaries Visualisatieยถ
We visualiseren de decision boundaries van het neural network in 2D (gebruikmakend van de twee belangrijkste features).
# A second network trained on just the two petal features, so its decision
# boundaries can be drawn in 2D.
petal_cols = [2, 3]  # petal length, petal width
X_train_2d = X_train_scaled[:, petal_cols]
X_test_2d = X_test_scaled[:, petal_cols]

X_train_2d_tensor = torch.FloatTensor(X_train_2d).to(device)
X_test_2d_tensor = torch.FloatTensor(X_test_2d).to(device)

# Same architecture as before, but with only 2 input features.
model_2d = NeuralNetwork(n_features=2, n_hidden=8, n_classes=3).to(device)
criterion_2d = nn.CrossEntropyLoss()
optimizer_2d = optim.SGD(model_2d.parameters(), lr=0.1)

print("Training 2D neural network for visualization...")
model_2d.train()
for _ in range(1000):
    optimizer_2d.zero_grad()
    loss = criterion_2d(model_2d(X_train_2d_tensor), y_train_tensor)
    loss.backward()
    optimizer_2d.step()

# Accuracy of the reduced model on the test split.
model_2d.eval()
with torch.no_grad():
    preds_2d = model_2d(X_test_2d_tensor).argmax(dim=1)
    acc_2d = (preds_2d == y_test_tensor).float().mean()
print(f"2D Model Test Accuracy: {acc_2d.item():.2%}")
2D Model Test Accuracy: 96.67%
2D Model Test Accuracy: 96.67%
# Dense 200x200 grid spanning the (standardized) petal feature plane,
# padded by 0.5 on every side.
pad = 0.5
x1_min, x1_max = X_train_2d[:, 0].min() - pad, X_train_2d[:, 0].max() + pad
x2_min, x2_max = X_train_2d[:, 1].min() - pad, X_train_2d[:, 1].max() + pad
grid_x = np.linspace(x1_min, x1_max, 200)
grid_y = np.linspace(x2_min, x2_max, 200)
xx1, xx2 = np.meshgrid(grid_x, grid_y)

# Classify every grid point with the trained 2D model.
X_grid = np.c_[xx1.ravel(), xx2.ravel()]
X_grid_tensor = torch.FloatTensor(X_grid).to(device)
model_2d.eval()
with torch.no_grad():
    Z = model_2d(X_grid_tensor)
    Z_labels = torch.argmax(Z, dim=1).cpu().numpy().reshape(xx1.shape)

# Background image: predicted class per grid cell.
fig = px.imshow(
    Z_labels,
    x=grid_x,
    y=grid_y,
    color_continuous_scale=[[0, "#1f77b4"], [0.5, "#e377c2"], [1, "#17becf"]],
    origin="lower",
    title="PyTorch Neural Network Decision Boundaries (3 Classes)",
    labels={
        "x": "Petal Length (standardized)",
        "y": "Petal Width (standardized)",
        "color": "Class",
    },
    width=1000,
    height=700,
)

# Training and test points in long form for the scatter overlays.
train_df = pd.DataFrame(
    {
        "Petal Length": X_train_2d[:, 0],
        "Petal Width": X_train_2d[:, 1],
        "Species": [species_names[i] for i in y_train],
        "Type": "Training",
    }
)
test_df = pd.DataFrame(
    {
        "Petal Length": X_test_2d[:, 0],
        "Petal Width": X_test_2d[:, 1],
        "Species": [species_names[i] for i in y_test],
        "Type": "Test",
    }
)

# Overlay one trace per (split, species): circles with white edges for the
# training set, triangles with black edges for the test set.
class_palette = ["#1f77b4", "#e377c2", "#17becf"]
overlays = (
    (train_df, "train", "white", "circle"),
    (test_df, "test", "black", "triangle-up"),
)
for frame, suffix, edge_color, symbol in overlays:
    for idx, species in enumerate(species_names):
        subset = frame[frame["Species"] == species]
        fig.add_scatter(
            x=subset["Petal Length"],
            y=subset["Petal Width"],
            mode="markers",
            marker={
                "size": 10,
                "color": class_palette[idx],
                "line": {"width": 2, "color": edge_color},
                "symbol": symbol,
            },
            name=f"{species} ({suffix})",
        )

# Hide the colorbar (legend is sufficient with scatter traces)
fig.update_coloraxes(showscale=False)
fig.show()

print("\nโ Decision boundaries visualisatie compleet!")
print(
    "Merk op hoe het neural network non-lineaire decision boundaries kan leren dankzij de hidden layer."
)
โ Decision boundaries visualisatie compleet!
Merk op hoe het neural network non-lineaire decision boundaries kan leren dankzij de hidden layer.
9. Model Inspectie met PyTorchยถ
PyTorch biedt handige tools om het model te inspecteren.
# Human-readable summary of the trained model and all of its parameters.
banner = "=" * 70
print(banner)
print("MODEL SUMMARY")
print(banner)
print(f"\n{model}\n")

print(banner)
print("PARAMETER DETAILS")
print(banner)
for name, param in model.named_parameters():
    print(f"\n{name}:")
    print(f"  Shape: {param.shape}")
    print(f"  Number of parameters: {param.numel()}")
    print(f"  Requires gradient: {param.requires_grad}")
    # Show at most the first 3 rows of matrices; vectors print whole.
    sample = param.data[:3] if param.dim() > 1 else param.data
    print(f"  Sample values:\n{sample}")

print(f"\n{banner}")
print(f"TOTAL PARAMETERS: {sum(p.numel() for p in model.parameters())}")
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"TRAINABLE PARAMETERS: {trainable}")
print(banner)
# Re-run one forward/backward pass so fresh gradients are available to inspect.
print("\n" + "=" * 70)
print("GRADIENT INFORMATION (from last training step)")
print("=" * 70)

model.train()
outputs = model(X_train_tensor)
loss = criterion(outputs, y_train_tensor)
optimizer.zero_grad()
loss.backward()

# Summary statistics of each parameter's gradient tensor.
for name, param in model.named_parameters():
    if param.grad is None:
        continue
    grad = param.grad
    print(f"\n{name}:")
    print(f"  Gradient shape: {grad.shape}")
    print(f"  Gradient mean: {grad.mean().item():.6f}")
    print(f"  Gradient std: {grad.std().item():.6f}")
    print(f"  Gradient norm: {grad.norm().item():.6f}")
MODEL SUMMARY
======================================================================
NeuralNetwork(
(fc1): Linear(in_features=4, out_features=8, bias=True)
(relu): ReLU()
(fc2): Linear(in_features=8, out_features=3, bias=True)
)
======================================================================
PARAMETER DETAILS
======================================================================
fc1.weight:
Shape: torch.Size([8, 4])
Number of parameters: 32
Requires gradient: True
Sample values:
tensor([[ 0.8500, -0.7660, 2.8484, 1.1504],
[-0.8707, -0.0582, 0.6883, 0.7688],
[-0.8666, 0.2275, -1.0095, -1.8327]])
fc1.bias:
Shape: torch.Size([8])
Number of parameters: 8
Requires gradient: True
Sample values:
tensor([-0.1341, 0.3674, 0.1801, 0.7176, 0.6453, -0.5758, 2.2791, 0.4985])
fc2.weight:
Shape: torch.Size([3, 8])
Number of parameters: 24
Requires gradient: True
Sample values:
tensor([[-1.0107, -0.9730, 1.2355, 0.3388, -1.5030, -0.5591, 0.0304, -0.8294],
[-0.3976, -0.1520, -1.5935, 0.0036, -0.0645, -0.7683, 1.4653, -0.1179],
[ 1.9803, 0.4127, -0.7380, -1.3845, -0.0356, 1.1195, -2.5978, -0.1518]])
fc2.bias:
Shape: torch.Size([3])
Number of parameters: 3
Requires gradient: True
Sample values:
tensor([ 0.0111, 1.0494, -1.1958])
======================================================================
TOTAL PARAMETERS: 67
TRAINABLE PARAMETERS: 67
======================================================================
======================================================================
GRADIENT INFORMATION (from last training step)
======================================================================
fc1.weight:
Gradient shape: torch.Size([8, 4])
Gradient mean: -0.000161
Gradient std: 0.000948
Gradient norm: 0.005355
fc1.bias:
Gradient shape: torch.Size([8])
Gradient mean: -0.000050
Gradient std: 0.001579
Gradient norm: 0.004181
fc2.weight:
Gradient shape: torch.Size([3, 8])
Gradient mean: -0.000000
Gradient std: 0.001164
Gradient norm: 0.005580
fc2.bias:
Gradient shape: torch.Size([3])
Gradient mean: -0.000000
Gradient std: 0.000545
Gradient norm: 0.000771