Logistische regressie met gradient descent
In deze lab-sessie passen we logistische regressie met gradient descent toe op de Iris dataset. We bouwen een binaire classifier die onderscheid maakt tussen twee soorten irissen op basis van hun bloemeigenschappen.
Leerdoelen
Logistische regressie implementeren met gradient descent
Binary cross-entropy loss berekenen en minimaliseren
Extras
Model performance evalueren
Decision boundaries visualiseren
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
rng = np.random.default_rng(42)
1. Data Loading en Exploratie
De Iris dataset bevat metingen van 150 iris bloemen van 3 verschillende soorten:
Setosa
Versicolor
Virginica
Voor elke bloem zijn er 4 features gemeten (in cm):
Sepal length (kelkblad lengte)
Sepal width (kelkblad breedte)
Petal length (bloemblad lengte)
Petal width (bloemblad breedte)
⚠️ Voor deze oefening maken we een binaire classificatie: we onderscheiden alleen tussen Versicolor (klasse 0) en Virginica (klasse 1).
# --- Load the Iris data and reduce it to a two-class problem ---
iris = load_iris()

# Wrap the raw arrays in a DataFrame so filtering and plotting stay convenient
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df["species"] = iris.target
species_labels = {0: "setosa", 1: "versicolor", 2: "virginica"}
df["species_name"] = df["species"].map(species_labels)

# Keep only Versicolor (1) and Virginica (2) for the binary classifier
df_binary = df[df["species"].isin([1, 2])].copy()
# Relabel: Versicolor -> 0, Virginica -> 1
df_binary["target"] = (df_binary["species"] == 2).astype(int)

n_versicolor = (df_binary["target"] == 0).sum()
n_virginica = (df_binary["target"] == 1).sum()
print(f"Total samples: {len(df_binary)}")
print(f"Versicolor (0): {n_versicolor}")
print(f"Virginica (1): {n_virginica}")
print(f"\nFeatures: {iris.feature_names}")
print("\nFirst few rows:")
df_binary.head(10)
Total samples: 100
Versicolor (0): 50
Virginica (1): 50
Features: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
First few rows:
# --- Scatter plot of the two petal features, coloured by species ---
petal_axis_labels = {
    "petal length (cm)": "Petal Length (cm)",
    "petal width (cm)": "Petal Width (cm)",
}
fig1 = px.scatter(
    df_binary,
    x="petal length (cm)",
    y="petal width (cm)",
    color="species_name",
    color_discrete_map={"versicolor": "#e377c2", "virginica": "#17becf"},
    title="Iris Dataset: Petal Dimensions",
    labels=petal_axis_labels,
    width=700,
    height=500,
)
# A thin white outline makes overlapping markers easier to tell apart
fig1.update_traces(marker=dict(size=10, line=dict(width=1, color="white")))
fig1.show()
# --- Same scatter plot, but for the two sepal features ---
sepal_axis_labels = {
    "sepal length (cm)": "Sepal Length (cm)",
    "sepal width (cm)": "Sepal Width (cm)",
}
fig2 = px.scatter(
    df_binary,
    x="sepal length (cm)",
    y="sepal width (cm)",
    color="species_name",
    color_discrete_map={"versicolor": "#e377c2", "virginica": "#17becf"},
    title="Iris Dataset: Sepal Dimensions",
    labels=sepal_axis_labels,
    width=700,
    height=500,
)
# A thin white outline makes overlapping markers easier to tell apart
fig2.update_traces(marker=dict(size=10, line=dict(width=1, color="white")))
fig2.show()
2. Data Voorbereiding
We splitsen de data in:
Training set (80%): gebruikt om het model te trainen
Test set (20%): gebruikt om het model te evalueren
We gebruiken feature scaling (standaardisatie) om ervoor te zorgen dat alle features dezelfde schaal hebben. Dit helpt gradient descent sneller convergeren.
# --- Train/test split and feature standardization ---
X = df_binary[iris.feature_names].values
y = df_binary["target"].values

# 80/20 split, stratified so both classes keep their 50/50 balance
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training data only, then apply it to both sets
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Prepend a column of ones so the bias is just another entry of b
X_train_final = np.column_stack([np.ones(len(X_train_scaled)), X_train_scaled])
X_test_final = np.column_stack([np.ones(len(X_test_scaled)), X_test_scaled])

print(f"Training set: {X_train_final.shape[0]} samples")
print(f"Test set: {X_test_final.shape[0]} samples")
print(f"Number of features (including intercept): {X_train_final.shape[1]}")
print("\nClass distribution in training set:")
print(f" Versicolor (0): {(y_train == 0).sum()} ({100 * (y_train == 0).mean():.1f}%)")
print(f" Virginica (1): {(y_train == 1).sum()} ({100 * (y_train == 1).mean():.1f}%)")
Training set: 80 samples
Test set: 20 samples
Number of features (including intercept): 5
Class distribution in training set:
Versicolor (0): 40 (50.0%)
Virginica (1): 40 (50.0%)
3. Logistische Regressie Implementatie
We implementeren logistische regressie met gradient descent zoals gezien in de cursus:
Model
$\hat{y} = \sigma(Xb) = \dfrac{1}{1 + e^{-Xb}}$
Binary Cross-Entropy Loss
$L(b) = -\dfrac{1}{n} \sum_{i=1}^{n} \left[ y_i \log \hat{y}_i + (1 - y_i) \log(1 - \hat{y}_i) \right]$
Gradiënt
$\nabla_b L = \dfrac{1}{n} X^\top (\hat{y} - y)$
Gradient Descent Update
$b \leftarrow b - \eta \, \nabla_b L$
# --- Core building blocks for logistic regression ---
def sigmoid(z):
    """Map raw scores to probabilities with the logistic function.

    Inputs are clipped to [-500, 500] so np.exp cannot overflow.
    """
    safe_z = np.clip(z, -500, 500)
    return 1 / (1 + np.exp(-safe_z))


def bce_loss(X, y, b):
    """Binary Cross-Entropy Loss."""
    probs = sigmoid(X @ b)
    eps = 1e-10  # keeps log() away from exactly zero
    per_sample = y * np.log(probs + eps) + (1 - y) * np.log(1 - probs + eps)
    return -np.mean(per_sample)


def gradient_bce(X, y, b):
    """Gradient of the BCE loss with respect to the parameter vector b."""
    residual = sigmoid(X @ b) - y
    return X.T @ residual / len(y)


def accuracy(X, y, b):
    """Fraction of samples whose thresholded (>= 0.5) prediction matches y."""
    predictions = (sigmoid(X @ b) >= 0.5).astype(int)
    return np.mean(predictions == y)


print("Functions defined:")
print(" - sigmoid(z): Logistic activation function")
print(" - bce_loss(X, y, b): Binary cross-entropy loss")
print(" - gradient_bce(X, y, b): Gradient of BCE loss")
print(" - accuracy(X, y, b): Classification accuracy")
Functions defined:
- sigmoid(z): Logistic activation function
- bce_loss(X, y, b): Binary cross-entropy loss
- gradient_bce(X, y, b): Gradient of BCE loss
- accuracy(X, y, b): Classification accuracy
4. Training met Gradient Descent
Nu trainen we het model met gradient descent. We houden de loss en accuracy bij voor zowel de training als test set.
# --- Gradient Descent Training ---
learning_rate = 0.1
max_iterations = 5000

# All parameters (bias + one weight per feature) start at zero
n_features = X_train_final.shape[1]
b = np.zeros(n_features)

# Seed the histories with the untrained model's metrics
b_history = [b.copy()]
train_loss_history = [bce_loss(X_train_final, y_train, b)]
test_loss_history = [bce_loss(X_test_final, y_test, b)]
train_acc_history = [accuracy(X_train_final, y_train, b)]
test_acc_history = [accuracy(X_test_final, y_test, b)]

print("Starting gradient descent...")
print(f"Learning rate: {learning_rate}")
print(f"Max iterations: {max_iterations}")
print("\nInitial values:")
print(f" Train Loss: {train_loss_history[0]:.4f}")
print(f" Test Loss: {test_loss_history[0]:.4f}")
print(f" Train Accuracy: {train_acc_history[0]:.2%}")
print(f" Test Accuracy: {test_acc_history[0]:.2%}")
print("\nTraining...")

def _snapshot(params):
    """Append the current parameters and train/test metrics to the histories."""
    b_history.append(params.copy())
    train_loss_history.append(bce_loss(X_train_final, y_train, params))
    test_loss_history.append(bce_loss(X_test_final, y_test, params))
    train_acc_history.append(accuracy(X_train_final, y_train, params))
    test_acc_history.append(accuracy(X_test_final, y_test, params))

for iteration in range(max_iterations):
    # One full-batch gradient-descent step
    b = b - learning_rate * gradient_bce(X_train_final, y_train, b)
    # Record every 10th step (plus the final one) to keep memory in check
    if iteration % 10 == 0 or iteration == max_iterations - 1:
        _snapshot(b)

print("\nโ Training completed!")
print("\nFinal results:")
print(f" Train Loss: {train_loss_history[-1]:.4f}")
print(f" Test Loss: {test_loss_history[-1]:.4f}")
print(f" Train Accuracy: {train_acc_history[-1]:.2%}")
print(f" Test Accuracy: {test_acc_history[-1]:.2%}")
print("\nFinal parameters (b):")
print(f" Bias (bโ): {b[0]:.4f}")
for i, feature_name in enumerate(iris.feature_names):
    print(f" {feature_name}: {b[i + 1]:.4f}")
Learning rate: 0.1
Max iterations: 5000
Initial values:
Train Loss: 0.6931
Test Loss: 0.6931
Train Accuracy: 50.00%
Test Accuracy: 50.00%
Training...
โ Training completed!
Final results:
Train Loss: 0.0555
Test Loss: 0.1357
Train Accuracy: 96.25%
Test Accuracy: 95.00%
Final parameters (b):
Bias (bโ): 0.4535
sepal length (cm): -0.5804
sepal width (cm): -0.9527
petal length (cm): 3.9708
petal width (cm): 5.3047
โ Training completed!
Final results:
Train Loss: 0.0555
Test Loss: 0.1357
Train Accuracy: 96.25%
Test Accuracy: 95.00%
Final parameters (b):
Bias (bโ): 0.4535
sepal length (cm): -0.5804
sepal width (cm): -0.9527
petal length (cm): 3.9708
petal width (cm): 5.3047
5. Visualisatie van Training Proces
We visualiseren hoe de loss en accuracy evolueren tijdens het training proces.
# Plot training history.
# FIX: snapshots were recorded before training started (0 updates), after
# every 10th iteration (i + 1 updates), and after the final iteration
# (max_iterations updates). The old `arange(0, max_iterations + 1, 10)`
# labelled each snapshot ~10 updates late and duplicated the final x-value,
# so the x-axis is now derived from the actual recording schedule.
snapshot_iterations = [0] + [
    i + 1
    for i in range(max_iterations)
    if i % 10 == 0 or i == max_iterations - 1
]
# Guard against any length mismatch with the recorded histories
iterations_plot = np.asarray(snapshot_iterations[: len(train_loss_history)])
# Long-format frame: one row per (iteration, dataset) pair for px.line
history_df = pd.DataFrame(
    {
        "Iteration": np.tile(iterations_plot, 2),
        "Loss": train_loss_history + test_loss_history,
        "Dataset": ["Training"] * len(train_loss_history) + ["Test"] * len(test_loss_history),
    }
)
# --- Loss curves for the training and test sets ---
loss_colors = {"Training": "blue", "Test": "red"}
fig_loss = px.line(
    history_df,
    x="Iteration",
    y="Loss",
    color="Dataset",
    color_discrete_map=loss_colors,
    title="Binary Cross-Entropy Loss Over Training Iterations",
    labels={"Loss": "Binary Cross-Entropy Loss"},
    width=800,
    height=500,
)
fig_loss.show()
# --- Accuracy curves, in the same long format as the loss frame ---
accuracy_df = pd.DataFrame(
    {
        "Iteration": np.tile(iterations_plot, 2),
        "Accuracy": train_acc_history + test_acc_history,
        "Dataset": ["Training"] * len(train_acc_history) + ["Test"] * len(test_acc_history),
    }
)
fig_acc = px.line(
    accuracy_df,
    x="Iteration",
    y="Accuracy",
    color="Dataset",
    color_discrete_map={"Training": "blue", "Test": "red"},
    title="Accuracy Over Training Iterations",
    width=800,
    height=500,
)
# Show accuracy as percentages, with a little headroom above 100%
fig_acc.update_yaxes(tickformat=".0%", range=[0, 1.05])
fig_acc.show()
6. Model Evaluatie en Predictions
Laten we kijken naar de voorspellingen van ons model en hoe goed het presteert.
# --- Inspect per-sample predictions on the test set ---
def _species_label(cls):
    """Human-readable species name for a 0/1 class label."""
    return "Versicolor" if cls == 0 else "Virginica"

y_prob_test = sigmoid(X_test_final @ b)
y_pred_test = (y_prob_test >= 0.5).astype(int)

# One row per test sample: truth, probability, prediction, and correctness
results_df = pd.DataFrame(
    {
        "True Label": y_test,
        "True Species": [_species_label(v) for v in y_test],
        "Predicted Prob": y_prob_test,
        "Predicted Label": y_pred_test,
        "Predicted Species": [_species_label(v) for v in y_pred_test],
        "Correct": y_test == y_pred_test,
    }
)
print("Test Set Predictions:")
print(results_df)
print(f"\nTest Accuracy: {accuracy(X_test_final, y_test, b):.2%}")
print(f"Correct predictions: {(y_test == y_pred_test).sum()} / {len(y_test)}")
Test Set Predictions:
True Label True Species Predicted Prob Predicted Label \
0 1 Virginica 0.999416 1
1 1 Virginica 0.801364 1
2 1 Virginica 0.997942 1
3 1 Virginica 0.991538 1
4 0 Versicolor 0.000002 0
5 0 Versicolor 0.000036 0
6 0 Versicolor 0.617276 1
7 1 Virginica 0.994276 1
8 0 Versicolor 0.137936 0
9 0 Versicolor 0.291849 0
10 0 Versicolor 0.000158 0
11 1 Virginica 0.998850 1
12 0 Versicolor 0.000512 0
13 0 Versicolor 0.000304 0
14 0 Versicolor 0.000016 0
15 1 Virginica 0.999984 1
16 1 Virginica 0.687062 1
17 0 Versicolor 0.022189 0
18 1 Virginica 0.699027 1
19 1 Virginica 0.767607 1
Predicted Species Correct
0 Virginica True
1 Virginica True
2 Virginica True
3 Virginica True
4 Versicolor True
5 Versicolor True
6 Virginica False
7 Virginica True
8 Versicolor True
9 Versicolor True
10 Versicolor True
11 Virginica True
12 Versicolor True
13 Versicolor True
14 Versicolor True
15 Virginica True
16 Virginica True
17 Versicolor True
18 Virginica True
19 Virginica True
Test Accuracy: 95.00%
Correct predictions: 19 / 20
# --- Confusion matrix heatmap ---
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred_test)

# Label both axes with the class names
class_names = ["Versicolor (0)", "Virginica (1)"]
cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)

fig = px.imshow(
    cm_df,
    text_auto=True,
    color_continuous_scale="Blues",
    title="Confusion Matrix",
    labels={"x": "Predicted Label", "y": "True Label", "color": "Count"},
    width=600,
    height=500,
)
fig.update_traces(textfont_size=20)
fig.show()

# Unpack the 2x2 matrix in row-major order: TN, FP, FN, TP
tn, fp, fn, tp = cm.ravel()
print("\nConfusion Matrix Analysis:")
print(f"True Negatives (Versicolor correctly classified): {tn}")
print(f"False Positives (Versicolor misclassified as Virginica): {fp}")
print(f"False Negatives (Virginica misclassified as Versicolor): {fn}")
print(f"True Positives (Virginica correctly classified): {tp}")
Confusion Matrix Analysis:
True Negatives (Versicolor correctly classified): 9
False Positives (Versicolor misclassified as Virginica): 1
False Negatives (Virginica misclassified as Versicolor): 0
True Positives (Virginica correctly classified): 10
7. Decision Boundary Visualisatie
We visualiseren de decision boundary van ons model voor twee features tegelijk. We gebruiken petal length en petal width omdat deze features het best discrimineren tussen de twee soorten.
# --- A 2-feature model (petal length & width) so the boundary can be drawn ---
# Columns 2 and 3 of the scaled matrices are the petal features
X_train_2d = X_train_scaled[:, [2, 3]]
X_test_2d = X_test_scaled[:, [2, 3]]

# Prepend the intercept column, as for the full model
X_train_2d_final = np.column_stack([np.ones(len(X_train_2d)), X_train_2d])
X_test_2d_final = np.column_stack([np.ones(len(X_test_2d)), X_test_2d])

# Same gradient-descent recipe as before, now with only 3 parameters
b_2d = np.zeros(3)
learning_rate_2d = 0.1
max_iterations_2d = 5000
for _ in range(max_iterations_2d):
    b_2d = b_2d - learning_rate_2d * gradient_bce(X_train_2d_final, y_train, b_2d)

acc_2d = accuracy(X_test_2d_final, y_test, b_2d)
print(f"2D Model Test Accuracy: {acc_2d:.2%}")
print(f"2D Model Parameters: bโ={b_2d[0]:.3f}, bโ={b_2d[1]:.3f}, bโ={b_2d[2]:.3f}")
2D Model Test Accuracy: 85.00%
2D Model Parameters: bโ=0.536, bโ=4.011, bโ=5.436
# --- Score a dense grid of points for the decision-boundary heatmap ---
# Pad the data range by 0.5 on each side so points sit inside the plot
x1_min, x1_max = X_train_2d[:, 0].min() - 0.5, X_train_2d[:, 0].max() + 0.5
x2_min, x2_max = X_train_2d[:, 1].min() - 0.5, X_train_2d[:, 1].max() + 0.5
xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max, 200), np.linspace(x2_min, x2_max, 200))

# Flatten the grid, add the intercept column, and predict P(Virginica)
X_grid = np.column_stack([np.ones(xx1.ravel().shape[0]), xx1.ravel(), xx2.ravel()])
Z = sigmoid(X_grid @ b_2d).reshape(xx1.shape)
# --- Decision-boundary heatmap with the data points overlaid ---
fig = px.imshow(
    Z,
    x=np.linspace(x1_min, x1_max, 200),
    y=np.linspace(x2_min, x2_max, 200),
    color_continuous_scale="RdYlBu_r",
    origin="lower",
    title="Decision Boundary: Logistic Regression on Iris Dataset",
    labels={
        "x": "Petal Length (standardized)",
        "y": "Petal Width (standardized)",
        "color": "P(Virginica)",
    },
    width=900,
    height=700,
)
fig.update_coloraxes(showscale=False)

# Long-format frames for the scatter overlays
train_df = pd.DataFrame(
    {
        "Petal Length": X_train_2d[:, 0],
        "Petal Width": X_train_2d[:, 1],
        "Species": ["Versicolor" if v == 0 else "Virginica" for v in y_train],
        "Type": "Training",
    }
)
test_df = pd.DataFrame(
    {
        "Petal Length": X_test_2d[:, 0],
        "Petal Width": X_test_2d[:, 1],
        "Species": ["Versicolor" if v == 0 else "Virginica" for v in y_test],
        "Type": "Test",
    }
)

species_colors = {"Versicolor": "#e377c2", "Virginica": "#17becf"}

# Training points: circles with a white outline
for species_name, marker_color in species_colors.items():
    subset = train_df[train_df["Species"] == species_name]
    fig.add_scatter(
        x=subset["Petal Length"],
        y=subset["Petal Width"],
        mode="markers",
        marker={
            "size": 12,
            "color": marker_color,
            "line": {"width": 2, "color": "white"},
            "symbol": "circle",
        },
        name=f"{species_name} (train)",
    )

# Test points: triangles with a black outline
for species_name, marker_color in species_colors.items():
    subset = test_df[test_df["Species"] == species_name]
    fig.add_scatter(
        x=subset["Petal Length"],
        y=subset["Petal Width"],
        mode="markers",
        marker={
            "size": 12,
            "color": marker_color,
            "line": {"width": 2, "color": "black"},
            "symbol": "triangle-up",
        },
        name=f"{species_name} (test)",
    )

fig.show()

# NOTE(review): the "ร" below is a garbled multiplication sign from the
# original export; the runtime string is kept byte-identical.
print("\nDecision boundary equation (in standardized space):")
print(f" {b_2d[0]:.3f} + {b_2d[1]:.3f}ร(petal_length) + {b_2d[2]:.3f}ร(petal_width) = 0")
Decision boundary equation (in standardized space):
0.536 + 4.011ร(petal_length) + 5.436ร(petal_width) = 0
8. Interactieve Visualisatie met Plotly
Laten we een interactieve 3D visualisatie maken om te zien hoe de voorspelde waarschijnlijkheid varieert in de feature space.
# --- DataFrames for the 3D probability-surface figure ---
# NOTE(review): surface_df is built but never referenced later in this file;
# it is kept for parity with the original notebook.
surface_df = pd.DataFrame(
    {
        "Petal Length": xx1.ravel(),
        "Petal Width": xx2.ravel(),
        "P(Virginica)": Z.ravel(),
    }
)

def _names_for(labels):
    """Map 0/1 class labels to species names."""
    return ["Versicolor" if v == 0 else "Virginica" for v in labels]

# Training points with their true class as the z-coordinate
train_3d_df = pd.DataFrame(
    {
        "Petal Length": X_train_2d[:, 0],
        "Petal Width": X_train_2d[:, 1],
        "True Class": y_train,
        "Species": _names_for(y_train),
        "Type": "Training",
    }
)
# Test points, same layout
test_3d_df = pd.DataFrame(
    {
        "Petal Length": X_test_2d[:, 0],
        "Petal Width": X_test_2d[:, 1],
        "True Class": y_test,
        "Species": _names_for(y_test),
        "Type": "Test",
    }
)
# --- 3D view: data points at their true class plus the sigmoid surface ---
fig = px.scatter_3d(
    train_3d_df,
    x="Petal Length",
    y="Petal Width",
    z="True Class",
    color="Species",
    color_discrete_map={"Versicolor": "#e377c2", "Virginica": "#17becf"},
    symbol="Type",
    title="3D Visualization: Logistic Regression Probability Surface",
    width=900,
    height=700,
)

# Test points drawn as black-rimmed diamonds
test_point_colors = ["#e377c2" if s == "Versicolor" else "#17becf" for s in test_3d_df["Species"]]
fig.add_scatter3d(
    x=test_3d_df["Petal Length"],
    y=test_3d_df["Petal Width"],
    z=test_3d_df["True Class"],
    mode="markers",
    marker={
        "size": 8,
        "color": test_point_colors,
        "symbol": "diamond",
        "line": {"color": "black", "width": 2},
    },
    name="Test Data",
)

# The probability surface needs plotly.graph_objects (an extension of plotly.express)
fig.add_trace(
    go.Surface(
        x=np.linspace(x1_min, x1_max, 200),
        y=np.linspace(x2_min, x2_max, 200),
        z=Z,
        colorscale="RdYlBu_r",
        opacity=0.7,
        name="P(Virginica)",
        showscale=True,
        colorbar={"title": "P(Virginica)", "x": 1.1},
    )
)

# Axis titles and an initial camera angle
fig.update_layout(
    scene={
        "xaxis_title": "Petal Length (std)",
        "yaxis_title": "Petal Width (std)",
        "zaxis_title": "Probability / True Class",
        "camera": {"eye": {"x": 1.5, "y": -1.5, "z": 1.2}},
    }
)
fig.show()