Interactive Linear Regression Visualization
This notebook demonstrates linear regression concepts through interactive visualizations. We'll explore:
- How gradient descent finds the optimal parameters
- The effect of different learning rates
- Visualizing the cost function landscape
- Understanding regularization
Let's start by importing the necessary libraries and setting up our environment.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.animation import FuncAnimation
from mpl_toolkits.mplot3d import Axes3D
import ipywidgets as widgets
from IPython.display import display, clear_output
import warnings
warnings.filterwarnings('ignore')
# Set random seed for reproducibility
np.random.seed(42)
# Configure matplotlib for better notebook display
plt.style.use('seaborn-v0_8-darkgrid')
%matplotlib inline
1. Generate Synthetic Data
First, let's create a function to generate synthetic data with controllable noise levels.
def generate_data(n_samples=100, noise_level=0.2, true_slope=2.5, true_intercept=1.0):
"""
Generate synthetic linear data with noise.
Parameters:
- n_samples: Number of data points
- noise_level: Standard deviation of Gaussian noise
- true_slope: True slope of the linear relationship
- true_intercept: True y-intercept
"""
X = np.random.uniform(-3, 3, n_samples)
noise = np.random.normal(0, noise_level, n_samples)
y = true_slope * X + true_intercept + noise
return X.reshape(-1, 1), y.reshape(-1, 1)
# Generate initial dataset
X, y = generate_data()
print(f"Generated {len(X)} data points")
print(f"X shape: {X.shape}, y shape: {y.shape}")
Generated 100 data points
X shape: (100, 1), y shape: (100, 1)
2. Visualize the Data
def plot_data_with_fit(X, y, w=None, b=None, title="Linear Regression Data"):
"""
Plot the data points and optional regression line.
"""
fig, ax = plt.subplots(figsize=(10, 6))
# Plot data points
ax.scatter(X, y, alpha=0.6, s=50, c='blue', edgecolors='black', linewidth=0.5, label='Data points')
# Plot regression line if parameters provided
if w is not None and b is not None:
X_line = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
y_line = w * X_line + b
ax.plot(X_line, y_line, 'r-', linewidth=2, label=f'Fit: y = {w[0,0]:.2f}x + {b[0,0]:.2f}')
# Plot residuals
y_pred = w * X + b
for i in range(len(X)):
ax.plot([X[i], X[i]], [y[i], y_pred[i]], 'g--', alpha=0.3, linewidth=0.5)
ax.set_xlabel('X', fontsize=12)
ax.set_ylabel('y', fontsize=12)
ax.set_title(title, fontsize=14, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Visualize the initial data
plot_data_with_fit(X, y, title="Synthetic Linear Data")
3. Implement Linear Regression with Gradient Descent
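The class below fits $\hat{y} = wX + b$ by minimizing the (optionally L2-regularized) mean squared error cost

$$J(w, b) = \frac{1}{2m}\sum_{i=1}^{m}\big(\hat{y}_i - y_i\big)^2 + \frac{\lambda}{2m}\lVert w \rVert^2,$$

and updates the parameters with learning rate $\alpha$ by stepping along the negative gradient:

$$w \leftarrow w - \alpha\Big(\tfrac{1}{m}X^\top(\hat{y} - y) + \tfrac{\lambda}{m}w\Big), \qquad b \leftarrow b - \alpha\,\tfrac{1}{m}\sum_{i=1}^{m}(\hat{y}_i - y_i).$$

Note that, as is conventional, the bias $b$ is not regularized.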
class LinearRegression:
def __init__(self, learning_rate=0.01, n_iterations=1000, regularization=0.0):
self.learning_rate = learning_rate
self.n_iterations = n_iterations
self.regularization = regularization
self.costs = []
self.weights_history = []
self.bias_history = []
def cost_function(self, X, y, w, b):
"""Calculate mean squared error with optional L2 regularization."""
m = len(X)
predictions = X @ w + b
cost = (1/(2*m)) * np.sum((predictions - y)**2)
# Add L2 regularization
if self.regularization > 0:
cost += (self.regularization/(2*m)) * np.sum(w**2)
return cost
def fit(self, X, y):
"""Train the model using gradient descent."""
m = len(X)
# Initialize parameters
self.w = np.zeros((X.shape[1], 1))
self.b = np.zeros((1, 1))
for i in range(self.n_iterations):
# Forward propagation
predictions = X @ self.w + self.b
# Calculate gradients
dw = (1/m) * (X.T @ (predictions - y))
db = (1/m) * np.sum(predictions - y)
# Add L2 regularization to weight gradient
if self.regularization > 0:
dw += (self.regularization/m) * self.w
# Update parameters
self.w -= self.learning_rate * dw
self.b -= self.learning_rate * db
# Store history
cost = self.cost_function(X, y, self.w, self.b)
self.costs.append(cost)
self.weights_history.append(self.w.copy())
self.bias_history.append(self.b.copy())
def predict(self, X):
"""Make predictions."""
return X @ self.w + self.b
# Train the model
model = LinearRegression(learning_rate=0.1, n_iterations=100)
model.fit(X, y)
print(f"Final weight: {model.w[0,0]:.4f}")
print(f"Final bias: {model.b[0,0]:.4f}")
print(f"Final cost: {model.costs[-1]:.4f}")
Final weight: 2.4847
Final bias: 0.9970
Final cost: 0.0161
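As a quick sanity check, the gradient-descent estimates can be compared against the closed-form least-squares solution. The snippet below is a minimal sketch using NumPy's lstsq (X_aug and theta are illustrative names); it should land very close to the values printed above.
# Closed-form least squares: append a column of ones so the intercept is fitted jointly
X_aug = np.hstack([X, np.ones_like(X)])
theta, *_ = np.linalg.lstsq(X_aug, y, rcond=None)
print(f"Closed-form weight: {theta[0, 0]:.4f}, bias: {theta[1, 0]:.4f}")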
4. Visualize Training Progress
def plot_training_progress(model):
"""Plot the cost function over iterations and parameter evolution."""
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
# Plot cost over iterations
axes[0].plot(model.costs, 'b-', linewidth=2)
axes[0].set_xlabel('Iteration', fontsize=11)
axes[0].set_ylabel('Cost', fontsize=11)
axes[0].set_title('Cost Function Convergence', fontsize=12, fontweight='bold')
axes[0].grid(True, alpha=0.3)
axes[0].set_yscale('log')
# Plot weight evolution
weights = [w[0,0] for w in model.weights_history]
axes[1].plot(weights, 'g-', linewidth=2)
axes[1].axhline(y=2.5, color='r', linestyle='--', alpha=0.5, label='True value')
axes[1].set_xlabel('Iteration', fontsize=11)
axes[1].set_ylabel('Weight (slope)', fontsize=11)
axes[1].set_title('Weight Convergence', fontsize=12, fontweight='bold')
axes[1].grid(True, alpha=0.3)
axes[1].legend()
# Plot bias evolution
biases = [b[0,0] for b in model.bias_history]
axes[2].plot(biases, 'm-', linewidth=2)
axes[2].axhline(y=1.0, color='r', linestyle='--', alpha=0.5, label='True value')
axes[2].set_xlabel('Iteration', fontsize=11)
axes[2].set_ylabel('Bias (intercept)', fontsize=11)
axes[2].set_title('Bias Convergence', fontsize=12, fontweight='bold')
axes[2].grid(True, alpha=0.3)
axes[2].legend()
plt.tight_layout()
plt.show()
plot_training_progress(model)
5. Visualize Cost Function Landscape in 3D
def plot_cost_surface(X, y, model):
"""Create a 3D visualization of the cost function surface."""
# Create grid of weight and bias values
w_range = np.linspace(-1, 5, 50)
b_range = np.linspace(-2, 4, 50)
W, B = np.meshgrid(w_range, b_range)
# Calculate cost for each combination
Z = np.zeros_like(W)
for i in range(W.shape[0]):
for j in range(W.shape[1]):
w_val = W[i, j].reshape(1, 1)
b_val = B[i, j].reshape(1, 1)
Z[i, j] = model.cost_function(X, y, w_val, b_val)
# Create 3D plot
fig = plt.figure(figsize=(12, 5))
# 3D surface plot
ax1 = fig.add_subplot(121, projection='3d')
surf = ax1.plot_surface(W, B, Z, cmap='viridis', alpha=0.8, edgecolor='none')
# Plot the optimization path
weights_path = [w[0,0] for w in model.weights_history[::5]] # Sample every 5th point
biases_path = [b[0,0] for b in model.bias_history[::5]]
costs_path = model.costs[::5]
ax1.plot(weights_path, biases_path, costs_path, 'r.-', linewidth=2, markersize=8, label='Optimization path')
ax1.scatter([weights_path[-1]], [biases_path[-1]], [costs_path[-1]],
color='red', s=100, marker='*', label='Final point')
ax1.set_xlabel('Weight', fontsize=10)
ax1.set_ylabel('Bias', fontsize=10)
ax1.set_zlabel('Cost', fontsize=10)
ax1.set_title('3D Cost Function Surface', fontsize=12, fontweight='bold')
ax1.legend()
# Contour plot
ax2 = fig.add_subplot(122)
contour = ax2.contour(W, B, Z, levels=20, cmap='viridis')
ax2.clabel(contour, inline=True, fontsize=8)
# Plot optimization path on contour
ax2.plot(weights_path, biases_path, 'r.-', linewidth=2, markersize=6, label='Optimization path')
ax2.scatter([weights_path[-1]], [biases_path[-1]], color='red', s=100, marker='*', label='Final point')
ax2.scatter([2.5], [1.0], color='green', s=100, marker='o', label='True values')
ax2.set_xlabel('Weight', fontsize=10)
ax2.set_ylabel('Bias', fontsize=10)
ax2.set_title('Cost Function Contour Plot', fontsize=12, fontweight='bold')
ax2.legend()
ax2.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
plot_cost_surface(X, y, model)
6. Interactive Learning Rate Comparison
def compare_learning_rates(X, y):
"""Compare different learning rates and their convergence behavior."""
learning_rates = [0.001, 0.01, 0.1, 0.5]
colors = ['blue', 'green', 'orange', 'red']
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()
for idx, (lr, color) in enumerate(zip(learning_rates, colors)):
# Train model with specific learning rate
model = LinearRegression(learning_rate=lr, n_iterations=100)
model.fit(X, y)
# Plot cost convergence
axes[idx].plot(model.costs, color=color, linewidth=2)
axes[idx].set_xlabel('Iteration', fontsize=10)
axes[idx].set_ylabel('Cost', fontsize=10)
axes[idx].set_title(f'Learning Rate: {lr}', fontsize=11, fontweight='bold')
axes[idx].grid(True, alpha=0.3)
axes[idx].set_yscale('log')
# Add convergence info
final_cost = model.costs[-1]
converged = final_cost < 0.1
status = "Converged" if converged else "Not converged"
axes[idx].text(0.95, 0.95, f'Final cost: {final_cost:.4f}\nStatus: {status}',
transform=axes[idx].transAxes,
bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5),
verticalalignment='top', horizontalalignment='right')
plt.suptitle('Learning Rate Comparison', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()
compare_learning_rates(X, y)
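For a quadratic cost like this one there is a classical stability bound: batch gradient descent converges only while the learning rate stays below $2/\lambda_{\max}$, where $\lambda_{\max}$ is the largest eigenvalue of the Hessian $\tfrac{1}{m}X_{\text{aug}}^\top X_{\text{aug}}$. The sketch below (illustrative variable names) estimates that threshold for the current dataset, which puts the learning rates compared above in context.
# Estimate the largest stable learning rate for batch gradient descent on this data.
# The Hessian of the cost in (w, b) is (1/m) * X_aug^T X_aug with X_aug = [X, 1];
# plain gradient descent diverges once the learning rate reaches 2 / lambda_max.
X_aug = np.hstack([X, np.ones_like(X)])
lambda_max = np.linalg.eigvalsh(X_aug.T @ X_aug / len(X)).max()
print(f"Largest Hessian eigenvalue: {lambda_max:.3f}")
print(f"Stable learning rates: roughly below {2 / lambda_max:.3f}")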
7. Interactive Widget for Real-Time Exploration
def interactive_regression():
"""Create an interactive widget for exploring linear regression parameters."""
def update_plot(n_samples, noise_level, learning_rate, n_iterations, regularization):
clear_output(wait=True)
# Generate new data
X_new, y_new = generate_data(n_samples=n_samples, noise_level=noise_level)
# Train model
model = LinearRegression(learning_rate=learning_rate,
n_iterations=n_iterations,
regularization=regularization)
model.fit(X_new, y_new)
# Create subplots
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Plot data and fit
axes[0].scatter(X_new, y_new, alpha=0.6, s=30, c='blue')
X_line = np.linspace(X_new.min(), X_new.max(), 100).reshape(-1, 1)
y_line = model.predict(X_line)
axes[0].plot(X_line, y_line, 'r-', linewidth=2)
axes[0].set_xlabel('X')
axes[0].set_ylabel('y')
axes[0].set_title(f'Linear Regression (w={model.w[0,0]:.3f}, b={model.b[0,0]:.3f})')
axes[0].grid(True, alpha=0.3)
# Plot cost convergence
axes[1].plot(model.costs, 'g-', linewidth=2)
axes[1].set_xlabel('Iteration')
axes[1].set_ylabel('Cost')
axes[1].set_title('Cost Function Convergence')
axes[1].grid(True, alpha=0.3)
axes[1].set_yscale('log')
plt.tight_layout()
plt.show()
# Print statistics
print(f"\nModel Statistics:")
print(f"Final Weight: {model.w[0,0]:.4f} (True: 2.5)")
print(f"Final Bias: {model.b[0,0]:.4f} (True: 1.0)")
print(f"Final Cost: {model.costs[-1]:.6f}")
print(f"R² Score: {1 - model.costs[-1]/np.var(y_new):.4f}")
# Create interactive widgets
interact = widgets.interactive(update_plot,
n_samples=widgets.IntSlider(value=100, min=20, max=500, step=20, description='Samples:'),
noise_level=widgets.FloatSlider(value=0.2, min=0, max=2, step=0.1, description='Noise:'),
learning_rate=widgets.FloatLogSlider(value=0.1, base=10, min=-3, max=0, step=0.1, description='Learn Rate:'),
n_iterations=widgets.IntSlider(value=100, min=10, max=500, step=10, description='Iterations:'),
regularization=widgets.FloatSlider(value=0, min=0, max=1, step=0.01, description='L2 Reg:')
)
display(interact)
# Run the interactive widget
interactive_regression()
8. Regularization Effects Visualization
def visualize_regularization_effects():
"""Compare models with different regularization strengths."""
# Generate data with outliers
X_reg = np.random.uniform(-3, 3, 80)
y_reg = 2.5 * X_reg + 1.0 + np.random.normal(0, 0.3, 80)
# Add some outliers
X_outliers = np.array([-2.5, -1.5, 1.5, 2.5])
y_outliers = np.array([8, -3, -4, 9])
X_combined = np.concatenate([X_reg, X_outliers]).reshape(-1, 1)
y_combined = np.concatenate([y_reg, y_outliers]).reshape(-1, 1)
# Train models with different regularization
reg_values = [0, 0.01, 0.1, 1.0]
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()
for idx, reg in enumerate(reg_values):
model = LinearRegression(learning_rate=0.1, n_iterations=200, regularization=reg)
model.fit(X_combined, y_combined)
# Plot data and fit
axes[idx].scatter(X_reg, y_reg, alpha=0.6, s=30, c='blue', label='Normal data')
axes[idx].scatter(X_outliers, y_outliers, alpha=0.8, s=50, c='red', marker='^', label='Outliers')
X_line = np.linspace(-3, 3, 100).reshape(-1, 1)
y_line = model.predict(X_line)
axes[idx].plot(X_line, y_line, 'g-', linewidth=2,
label=f'Fit: w={model.w[0,0]:.2f}, b={model.b[0,0]:.2f}')
axes[idx].set_xlabel('X')
axes[idx].set_ylabel('y')
axes[idx].set_title(f'Regularization λ = {reg}')
axes[idx].legend()
axes[idx].grid(True, alpha=0.3)
axes[idx].set_ylim(-6, 10)
plt.suptitle('Effect of L2 Regularization on Linear Regression', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()
visualize_regularization_effects()
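For reference, the L2-penalized cost also has a closed-form (ridge) solution obtained from the normal equations $(X_{\text{aug}}^\top X_{\text{aug}} + \lambda D)\theta = X_{\text{aug}}^\top y$, where $D$ zeroes out the bias entry so that, like the gradient-descent class above, the intercept is left unregularized. The sketch below (illustrative names) applies it to the original dataset.
def ridge_closed_form(X, y, lam):
    """Closed-form ridge fit; the bias column is not penalized."""
    X_aug = np.hstack([X, np.ones_like(X)])  # columns: [x, 1]
    penalty = lam * np.eye(X_aug.shape[1])
    penalty[-1, -1] = 0.0  # leave the intercept unregularized
    theta = np.linalg.solve(X_aug.T @ X_aug + penalty, X_aug.T @ y)
    return theta[0, 0], theta[1, 0]

w_cf, b_cf = ridge_closed_form(X, y, lam=0.1)
print(f"Closed-form ridge fit: w = {w_cf:.4f}, b = {b_cf:.4f}")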
Summary and Key Takeaways
This notebook demonstrated several key concepts in linear regression:
Gradient Descent: We visualized how the algorithm iteratively finds the optimal parameters by following the negative gradient of the cost function.
Learning Rate Impact:
- Too small: Slow convergence
- Too large: May overshoot or diverge
- Just right: Fast and stable convergence
Cost Function Landscape: The 3D visualization showed the bowl-shaped nature of the MSE cost function and how gradient descent navigates this surface.
Regularization: L2 regularization helps prevent overfitting by penalizing large weights; in the outlier demo it shrinks the fitted slope rather than rejecting the outliers outright, which is why robust losses such as Huber (see Next Steps) are the usual tool for outlier resistance.
Interactive Exploration: The widgets let you experiment with different hyperparameters and see their effects in real time.
Next Steps
- Try implementing other optimization algorithms (SGD, Adam)
- Extend to polynomial regression (a starter sketch follows this list)
- Explore different cost functions (Huber loss, MAE)
- Implement cross-validation for hyperparameter tuning
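As a starting point for the polynomial-regression extension mentioned above, one possible approach (a minimal sketch; polynomial_features and poly_model are illustrative names) is to expand the input into powers of x, standardize the columns so gradient descent behaves well, and reuse the same LinearRegression class unchanged:
# Expand a single feature into standardized polynomial features [x, x^2, ..., x^degree]
def polynomial_features(X, degree=3):
    feats = np.hstack([X**d for d in range(1, degree + 1)])
    return (feats - feats.mean(axis=0)) / feats.std(axis=0)

X_poly = polynomial_features(X, degree=3)
poly_model = LinearRegression(learning_rate=0.1, n_iterations=500)
poly_model.fit(X_poly, y)
print(f"Learned coefficients (standardized features): {poly_model.w.ravel()}")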