Interactive Linear Regression Visualization¶

This notebook demonstrates linear regression concepts through interactive visualizations. We'll explore:

  • How gradient descent finds the optimal parameters
  • The effect of different learning rates
  • Visualizing the cost function landscape
  • Understanding regularization

Let's start by importing the necessary libraries and setting up our environment.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.animation import FuncAnimation
from mpl_toolkits.mplot3d import Axes3D
import ipywidgets as widgets
from IPython.display import display, clear_output
import warnings
warnings.filterwarnings('ignore')

# Set random seed for reproducibility
np.random.seed(42)

# Configure matplotlib for better notebook display
plt.style.use('seaborn-v0_8-darkgrid')
%matplotlib inline

1. Generate Synthetic Data¶

First, let's create a function to generate synthetic data with controllable noise levels.

In [2]:
def generate_data(n_samples=100, noise_level=0.2, true_slope=2.5, true_intercept=1.0):
    """
    Generate synthetic linear data with noise.
    
    Parameters:
    - n_samples: Number of data points
    - noise_level: Standard deviation of Gaussian noise
    - true_slope: True slope of the linear relationship
    - true_intercept: True y-intercept
    """
    X = np.random.uniform(-3, 3, n_samples)
    noise = np.random.normal(0, noise_level, n_samples)
    y = true_slope * X + true_intercept + noise
    return X.reshape(-1, 1), y.reshape(-1, 1)

# Generate initial dataset
X, y = generate_data()
print(f"Generated {len(X)} data points")
print(f"X shape: {X.shape}, y shape: {y.shape}")
Generated 100 data points
X shape: (100, 1), y shape: (100, 1)

2. Visualize the Data¶

In [3]:
def plot_data_with_fit(X, y, w=None, b=None, title="Linear Regression Data"):
    """
    Plot the data points and optional regression line.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    
    # Plot data points
    ax.scatter(X, y, alpha=0.6, s=50, c='blue', edgecolors='black', linewidth=0.5, label='Data points')
    
    # Plot regression line if parameters provided
    if w is not None and b is not None:
        X_line = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
        y_line = w * X_line + b
        ax.plot(X_line, y_line, 'r-', linewidth=2, label=f'Fit: y = {w[0,0]:.2f}x + {b[0,0]:.2f}')
        
        # Plot residuals
        y_pred = w * X + b
        for i in range(len(X)):
            ax.plot([X[i], X[i]], [y[i], y_pred[i]], 'g--', alpha=0.3, linewidth=0.5)
    
    ax.set_xlabel('X', fontsize=12)
    ax.set_ylabel('y', fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()

# Visualize the initial data
plot_data_with_fit(X, y, title="Synthetic Linear Data")
[Figure: scatter plot of the synthetic linear data]

3. Implement Linear Regression with Gradient Descent¶
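
The class below minimizes the mean-squared-error cost by gradient descent, with an optional L2 penalty on the weight (the bias is not regularized). In notation, the cost and gradients implemented in cost_function and fit are

$$J(w, b) = \frac{1}{2m}\sum_{i=1}^{m}\left(w x_i + b - y_i\right)^2 + \frac{\lambda}{2m} w^2,$$

$$\frac{\partial J}{\partial w} = \frac{1}{m}\sum_{i=1}^{m}\left(w x_i + b - y_i\right) x_i + \frac{\lambda}{m} w, \qquad \frac{\partial J}{\partial b} = \frac{1}{m}\sum_{i=1}^{m}\left(w x_i + b - y_i\right),$$

and each iteration applies the updates $w \leftarrow w - \alpha \, \partial J/\partial w$ and $b \leftarrow b - \alpha \, \partial J/\partial b$, where $\alpha$ is the learning rate.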

In [4]:
class LinearRegression:
    def __init__(self, learning_rate=0.01, n_iterations=1000, regularization=0.0):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.regularization = regularization
        self.costs = []
        self.weights_history = []
        self.bias_history = []
    
    def cost_function(self, X, y, w, b):
        """Calculate mean squared error with optional L2 regularization."""
        m = len(X)
        predictions = X @ w + b
        cost = (1/(2*m)) * np.sum((predictions - y)**2)
        
        # Add L2 regularization
        if self.regularization > 0:
            cost += (self.regularization/(2*m)) * np.sum(w**2)
        
        return cost
    
    def fit(self, X, y):
        """Train the model using gradient descent."""
        m = len(X)
        
        # Initialize parameters
        self.w = np.zeros((X.shape[1], 1))
        self.b = np.zeros((1, 1))
        
        for i in range(self.n_iterations):
            # Forward propagation
            predictions = X @ self.w + self.b
            
            # Calculate gradients
            dw = (1/m) * (X.T @ (predictions - y))
            db = (1/m) * np.sum(predictions - y)
            
            # Add L2 regularization to weight gradient
            if self.regularization > 0:
                dw += (self.regularization/m) * self.w
            
            # Update parameters
            self.w -= self.learning_rate * dw
            self.b -= self.learning_rate * db
            
            # Store history
            cost = self.cost_function(X, y, self.w, self.b)
            self.costs.append(cost)
            self.weights_history.append(self.w.copy())
            self.bias_history.append(self.b.copy())
    
    def predict(self, X):
        """Make predictions."""
        return X @ self.w + self.b

# Train the model
model = LinearRegression(learning_rate=0.1, n_iterations=100)
model.fit(X, y)

print(f"Final weight: {model.w[0,0]:.4f}")
print(f"Final bias: {model.b[0,0]:.4f}")
print(f"Final cost: {model.costs[-1]:.4f}")
Final weight: 2.4847
Final bias: 0.9970
Final cost: 0.0161
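
Because this cost is a convex quadratic, the optimum can also be obtained in closed form from the normal equations. As a cross-check (a minimal sketch added for comparison, not part of the original model class), the analytic solution should closely match the parameters found by gradient descent:

In [ ]:
# Cross-check (sketch): closed-form least-squares solution for the same data.
# For J(p) = (1/2m)||A p - y||^2 with A = [x, 1], the minimizer is
# p* = (A^T A)^{-1} A^T y, which gradient descent approaches as it converges.
A = np.hstack([X, np.ones_like(X)])          # design matrix [x, 1]
p_star = np.linalg.solve(A.T @ A, A.T @ y)   # [[slope], [intercept]]
print(f"Closed form      -> slope: {p_star[0, 0]:.4f}, intercept: {p_star[1, 0]:.4f}")
print(f"Gradient descent -> slope: {model.w[0, 0]:.4f}, intercept: {model.b[0, 0]:.4f}")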

4. Visualize Training Progress¶

In [5]:
def plot_training_progress(model):
    """Plot the cost function over iterations and parameter evolution."""
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    
    # Plot cost over iterations
    axes[0].plot(model.costs, 'b-', linewidth=2)
    axes[0].set_xlabel('Iteration', fontsize=11)
    axes[0].set_ylabel('Cost', fontsize=11)
    axes[0].set_title('Cost Function Convergence', fontsize=12, fontweight='bold')
    axes[0].grid(True, alpha=0.3)
    axes[0].set_yscale('log')
    
    # Plot weight evolution
    weights = [w[0,0] for w in model.weights_history]
    axes[1].plot(weights, 'g-', linewidth=2)
    axes[1].axhline(y=2.5, color='r', linestyle='--', alpha=0.5, label='True value')
    axes[1].set_xlabel('Iteration', fontsize=11)
    axes[1].set_ylabel('Weight (slope)', fontsize=11)
    axes[1].set_title('Weight Convergence', fontsize=12, fontweight='bold')
    axes[1].grid(True, alpha=0.3)
    axes[1].legend()
    
    # Plot bias evolution
    biases = [b[0,0] for b in model.bias_history]
    axes[2].plot(biases, 'm-', linewidth=2)
    axes[2].axhline(y=1.0, color='r', linestyle='--', alpha=0.5, label='True value')
    axes[2].set_xlabel('Iteration', fontsize=11)
    axes[2].set_ylabel('Bias (intercept)', fontsize=11)
    axes[2].set_title('Bias Convergence', fontsize=12, fontweight='bold')
    axes[2].grid(True, alpha=0.3)
    axes[2].legend()
    
    plt.tight_layout()
    plt.show()

plot_training_progress(model)
[Figure: cost, weight, and bias convergence over training iterations]

5. Visualize Cost Function Landscape in 3D¶

In [6]:
def plot_cost_surface(X, y, model):
    """Create a 3D visualization of the cost function surface."""
    # Create grid of weight and bias values
    w_range = np.linspace(-1, 5, 50)
    b_range = np.linspace(-2, 4, 50)
    W, B = np.meshgrid(w_range, b_range)
    
    # Calculate cost for each combination
    Z = np.zeros_like(W)
    for i in range(W.shape[0]):
        for j in range(W.shape[1]):
            w_val = W[i, j].reshape(1, 1)
            b_val = B[i, j].reshape(1, 1)
            Z[i, j] = model.cost_function(X, y, w_val, b_val)
    
    # Create 3D plot
    fig = plt.figure(figsize=(12, 5))
    
    # 3D surface plot
    ax1 = fig.add_subplot(121, projection='3d')
    surf = ax1.plot_surface(W, B, Z, cmap='viridis', alpha=0.8, edgecolor='none')
    
    # Plot the optimization path
    weights_path = [w[0,0] for w in model.weights_history[::5]]  # Sample every 5th point
    biases_path = [b[0,0] for b in model.bias_history[::5]]
    costs_path = model.costs[::5]
    
    ax1.plot(weights_path, biases_path, costs_path, 'r.-', linewidth=2, markersize=8, label='Optimization path')
    ax1.scatter([weights_path[-1]], [biases_path[-1]], [costs_path[-1]], 
                color='red', s=100, marker='*', label='Final point')
    
    ax1.set_xlabel('Weight', fontsize=10)
    ax1.set_ylabel('Bias', fontsize=10)
    ax1.set_zlabel('Cost', fontsize=10)
    ax1.set_title('3D Cost Function Surface', fontsize=12, fontweight='bold')
    ax1.legend()
    
    # Contour plot
    ax2 = fig.add_subplot(122)
    contour = ax2.contour(W, B, Z, levels=20, cmap='viridis')
    ax2.clabel(contour, inline=True, fontsize=8)
    
    # Plot optimization path on contour
    ax2.plot(weights_path, biases_path, 'r.-', linewidth=2, markersize=6, label='Optimization path')
    ax2.scatter([weights_path[-1]], [biases_path[-1]], color='red', s=100, marker='*', label='Final point')
    ax2.scatter([2.5], [1.0], color='green', s=100, marker='o', label='True values')
    
    ax2.set_xlabel('Weight', fontsize=10)
    ax2.set_ylabel('Bias', fontsize=10)
    ax2.set_title('Cost Function Contour Plot', fontsize=12, fontweight='bold')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()

plot_cost_surface(X, y, model)
[Figure: 3D cost surface and contour plot with the gradient descent path]
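
The nested loops above keep the grid computation easy to read but become slow for finer grids. As an optional aside (a sketch, equivalent to the loop only when regularization is 0, which is the case for this model), the same cost grid can be computed with NumPy broadcasting:

In [ ]:
# Sketch: vectorized cost grid. resid has shape (n_samples, 50, 50); averaging
# over axis 0 and halving reproduces (1/(2m)) * sum of squared residuals.
w_range = np.linspace(-1, 5, 50)
b_range = np.linspace(-2, 4, 50)
W, B = np.meshgrid(w_range, b_range)
resid = W[None, :, :] * X.reshape(-1, 1, 1) + B[None, :, :] - y.reshape(-1, 1, 1)
Z_vec = 0.5 * np.mean(resid**2, axis=0)
print("Grid shape:", Z_vec.shape, " minimum cost on grid:", round(float(Z_vec.min()), 4))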

6. Interactive Learning Rate Comparison¶

In [7]:
def compare_learning_rates(X, y):
    """Compare different learning rates and their convergence behavior."""
    learning_rates = [0.001, 0.01, 0.1, 0.5]
    colors = ['blue', 'green', 'orange', 'red']
    
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    axes = axes.flatten()
    
    for idx, (lr, color) in enumerate(zip(learning_rates, colors)):
        # Train model with specific learning rate
        model = LinearRegression(learning_rate=lr, n_iterations=100)
        model.fit(X, y)
        
        # Plot cost convergence
        axes[idx].plot(model.costs, color=color, linewidth=2)
        axes[idx].set_xlabel('Iteration', fontsize=10)
        axes[idx].set_ylabel('Cost', fontsize=10)
        axes[idx].set_title(f'Learning Rate: {lr}', fontsize=11, fontweight='bold')
        axes[idx].grid(True, alpha=0.3)
        axes[idx].set_yscale('log')
        
        # Add convergence info
        final_cost = model.costs[-1]
        converged = final_cost < 0.1
        status = "Converged" if converged else "Not converged"
        axes[idx].text(0.95, 0.95, f'Final cost: {final_cost:.4f}\nStatus: {status}', 
                      transform=axes[idx].transAxes, 
                      bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5),
                      verticalalignment='top', horizontalalignment='right')
    
    plt.suptitle('Learning Rate Comparison', fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.show()

compare_learning_rates(X, y)
[Figure: cost convergence curves for learning rates 0.001, 0.01, 0.1, and 0.5]
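
For a quadratic cost like MSE there is also a simple guideline for how large the learning rate can be: plain gradient descent is stable only when the learning rate is below 2 divided by the largest eigenvalue of the cost's Hessian. A minimal sketch (added for intuition; the bound ignores regularization) that estimates this limit for our data:

In [ ]:
# Sketch: stability limit on the learning rate. For J(p) = (1/2m)||A p - y||^2
# the Hessian is H = (1/m) A^T A, and gradient descent diverges once
# learning_rate exceeds roughly 2 / lambda_max(H).
A = np.hstack([X, np.ones_like(X)])      # design matrix [x, 1]
H = (A.T @ A) / len(X)                   # Hessian of the unregularized cost
lam_max = np.linalg.eigvalsh(H).max()
print(f"Largest Hessian eigenvalue: {lam_max:.3f}")
print(f"Approximate stability limit: learning_rate < {2 / lam_max:.3f}")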

7. Interactive Widget for Real-time Exploration¶

In [8]:
def interactive_regression():
    """Create an interactive widget for exploring linear regression parameters."""
    
    def update_plot(n_samples, noise_level, learning_rate, n_iterations, regularization):
        clear_output(wait=True)
        
        # Generate new data
        X_new, y_new = generate_data(n_samples=n_samples, noise_level=noise_level)
        
        # Train model
        model = LinearRegression(learning_rate=learning_rate, 
                                n_iterations=n_iterations,
                                regularization=regularization)
        model.fit(X_new, y_new)
        
        # Create subplots
        fig, axes = plt.subplots(1, 2, figsize=(14, 5))
        
        # Plot data and fit
        axes[0].scatter(X_new, y_new, alpha=0.6, s=30, c='blue')
        X_line = np.linspace(X_new.min(), X_new.max(), 100).reshape(-1, 1)
        y_line = model.predict(X_line)
        axes[0].plot(X_line, y_line, 'r-', linewidth=2)
        axes[0].set_xlabel('X')
        axes[0].set_ylabel('y')
        axes[0].set_title(f'Linear Regression (w={model.w[0,0]:.3f}, b={model.b[0,0]:.3f})')
        axes[0].grid(True, alpha=0.3)
        
        # Plot cost convergence
        axes[1].plot(model.costs, 'g-', linewidth=2)
        axes[1].set_xlabel('Iteration')
        axes[1].set_ylabel('Cost')
        axes[1].set_title('Cost Function Convergence')
        axes[1].grid(True, alpha=0.3)
        axes[1].set_yscale('log')
        
        plt.tight_layout()
        plt.show()
        
        # Print statistics
        print(f"\nModel Statistics:")
        print(f"Final Weight: {model.w[0,0]:.4f} (True: 2.5)")
        print(f"Final Bias: {model.b[0,0]:.4f} (True: 1.0)")
        print(f"Final Cost: {model.costs[-1]:.6f}")
        print(f"R² Score: {1 - model.costs[-1]/np.var(y_new):.4f}")
    
    # Create interactive widgets
    interact = widgets.interactive(update_plot,
        n_samples=widgets.IntSlider(value=100, min=20, max=500, step=20, description='Samples:'),
        noise_level=widgets.FloatSlider(value=0.2, min=0, max=2, step=0.1, description='Noise:'),
        learning_rate=widgets.FloatLogSlider(value=0.1, base=10, min=-3, max=0, step=0.1, description='Learn Rate:'),
        n_iterations=widgets.IntSlider(value=100, min=10, max=500, step=10, description='Iterations:'),
        regularization=widgets.FloatSlider(value=0, min=0, max=1, step=0.01, description='L2 Reg:')
    )
    
    display(interact)

# Run the interactive widget
interactive_regression()
[interactive output: ipywidgets sliders for Samples, Noise, Learn Rate, Iterations, and L2 Reg]

8. Regularization Effects Visualization¶

In [9]:
def visualize_regularization_effects():
    """Compare models with different regularization strengths."""
    
    # Generate data with outliers
    X_reg = np.random.uniform(-3, 3, 80)
    y_reg = 2.5 * X_reg + 1.0 + np.random.normal(0, 0.3, 80)
    
    # Add some outliers
    X_outliers = np.array([-2.5, -1.5, 1.5, 2.5])
    y_outliers = np.array([8, -3, -4, 9])
    
    X_combined = np.concatenate([X_reg, X_outliers]).reshape(-1, 1)
    y_combined = np.concatenate([y_reg, y_outliers]).reshape(-1, 1)
    
    # Train models with different regularization
    reg_values = [0, 0.01, 0.1, 1.0]
    
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    axes = axes.flatten()
    
    for idx, reg in enumerate(reg_values):
        model = LinearRegression(learning_rate=0.1, n_iterations=200, regularization=reg)
        model.fit(X_combined, y_combined)
        
        # Plot data and fit
        axes[idx].scatter(X_reg, y_reg, alpha=0.6, s=30, c='blue', label='Normal data')
        axes[idx].scatter(X_outliers, y_outliers, alpha=0.8, s=50, c='red', marker='^', label='Outliers')
        
        X_line = np.linspace(-3, 3, 100).reshape(-1, 1)
        y_line = model.predict(X_line)
        axes[idx].plot(X_line, y_line, 'g-', linewidth=2, 
                      label=f'Fit: w={model.w[0,0]:.2f}, b={model.b[0,0]:.2f}')
        
        axes[idx].set_xlabel('X')
        axes[idx].set_ylabel('y')
        axes[idx].set_title(f'Regularization λ = {reg}')
        axes[idx].legend()
        axes[idx].grid(True, alpha=0.3)
        axes[idx].set_ylim(-6, 10)
    
    plt.suptitle('Effect of L2 Regularization on Linear Regression', fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.show()

visualize_regularization_effects()
[Figure: regression fits with λ = 0, 0.01, 0.1, and 1.0 on data containing outliers]
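
Ridge regression also has a closed-form solution, which provides a reference for the gradient-descent fits above. A minimal sketch, assuming the same convention as the class (only the slope is penalized, not the intercept); it is shown on the original X, y because the outlier dataset above is local to the plotting function:

In [ ]:
# Sketch: closed-form ridge fit matching the cost used by LinearRegression
# (L2 penalty on the slope only). Minimizing (1/2m)||Ap - y||^2 + (lam/2m) w^2
# gives p = (A^T A + diag(lam, 0))^{-1} A^T y.
def ridge_closed_form(X, y, lam):
    A = np.hstack([X, np.ones_like(X)])            # design matrix [x, 1]
    P = np.diag([lam, 0.0])                        # no penalty on the intercept
    return np.linalg.solve(A.T @ A + P, A.T @ y)   # [[slope], [intercept]]

for lam in [0, 0.01, 0.1, 1.0]:
    slope, intercept = ridge_closed_form(X, y, lam).ravel()
    print(f"lambda={lam:<4}  slope={slope:.4f}  intercept={intercept:.4f}")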

Summary and Key Takeaways¶

This notebook demonstrated several key concepts in linear regression:

  1. Gradient Descent: We visualized how the algorithm iteratively finds the optimal parameters by following the negative gradient of the cost function.

  2. Learning Rate Impact:

    • Too small: Slow convergence
    • Too large: May overshoot or diverge
    • Just right: Fast and stable convergence

  3. Cost Function Landscape: The 3D visualization showed the bowl-shaped nature of the MSE cost function and how gradient descent navigates this surface.

  4. Regularization: L2 regularization penalizes large weights, which helps control overfitting; in the example above it also shrinks the slope, reducing (though not eliminating) the pull of the outliers.

  5. Interactive Exploration: The widgets allow you to experiment with different hyperparameters and see their effects in real-time.

Next Steps¶

  • Try implementing other optimization algorithms (SGD, Adam); a minimal SGD sketch is included below
  • Extend to polynomial regression
  • Explore different cost functions (Huber loss, MAE)
  • Implement cross-validation for hyperparameter tuning
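
As a starting point for the first item above, here is a hedged sketch of mini-batch stochastic gradient descent that reuses the conventions of the LinearRegression class; the batch size, epoch count, and learning rate are illustrative choices, not tuned values:

In [ ]:
# Sketch: mini-batch SGD for the same linear model. Each epoch shuffles the
# data and updates the parameters on small batches instead of the full set.
def sgd_fit(X, y, learning_rate=0.05, n_epochs=50, batch_size=16, seed=0):
    rng = np.random.default_rng(seed)
    w = np.zeros((X.shape[1], 1))
    b = np.zeros((1, 1))
    m = len(X)
    for _ in range(n_epochs):
        order = rng.permutation(m)                   # reshuffle every epoch
        for start in range(0, m, batch_size):
            idx = order[start:start + batch_size]
            err = X[idx] @ w + b - y[idx]            # batch residuals
            w -= learning_rate * (X[idx].T @ err) / len(idx)
            b -= learning_rate * np.sum(err) / len(idx)
    return w, b

w_sgd, b_sgd = sgd_fit(X, y)
print(f"SGD estimate -> slope: {w_sgd[0, 0]:.4f}, intercept: {b_sgd[0, 0]:.4f}")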