import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KernelDensity

# Parameters
np.random.seed(42)  # Fixed seed keeps the generated sample reproducible
n_samples = 100
x_min, x_max = 0, 10


def _target_function(values):
    """Noise-free target: x/2 below 4, constant 2 on [4, 7), -x/2 + 5.5 after."""
    return np.piecewise(
        values,
        [values < 4, (values >= 4) & (values < 7), values >= 7],
        [lambda v: v / 2, 2, lambda v: -v / 2 + 5.5],
    )


# Noise-free curve on a regular grid (plotted as the reference line)
x = np.linspace(x_min, x_max, n_samples)
true_target = _target_function(x)

# Noisy sample. Keep the draw order (noise first, then locations):
# reordering the two calls changes the random stream and hence the data.
noise = np.random.normal(0, 0.5, n_samples)
data_x = np.sort(np.random.uniform(x_min, x_max, n_samples))
data_y = _target_function(data_x) + noise

# Kernel bandwidths for underfitting, ok, and overfitting
bandwidths = [2.0, 0.5, 0.1]
# Raw f-string: "\s" is not a valid escape sequence in a plain string literal.
# A list (rather than a one-shot map iterator) can be iterated more than once.
titles = [rf"$\sigma = {s}$" for s in bandwidths]

# Nadaraya-Watson kernel regression with a Gaussian kernel
def nadaraya_watson(x_grid, data_x, data_y, bandwidth):
    """Evaluate the Gaussian-kernel Nadaraya-Watson estimator on ``x_grid``.

    Each prediction is a weighted average of ``data_y`` whose weights are
    proportional to ``exp(-0.5 * ((x - x_i) / bandwidth) ** 2)`` and are
    normalised to sum to one for every query point.

    Args:
        x_grid: 1-D array-like of query points.
        data_x: 1-D array-like of sample locations.
        data_y: Array-like of sample values aligned with ``data_x``.
        bandwidth: Width (sigma) of the Gaussian kernel; must be non-zero.

    Returns:
        Array with one prediction per entry of ``x_grid``.
    """
    x_grid = np.asarray(x_grid)
    data_x = np.asarray(data_x)
    data_y = np.asarray(data_y)

    log_weights = -0.5 * ((x_grid[:, None] - data_x[None, :]) / bandwidth) ** 2
    # Shift each row so its largest exponent is 0 before exponentiating.
    # The shift cancels in the normalisation below, but it stops every
    # weight in a row from underflowing to 0 (which produced 0/0 = NaN for
    # query points far from all samples relative to the bandwidth).
    # ``initial`` keeps the reduction defined when ``data_x`` is empty.
    log_weights -= np.max(log_weights, axis=1, keepdims=True, initial=-np.inf)
    weights = np.exp(log_weights)
    weights /= np.sum(weights, axis=1, keepdims=True)  # Normalize weights
    return weights @ data_y

# Evaluate the estimator on a dense grid and draw one subplot per bandwidth
x_grid = np.linspace(x_min, x_max, 500)
fig, axs = plt.subplots(1, 3, figsize=(15, 5), sharey=True)
for panel, sigma, heading in zip(axs, bandwidths, titles):
    predictions = nadaraya_watson(x_grid, data_x, data_y, sigma)

    # Artists are added as sample, reference curve, estimate; the legend
    # lists them in that same order.
    panel.scatter(data_x, data_y, alpha=0.5, label="Data")
    panel.plot(x, true_target, 'k--', label="True Target")
    panel.plot(x_grid, predictions, label="Nadaraya-Watson", color='red')
    panel.set(title=heading, xlabel="$x$")
    panel.legend()

# The y axis is shared across panels, so only the first one is labelled
axs[0].set_ylabel("$y$")
plt.tight_layout()

save_figure = True  # Set to False to skip writing the PNG to disk
if save_figure:
    plt.savefig("nadaraya-watson-gaussian.png", dpi=300)

plt.show()