# 1D case (earlier prototype, kept commented out for reference)

# import numpy as np
# import matplotlib.pyplot as plt

# # Define parameters and data
# y_train = 1.0  # Example training target
# x_train = 2.0  # Example training feature
# y_val = 1.5    # Example validation target
# x_val = 2.5    # Example validation feature

# # Hyperparameters for the algorithm
# tau = 0.1  # Step size for x updates
# rho = 0.1  # Step size for theta updates
# num_iterations = 100  # Number of iterations to run the algorithm

# # Initial values
# x_k = np.random.rand()  # Initial guess for x (the inner variable, often written w)
# theta_k = np.random.rand()  # Initial guess for theta
# v_k = np.zeros_like(x_k)  # Auxiliary variable v used to build the hypergradient

# # Lists to track the evolution of (x, theta) over iterations for phase portrait
# x_history = [x_k]
# theta_history = [theta_k]

# # Define the function g and its gradients
# def g(x, theta):
#     return (y_train - x * x_train)**2 + theta * x**2

# def grad_g_x(x, theta):
#     return -2 * x_train * (y_train - x * x_train) + 2 * theta * x

# def hessian_g_xx(x, theta):
#     return 2 * x_train**2 + 2 * theta

# def grad_hessian_theta_x(x, theta):
#     # Mixed partial d^2g / (dtheta dx): for the g above this is exactly 2x
#     return 2 * x

# # Define the function f and its gradients
# def f(x, theta):
#     return (y_val - x * x_val)**2

# def grad_f_x(x, theta):
#     return -2 * x_val * (y_val - x * x_val)

# def grad_f_theta(x, theta):
#     # f has no direct dependence on theta here, so this partial derivative is zero
#     return 0
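
# # The loop below is a single-loop scheme for approximate implicit differentiation:
# # differentiating the inner optimality condition dg/dx(x(theta), theta) = 0 gives
# #     dh/dtheta = df/dtheta + (d^2g / dtheta dx) * v,  where (d^2g/dx^2) * v = -df/dx,
# # so x tracks the inner minimizer of g, v is a fixed-point iterate for the linear
# # equation above, and theta descends the resulting hypergradient estimate.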

# # Iterative optimization
# for k in range(num_iterations):
#     # Step 1: Update x
#     x_k = x_k - tau * grad_g_x(x_k, theta_k)
    
#     # Step 2: Update v
#     hessian_xx = hessian_g_xx(x_k, theta_k)
#     v_k = v_k - tau * (hessian_xx * v_k + grad_f_x(x_k, theta_k))
    
#     # Step 3: Update theta
#     cross_term = grad_hessian_theta_x(x_k, theta_k)
#     theta_k = theta_k - rho * (cross_term * v_k + grad_f_theta(x_k, theta_k))
    
#     # Store the values in history for plotting
#     x_history.append(x_k)
#     theta_history.append(theta_k)

# # Plotting the phase portrait of (x, theta) evolution
# plt.figure(figsize=(8, 6))
# plt.plot(x_history, theta_history, marker='o', linestyle='-', markersize=3)
# plt.xlabel('x (parameter)')
# plt.ylabel('theta (regularization parameter)')
# plt.title('Phase Portrait of (x, theta) Evolution')
# plt.grid()
# plt.show()

# 2D case (earlier prototype, kept commented out for reference)

# import numpy as np
# import matplotlib.pyplot as plt

# # Define parameters and data
# y_train = 1.0  # Example training target
# x_train = np.array([2.0, 1.5])  # Example 2D training feature
# y_val = 1.5  # Example validation target
# x_val = np.array([2.5, 2.0])  # Example 2D validation feature

# # Hyperparameters for the algorithm
# tau = 0.01  # Step size for x updates
# rho = 0.01  # Step size for theta updates
# num_iterations = 100  # Number of iterations to run the algorithm

# # Initial values (in 2D)
# x_k = np.random.rand(2)  # Initial guess for x (2D vector)
# theta_k = np.random.rand()  # Initial guess for theta (scalar regularization parameter)
# v_k = np.zeros_like(x_k)  # Initial vector for v (same dimension as x_k)

# # Lists to track the evolution of (x, theta) over iterations for phase portrait
# x_history = [x_k.copy()]
# theta_history = [theta_k]

# # Define the function g and its gradients
# def g(x, theta):
#     return (y_train - np.dot(x, x_train))**2 + theta * np.dot(x, x)

# def grad_g_x(x, theta):
#     return -2 * x_train * (y_train - np.dot(x, x_train)) + 2 * theta * x

# def hessian_g_xx(x, theta):
#     return 2 * np.outer(x_train, x_train) + 2 * theta * np.eye(2)

# def grad_hessian_theta_x(x, theta):
#     # Mixed partial d^2g / (dtheta dx): a vector, since the gradient in x is
#     # differentiated with respect to the scalar theta
#     return 2 * x

# # Define the function f and its gradients
# def f(x, theta):
#     return (y_val - np.dot(x, x_val))**2

# def grad_f_x(x, theta):
#     return -2 * x_val * (y_val - np.dot(x, x_val))

# def grad_f_theta(x, theta):
#     # f has no direct dependence on theta here, so this partial derivative is zero
#     return 0
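
# # Same scheme as the 1D case, now with vectors: v is a fixed-point iterate for
# # the linear system hessian_g_xx(x, theta) @ v = -grad_f_x(x, theta), and theta
# # follows the hypergradient estimate grad_f_theta + grad_hessian_theta_x . v.
# # The v update contracts when tau < 2 / (largest eigenvalue of the Hessian).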

# # Iterative optimization
# for k in range(num_iterations):
#     # Step 1: Update x (w in 2D)
#     x_k = x_k - tau * grad_g_x(x_k, theta_k)
    
#     # Step 2: Update v
#     hessian_xx = hessian_g_xx(x_k, theta_k)
#     v_k = v_k - tau * (hessian_xx @ v_k + grad_f_x(x_k, theta_k))
    
#     # Step 3: Update theta
#     cross_term = grad_hessian_theta_x(x_k, theta_k)
#     theta_k = theta_k - rho * (np.dot(cross_term, v_k) + grad_f_theta(x_k, theta_k))
    
#     # Store the values in history for plotting
#     x_history.append(x_k.copy())
#     theta_history.append(theta_k)

# # Convert history to arrays for easier plotting
# x_history = np.array(x_history)
# theta_history = np.array(theta_history)

# # Plotting the 3D phase portrait of (x[0], x[1], theta) evolution
# fig = plt.figure(figsize=(10, 8))
# ax = fig.add_subplot(111, projection='3d')
# ax.plot(x_history[:, 0], x_history[:, 1], theta_history, marker='o', linestyle='-', markersize=3)
# ax.set_xlabel('x[0] (1st dimension of x)')
# ax.set_ylabel('x[1] (2nd dimension of x)')
# ax.set_zlabel('theta (regularization parameter)')
# ax.set_title('3D Phase Portrait of (x[0], x[1], theta) Evolution')
# plt.show()

# Active code: closed-form value function h(lambda) for the 2D ridge example
import numpy as np
import matplotlib.pyplot as plt

# Given data in 2D
y_train = 1.0  # Training target
x_train = np.array([-2.0, 1.5])  # Training feature in 2D
y_val = -1.5  # Validation target
x_val = np.array([0.5, -1.0])  # Validation feature in 2D
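
# Inner problem: w(lambda) = argmin_w (y_train - w.x_train)^2 + lambda * ||w||^2.
# Setting the gradient -2*x_train*(y_train - w.x_train) + 2*lambda*w to zero shows
# w must be parallel to x_train, which gives the closed form
#     w(lambda) = y_train * x_train / (||x_train||^2 + lambda)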

# Define the value function h(lambda) = f(w(lambda)), with w(lambda) the closed form above
def h(lambda_reg):
    # Optimal coefficient in w(lambda) = alpha * x_train
    alpha = y_train / (np.dot(x_train, x_train) + lambda_reg)
    # Validation loss at the inner minimizer: w(lambda).x_val = alpha * (x_train.x_val)
    return (y_val - alpha * np.dot(x_train, x_val))**2
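
# Quick numerical sanity check (a minimal sketch; lambda_check, the step size and
# the iteration count are illustrative choices): minimize the inner ridge objective
# by gradient descent at one lambda and confirm it reproduces the closed form in h
lambda_check = 1.0
w_check = np.zeros(2)
for _ in range(5000):
    grad_w = -2 * x_train * (y_train - np.dot(w_check, x_train)) + 2 * lambda_check * w_check
    w_check = w_check - 0.01 * grad_w  # Step size well below 2 / lambda_max(Hessian)
h_numeric = (y_val - np.dot(w_check, x_val))**2
assert np.isclose(h_numeric, h(lambda_check)), (h_numeric, h(lambda_check))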

# Range of lambda values for plotting
lambda_values = np.linspace(0.01, 10, 200)  # Positive regularization strengths (lambda = 0 would also be safe here, since ||x_train||^2 > 0)
h_values = [h(lambda_reg) for lambda_reg in lambda_values]

# Plotting
plt.figure(figsize=(8, 6))
plt.plot(lambda_values, h_values, label=r'$h(\lambda)$')
plt.xlabel(r'$\lambda$ (regularization parameter)')
plt.ylabel(r'$h(\lambda)$')
plt.title(r'Value Function $h(\lambda)$ as a Function of the Regularization Parameter $\lambda$ (2D)')
plt.grid()
plt.legend()
plt.show()
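
# Rough location of the best lambda over the plotted grid; with this data h is
# increasing in lambda, so the minimum sits at the left edge of the range
best_idx = int(np.argmin(h_values))
print(f"Grid minimum: lambda ~ {lambda_values[best_idx]:.3f}, h(lambda) ~ {h_values[best_idx]:.4f}")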
