We now look for a model that predicts y from 2 variables (n = 2), $x_1$ and $x_2$, i.e. a linear model $y = a\,x_1 + b\,x_2 + c$, whose predictions over the whole dataset can be written in matrix form as $X \cdot \theta$:
$$ y = f(x_1, x_2) $$

$$ X = \begin{bmatrix} x^{(1)}_1 & x^{(1)}_2 & 1 \\ \vdots & \vdots & \vdots \\ x^{(m)}_1 & x^{(m)}_2 & 1 \end{bmatrix} \qquad \theta = \begin{bmatrix} a \\ b \\ c \end{bmatrix} \qquad Y = \begin{bmatrix} y^{(1)} \\ \vdots \\ y^{(m)} \end{bmatrix} $$

import numpy as np
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
m = 100 # number of samples
n = 2   # number of features
np.random.seed(0)
x, y = make_regression(n_samples=m, n_features=n, noise=10) # linear dataset (x, y)
print('x.shape = ', x.shape)
# Reshape y into a column vector
y=y.reshape(y.shape[0],1)
print('y.shape = ', y.shape)
# x = np.sort(x, axis=0)
# y = np.sort(y, axis=0)
plt.scatter(x[:,0], y) # y against the first feature x_1
for i in range(len(x) // 6):  # print the first few samples
    print(f'point_{i} : x_1 = {x[i][0]}, x_2 = {x[i][1]}, y = {y[i]}')
x.shape = (100, 2)
y.shape = (100, 1)
point_0 : x_1 = 1.0544517269311366, x_2 = -1.0707526215105425, y = [-75.28417318]
point_1 : x_1 = -0.3627411659871381, x_2 = -0.6343220936809636, y = [-73.99436051]
point_2 : x_1 = -0.8540957393017248, x_2 = 0.31306770165090136, y = [4.18142143]
point_3 : x_1 = 1.3263858966870303, x_2 = 0.298238174206056, y = [62.37048731]
point_4 : x_1 = -0.461584604814709, x_2 = -1.3159074105115212, y = [-146.59108679]
point_5 : x_1 = 1.9436211856492926, x_2 = -1.17312340511416, y = [-54.64506412]
........
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x[:,0], x[:,1], y) # 3D scatter of x_1, x_2 and the target y
# label the axes
ax.set_xlabel('x_1')
ax.set_ylabel('x_2')
ax.set_zlabel('y')
plt.show()
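As a complement (not in the original notebook), here is a minimal sketch of how y can also be plotted against each feature separately, using the x, y and n defined above:

fig, axes = plt.subplots(1, n, figsize=(10, 4), sharey=True)
for j, ax2d in enumerate(axes):
    ax2d.scatter(x[:, j], y)        # y against feature x_(j+1)
    ax2d.set_xlabel(f'x_{j + 1}')
axes[0].set_ylabel('y')
plt.show()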
X = np.hstack((x, np.ones((x.shape[0], 1)))) # design matrix X with a bias column of ones
print(X.shape)
print(X[:5])
(100, 3)
[[ 1.05445173 -1.07075262 1. ]
[-0.36274117 -0.63432209 1. ]
[-0.85409574 0.3130677 1. ]
[ 1.3263859 0.29823817 1. ]
[-0.4615846 -1.31590741 1. ]]
# Random initialization of theta, with 3 elements (since X has three columns)
THETA = np.random.randn(3, 1)
THETA
array([[-0.82646112],
[ 0.78420863],
[-0.1954172 ]])
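For reference, the functions defined below implement the mean squared error cost and its gradient in matrix form:

$$ J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( (X\theta)^{(i)} - y^{(i)} \right)^2 \qquad \frac{\partial J(\theta)}{\partial \theta} = \frac{1}{m} X^{T} \left( X\theta - Y \right) $$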
#-----------------------------------------------------------------
def model(X, theta):
    # Linear model: predictions F = X . theta
    return X.dot(theta)
#-----------------------------------------------------------------
# Cost function (mean squared error)
def J(X, y, theta):
    m = len(y)
    return 1/(2*m) * np.sum((model(X, theta) - y)**2)
#-----------------------------------------------------------------
# Gradient of the cost function
def grad(X, y, theta):
    m = len(y)
    return (1/m) * X.T.dot(model(X, theta) - y)
#-----------------------------------------------------------------
def gradient_descent(X, y, theta, alpha, n_iterations):
    J_history = np.zeros(n_iterations)
    for i in range(n_iterations):
        theta = theta - alpha * grad(X, y, theta)
        J_history[i] = J(X, y, theta)  # record the cost at each iteration
    return theta, J_history
#-----------------------------------------------------------------
#-----------------------------------------------------------------
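Not part of the original notebook: a quick sanity check of grad against a finite-difference approximation of J, sketched with a hypothetical helper grad_check and the X, y and THETA defined above.

def grad_check(X, y, theta, eps=1e-6):
    # Central finite-difference approximation of dJ/dtheta, compared to grad()
    approx = np.zeros_like(theta)
    for k in range(theta.shape[0]):
        t_plus, t_minus = theta.copy(), theta.copy()
        t_plus[k] += eps
        t_minus[k] -= eps
        approx[k] = (J(X, y, t_plus) - J(X, y, t_minus)) / (2 * eps)
    return np.max(np.abs(approx - grad(X, y, theta)))

print('max gradient error =', grad_check(X, y, THETA))  # expected to be very small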
n_iterations = 1000
alpha = 0.01 # learning rate
THETA_final, J_hist = gradient_descent(X, y, THETA, alpha, n_iterations)
pred = model(X, THETA_final)
print('THETA_final =\n',THETA_final)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x[:,0], x[:,1], y)     # data
ax.scatter(x[:,0], x[:,1], pred)  # model predictions
plt.show()
THETA_final =
[[28.67154616]
[97.29524926]
[-0.511481 ]]
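As a cross-check (not in the original), the closed-form least-squares solution given by the normal equation should be very close to the parameters found by gradient descent:

# Normal equation: theta* = (X^T X)^(-1) X^T Y
theta_exact = np.linalg.solve(X.T.dot(X), X.T.dot(y))
print('theta_exact =\n', theta_exact)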
plt.figure()
plt.plot(range(n_iterations), J_hist) # learning curve
plt.xlabel("n_iterations")
plt.ylabel("J")
plt.title("Evolution de J")
plt.grid(True)
plt.show()
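Finally, a minimal sketch (not in the original) of quantifying the fit with the coefficient of determination $R^2$, computed from the pred obtained above:

# Coefficient of determination: R^2 = 1 - SS_res / SS_tot
ss_res = np.sum((y - pred)**2)
ss_tot = np.sum((y - y.mean())**2)
print('R^2 =', 1 - ss_res / ss_tot)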