We now look for a model that predicts y from 2 variables (n = 2), $x_1$ and $x_2$, i.e. a linear model $y = a\,x_1 + b\,x_2 + c$, whose predictions over the whole dataset can be written in matrix form as $X \cdot \theta$:
$$ y = f(x_1, x_2) $$

$$ X = \begin{bmatrix} x^{(1)}_1 & x^{(1)}_2 & 1 \\ \vdots & \vdots & \vdots \\ x^{(m)}_1 & x^{(m)}_2 & 1 \end{bmatrix} \qquad \theta = \begin{bmatrix} a \\ b \\ c \end{bmatrix} \qquad Y = \begin{bmatrix} y^{(1)} \\ \vdots \\ y^{(m)} \end{bmatrix} $$

import numpy as np
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
m = 100 # number of samples
n = 2   # number of features
np.random.seed(0)
x, y = make_regression(n_samples=m, n_features=n, noise=10) # linear dataset (x, y)
print('x.shape = ', x.shape)
# Reshape y into a column vector
y=y.reshape(y.shape[0],1)
print('y.shape = ', y.shape)
# x = np.sort(x, axis=0)
# y = np.sort(y, axis=0)
plt.scatter(x[:,0], y) # y against the first feature x_1
for i in range(len(x) // 6):  # print the first few samples
    print(f'point_{i} : x_1 = {x[i][0]}, x_2 = {x[i][1]}, y = {y[i]}')
x.shape = (100, 2)
y.shape = (100, 1)
point_0 : x_1 = 1.0544517269311366, x_2 = -1.0707526215105425, y = [-75.28417318]
point_1 : x_1 = -0.3627411659871381, x_2 = -0.6343220936809636, y = [-73.99436051]
point_2 : x_1 = -0.8540957393017248, x_2 = 0.31306770165090136, y = [4.18142143]
point_3 : x_1 = 1.3263858966870303, x_2 = 0.298238174206056, y = [62.37048731]
point_4 : x_1 = -0.461584604814709, x_2 = -1.3159074105115212, y = [-146.59108679]
point_5 : x_1 = 1.9436211856492926, x_2 = -1.17312340511416, y = [-54.64506412]
........
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x[:,0], x[:,1], y) # 3D scatter of x_1, x_2 and the target y
# label the axes
ax.set_xlabel('x_1')
ax.set_ylabel('x_2')
ax.set_zlabel('y')
plt.show()
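As a complement (not in the original notebook), here is a minimal sketch of how y can also be plotted against each feature separately, using the x, y and n defined above:

fig, axes = plt.subplots(1, n, figsize=(10, 4), sharey=True)
for j, ax2d in enumerate(axes):
    ax2d.scatter(x[:, j], y)        # y against feature x_(j+1)
    ax2d.set_xlabel(f'x_{j + 1}')
axes[0].set_ylabel('y')
plt.show()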
X = np.hstack((x, np.ones((x.shape[0], 1)))) # design matrix X with a bias column of ones
print(X.shape)
print(X[:5])
(100, 3)
[[ 1.05445173 -1.07075262 1. ]
[-0.36274117 -0.63432209 1. ]
[-0.85409574 0.3130677 1. ]
[ 1.3263859 0.29823817 1. ]
[-0.4615846 -1.31590741 1. ]]
# Random initialization of theta, with 3 elements (since X has three columns)
THETA = np.random.randn(3, 1)
THETA
array([[-0.82646112],
[ 0.78420863],
[-0.1954172 ]])
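For reference, the functions defined below implement the mean squared error cost and its gradient in matrix form:

$$ J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( (X\theta)^{(i)} - y^{(i)} \right)^2 \qquad \frac{\partial J(\theta)}{\partial \theta} = \frac{1}{m} X^{T} \left( X\theta - Y \right) $$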
#-----------------------------------------------------------------
def model(X, theta):
    # Linear model: predictions F = X . theta
    return X.dot(theta)
#-----------------------------------------------------------------
# Cost function (mean squared error)
def J(X, y, theta):
    m = len(y)
    return 1/(2*m) * np.sum((model(X, theta) - y)**2)
#-----------------------------------------------------------------
# Gradient of the cost function
def grad(X, y, theta):
    m = len(y)
    return (1/m) * X.T.dot(model(X, theta) - y)
#-----------------------------------------------------------------
def gradient_descent(X, y, theta, alpha, n_iterations):
    J_history = np.zeros(n_iterations)
    for i in range(n_iterations):
        theta = theta - alpha * grad(X, y, theta)
        J_history[i] = J(X, y, theta)  # record the cost at each iteration
    return theta, J_history
#-----------------------------------------------------------------
#-----------------------------------------------------------------
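Not part of the original notebook: a quick sanity check of grad against a finite-difference approximation of J, sketched with a hypothetical helper grad_check and the X, y and THETA defined above.

def grad_check(X, y, theta, eps=1e-6):
    # Central finite-difference approximation of dJ/dtheta, compared to grad()
    approx = np.zeros_like(theta)
    for k in range(theta.shape[0]):
        t_plus, t_minus = theta.copy(), theta.copy()
        t_plus[k] += eps
        t_minus[k] -= eps
        approx[k] = (J(X, y, t_plus) - J(X, y, t_minus)) / (2 * eps)
    return np.max(np.abs(approx - grad(X, y, theta)))

print('max gradient error =', grad_check(X, y, THETA))  # expected to be very small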
n_iterations = 1000
alpha = 0.01 # learning rate
THETA_final, J_hist = gradient_descent(X, y, THETA, alpha, n_iterations)
pred = model(X, THETA_final)
print('THETA_final =\n',THETA_final)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x[:,0], x[:,1], y)     # data
ax.scatter(x[:,0], x[:,1], pred)  # model predictions
plt.show()
THETA_final =
[[28.67154616]
[97.29524926]
[-0.511481 ]]
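As a cross-check (not in the original), the closed-form least-squares solution given by the normal equation should be very close to the parameters found by gradient descent:

# Normal equation: theta* = (X^T X)^(-1) X^T Y
theta_exact = np.linalg.solve(X.T.dot(X), X.T.dot(y))
print('theta_exact =\n', theta_exact)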
plt.figure()
plt.plot(range(n_iterations), J_hist) # learning curve
plt.xlabel("n_iterations")
plt.ylabel("J")
plt.title("Evolution de J")
plt.grid(True)
plt.show()
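Finally, a minimal sketch (not in the original) of quantifying the fit with the coefficient of determination $R^2$, computed from the pred obtained above:

# Coefficient of determination: R^2 = 1 - SS_res / SS_tot
ss_res = np.sum((y - pred)**2)
ss_tot = np.sum((y - y.mean())**2)
print('R^2 =', 1 - ss_res / ss_tot)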