# The following is an implementation of linear regression using SGD, but the line obtained is not the best fit. How can this be improved?

352 · Asked by ranjan_6399 in Data Science on Jan 15, 2020

import matplotlib.pyplot as plt

from matplotlib import style

import numpy as np

style.use("fivethirtyeight")

x=[[1],[2],[3],[4],[5],[6],[7],[8],[9],[10]]

y=[[3],[5],[9],[9],[11],[13],[16],[17],[19],[21]]

X=np.array(x)

Y=np.array(y)

learning_rate=0.015

m=1

c=2

gues=[]

for i in range(len(x)):

guess=m*x[i][0]+c

error=guess-y[i][0]

if error

m=m+abs(error)*x[i][0]*learning_rate

c=c+abs(error)*learning_rate

if error>0:

m=m-abs(error)*x[i][0]*learning_rate

c=c-abs(error)*learning_rate

gues.append([guess])

t=np.array(gues)

plt.scatter(X,Y)

plt.plot(X,t)

plt.show()

from sklearn.linear_model import LinearRegression

var=LinearRegression()

var.fit(X,Y)

plt.scatter(X,Y)

plt.plot(X,var.predict(X))

plt.show()

When doing stochastic gradient descent, the fit is updated at every data point, and the final m and c give the parameters of the fitted relationship. The line plotted from the per-step guesses shows the 'evolution' of the fitted line, not the final fit itself.

import numpy as np

import matplotlib.pyplot as plt

X = np.array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

Y = np.array([ 3, 5, 9, 9, 11, 13, 16, 17, 19, 21])

learning_rate = 0.015

m = 1

c = 2

gues = []

for xi, yi in zip(X, Y):

guess = m * xi + c

error = guess - yi

m = m - error * xi * learning_rate

c = c - error * learning_rate

gues.append(guess)

t = np.array(gues)

# Plot the modeled line.

y_hat = m * X + c

plt.figure(figsize=(10,5))

plt.plot(X, y_hat, c='red')

# Plot the data.

plt.scatter(X, Y)

# Plot the evolution of guesses.

plt.plot(X, t)

plt.show()