My goal is to create an ipynb file to learn how to create a Linear Regression Model.
The problem:
- How should I set the initial w and b? Should I set them randomly?
- When should I change w or b on each iteration?
- By how much should I update w or b on each iteration?
Expected result:
- There is a Model class with `__init__` and `fit` functions.
- The `fit` function will increment or decrement w or b on each iteration.
What I’ve tried:
- create the graph
# Show the window [-2, 11] on both axes.
plt.xlim(-2, 11)
plt.ylim(-2, 11)

# Sample points, drawn as red circles ('ro').
x = list(range(11))
y = [-1, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9]
plt.plot(x, y, 'ro')

# The line y = x, drawn through its two end points.
plt.plot([0, 10], [0, 10])

# Translucent band between y = x - 1 and y = x + 1.
y1 = [-1, 9]
y2 = [1, 11]
plt.fill_between([0, 10], y1, y2, alpha=0.2)

# Drop the temporary point lists; y1 and y2 stay defined.
del x, y
- create a loss function.
# Observed data points.
x = list(range(11))
y = [-1, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9]
# Predictions of the line y = 1x + 0 at every x.
ŷ = [1 * x_i + 0 for x_i in x]
def calculate_linear_regression_loss_function(ŷ: list[float], y: list[float]) -> np.number:
    """Return the sum of squared errors between predictions and targets.

    For every point the residual is ``ŷ[i] - y[i]``; the loss is the sum of
    the squared residuals, i.e. ``sum((ŷ - y)²)``.

    Args:
        ŷ: Predicted values, one per data point.
        y: Observed values, one per data point.

    Returns:
        The total squared error as a single NumPy scalar (not a list).
    """
    # NumPy arrays give element-wise subtraction and squaring.
    residuals = np.asarray(ŷ) - np.asarray(y)
    return (residuals ** 2).sum()
# Print the loss of the y = x predictions against the observed points.
print(calculate_linear_regression_loss_function(ŷ=ŷ, y=y))

# Remove the demo data from the namespace.
del x
del ŷ
del y
- create the generate training data function
def generate_training_data(n: int) -> list[list[float]]:
    """Generate ``n`` random ``[x, y]`` points.

    Each coordinate is drawn independently as ``randrange(0, 1000) / 100``,
    so it is a float between 0.0 and 9.99 in steps of 0.01. Because x and y
    are drawn independently, the points follow no linear trend.

    Args:
        n: Number of points to generate.

    Returns:
        A list of ``n`` two-element lists ``[x, y]``.
    """
    # The list display is evaluated left to right, so x is drawn before y.
    return [
        [randrange(0, 1000) / 100, randrange(0, 1000) / 100]
        for _ in range(n)
    ]


# Example call; the result is not stored.
generate_training_data(n=10)
def separate_training_data_into_x_y(
    training_data: list[list[float]],
) -> tuple[np.ndarray, np.ndarray]:
    """Split a list of ``[x, y]`` points into an x array and a y array.

    Args:
        training_data: Points as two-element rows ``[x, y]``.

    Returns:
        A tuple ``(x_values, y_values)`` of 1-D NumPy arrays: column 0 and
        column 1 of the input. Both are empty when there are no points.
    """
    array = np.array(training_data)
    if array.size == 0:
        # An empty list converts to a 1-D array, on which the two-axis
        # column index below would raise IndexError.
        return np.empty(0), np.empty(0)
    return array[:, 0], array[:, 1]
# Build one random data set, print it, then print its x column.
training_data = generate_training_data(10)
print("training_data", training_data)
x_training, y_training = separate_training_data_into_x_y(training_data)
print("x_training", x_training)

# Draw a fresh random data set (replacing the one printed above) and plot
# it as red circles.
training_data = generate_training_data(n=10)
x_training, y_training = separate_training_data_into_x_y(training_data)
plt.plot(x_training, y_training, 'ro')