Here we are trying to train a linear regression model on Boston data sheet.Although the data sheet is unavailable in the latest versions of Scikit library, we can use it by the code in line 7 and line 8.
It is saying that the index is out of bounds
I thought it would be due to the permutation got out of range in permute_data function created b us so in the next line I had put X[perm-1] instead of X[perm] same was done with y; Yet the problem persists, The code is as follows:
import numpy as np
from numpy import ndarray
from typing import Tuple
from typing import Dict
import matplotlib.pyplot as plt
import pandas
from sklearn.datasets import fetch_openml
boston = fetch_openml(name='boston', version=1)
data = boston['data']
target = boston['target']
features = boston['feature_names']
from sklearn.preprocessing import StandardScaler
s = StandardScaler()
data = s.fit_transform(data)
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(data,target,test_size=0.30,random_state= 80718)
y_train = np.reshape(-1,1)
y_test = np.reshape(-1,1)
def init_weights(rows):
W = np.random.randn(rows,1)
B = np.random.randn(1,1) # Try with B as only integer
weights = {}
weights['W'] = W
weights['B'] = B
return weights
def linear_forward(X_batch:ndarray,y_batch:ndarray,weights: Dict[str,ndarray]) -> Tuple[float,dict[str,ndarray]]:
d = np.dot(X_batch,weights['W'])
P :ndarray = d + weights['B']
loss = np.mean(np.power(y_batch - P,2))
forward_info :Dict[str,ndarray]= {}
forward_info['X'] = X_batch
forward_info['N'] = d
forward_info['P'] = P
forward_info['y'] = y_batch
return forward_info, loss
def loss_gradients(forward_info:Dict[str,ndarray],weights:Dict[str,ndarray]):
dLdP = -1*2*(forward_info['y'] - forward_info['P'])
dPdW = np.transpose(forward_info['X'])
dLdW = np.dot(dPdW,dLdP)
dPdB = np.ones_like(weights['B'])
dLdB = (dLdP * dPdB).sum(axis = 0)
loss_gradients :Dict[str,ndarray]= {}
loss_gradients['W'] = dLdW # Change of loss with respect to our functions
loss_gradients['B'] = dLdB
return loss_gradients
def generate_batch(X:ndarray,y:ndarray,start,batch_size): # Yekda nusta return last X[last] karun baghu
assert X.ndim == y.ndim == 2
if start + batch_size > X.shape[0]:
batch_size = X.shape[0] - start
X_batch,y_batch = X[start: start+batch_size],y[start:start+batch_size]
return X_batch,y_batch
def permute_data(X:ndarray,y:ndarray):
perm = np.random.permutation(X.shape[0])
return X[perm],y[perm]
def train(X:ndarray,y:ndarray,n_iter,batch_size,learning_rate,return_weights:bool,seed):
if seed: # ......................nahi kalaala
np.random.seed(seed)
start = 0 # Nahi kalaala
weights = init_weights(X.shape[1])
X,y = permute_data(X,y)
losses = []
for i in range(n_iter):
if start >= X.shape[0]:
X,y = permute_data(X,y)
start = 0
X_batch,y_batch = generate_batch(X,y,start,batch_size)
start+=batch_size # start = start + batch_size
forward_info,loss = linear_forward(X_batch,y_batch,weights)
losses.append(loss)
loss_grads = loss_gradients(forward_info,weights)
for key in weights.keys():
weights[key] -= learning_rate*loss_grads[key]
if return_weights:
return losses,weights
train_info = train(X_train,y_train,1000,23,0.001,True,80718) #Seeds Badlun Baghu
losses = train_info[0]
weights_trained = train_info[1]
plt.plot(list(range(1000)), losses)
def predict(X: ndarray,weights:Dict[str,ndarray]): #weights to be put here are the ones obtained from training
N = np.dot(X,weights['W'])
P = N + weights['B']
return P
preds = predict(X_test,weights_trained)
def mae(preds: ndarray, actuals: ndarray):
return np.mean(np.abs(preds - actuals))
def rmsE(preds:ndarray,actuals:ndarray):
return np .sqrt(np.mean(np.power(actuals - preds,2)))
print("Mean absolute error:", round(mae(preds, y_test), 4),
"Root mean squared error:", round(rmsE(preds, y_test), 4))
Soham Ghodake is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.