Thiết kế website giá rẻ

Question

I’m learning how to use the KNN algorithm in python to forecast stock market prices. The problem I have is that the test_and_predict method which is used to test the model on historical data gives great results, but the train_and_predict method which is used to forecast into the future gives strange results. I have no idea what can cause this behavior.

Below is the part of the application responsible for data preparation, testing and forecasting

<code>def prepare_data(df, weeks_ahead, features, history):

matching_features = find_matching_columns(df, features)

X = []

y = []

dates = []

closing_price_col = get_close_matches('Kurs zamknięcia', df.columns, n=1, cutoff=0.6)[0]

date_col = get_close_matches('Data', df.columns, n=1, cutoff=0.6)[0]

df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

for i in range(len(df) - max(history, weeks_ahead)):

if i + history + weeks_ahead >= len(df):

X.append(df[matching_features].iloc[i:i+history].values)

else:

X.append(df[matching_features].iloc[i:i+history].values)

y.append(df[closing_price_col].iloc[i+history+weeks_ahead-1])

dates.append(df[date_col].iloc[i+history+weeks_ahead-1])

return np.array(X), np.array(y), np.array(dates)

def test_and_predict(X, y, dates, weeks, k, classifier, random_state, test_size=0.2):

if len(X.shape) == 3:

n_samples, n_time_steps, n_features = X.shape

X = X.reshape(n_samples, n_time_steps*n_features)

x_testing = X[:len(X)-weeks]

y_testing = y[:len(y)]

scaler = StandardScaler()

X = scaler.fit_transform(X)

if k > len(X):

return None, None, None

mape_scores = []

all_y_true = []

all_y_pred = []

all_dates = []

lenX = len(x_testing)

lenY = len(y_testing)

for i in range(1,weeks):

X_train, X_test = x_testing[:lenX-i], x_testing[lenX-i:lenX-i+1]

y_train, y_test = y_testing[:lenY-i], y_testing[lenY-i:lenY-i+1]

dates_test = dates[lenX-i:lenX-i+1]

classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

all_y_true.extend(y_test)

all_y_pred.extend(y_pred)

all_dates.extend(dates_test)

mape = mean_absolute_percentage_error(y_test, y_pred)

mape_scores.append(mape)

accuracy_scores = [1 - mape for mape in mape_scores]

results = {

"weeks": weeks,

"cross_val_scores": accuracy_scores,

"accuracy_scores": accuracy_scores,

"mean_accuracy": np.mean(accuracy_scores),

"std_accuracy": np.std(accuracy_scores),

"random_state": random_state,

"y_true": all_y_true,

"y_pred": all_y_pred,

"dates": all_dates

}

plt.figure(figsize=(10, 6))

plt.plot(all_dates, all_y_true, label='True Values', marker='o')

plt.plot(all_dates, all_y_pred, label='Predicted Values', marker='x')

plt.xlabel('Date')

plt.ylabel('Values')

plt.title('True vs. Predicted Values Over Time')

plt.legend()

plt.show()

return classifier, scaler, results

def train_and_predict(X, y, dates, weeks, k, classifier, random_state, test_size=0.2):

if len(X.shape) == 3:

n_samples, n_time_steps, n_features = X.shape

X = X.reshape(n_samples, n_time_steps*n_features)

scaler = StandardScaler()

X = scaler.fit_transform(X)

if k > len(X):

return None, None, None

mape_scores = []

all_y_true = []

all_y_pred = []

all_dates = []

for i in range(1,weeks):

X_train, X_test = X[:len(X)-weeks], X[len(X)-i:len(X)-i+1]

print(X_test)

y_train = y[:len(y)]

dates_test = dates[len(y)-i:len(y)-i+1]

classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

all_y_pred.extend(y_pred)

plt.figure(figsize=(10, 6))

plt.plot( all_y_pred, label='Predicted Values', marker='x')

plt.xlabel('Date')

plt.ylabel('Values')

plt.title('Predicted Values Over Time')

plt.legend()

plt.show()

</code>

<code>def prepare_data(df, weeks_ahead, features, history): matching_features = find_matching_columns(df, features) X = [] y = [] dates = [] closing_price_col = get_close_matches('Kurs zamknięcia', df.columns, n=1, cutoff=0.6)[0] date_col = get_close_matches('Data', df.columns, n=1, cutoff=0.6)[0] df[date_col] = pd.to_datetime(df[date_col], errors='coerce') for i in range(len(df) - max(history, weeks_ahead)): if i + history + weeks_ahead >= len(df): X.append(df[matching_features].iloc[i:i+history].values) else: X.append(df[matching_features].iloc[i:i+history].values) y.append(df[closing_price_col].iloc[i+history+weeks_ahead-1]) dates.append(df[date_col].iloc[i+history+weeks_ahead-1]) return np.array(X), np.array(y), np.array(dates) def test_and_predict(X, y, dates, weeks, k, classifier, random_state, test_size=0.2): if len(X.shape) == 3: n_samples, n_time_steps, n_features = X.shape X = X.reshape(n_samples, n_time_steps*n_features) x_testing = X[:len(X)-weeks] y_testing = y[:len(y)] scaler = StandardScaler() X = scaler.fit_transform(X) if k > len(X): return None, None, None mape_scores = [] all_y_true = [] all_y_pred = [] all_dates = [] lenX = len(x_testing) lenY = len(y_testing) for i in range(1,weeks): X_train, X_test = x_testing[:lenX-i], x_testing[lenX-i:lenX-i+1] y_train, y_test = y_testing[:lenY-i], y_testing[lenY-i:lenY-i+1] dates_test = dates[lenX-i:lenX-i+1] classifier.fit(X_train, y_train) y_pred = classifier.predict(X_test) all_y_true.extend(y_test) all_y_pred.extend(y_pred) all_dates.extend(dates_test) mape = mean_absolute_percentage_error(y_test, y_pred) mape_scores.append(mape) accuracy_scores = [1 - mape for mape in mape_scores] results = { "weeks": weeks, "cross_val_scores": accuracy_scores, "accuracy_scores": accuracy_scores, "mean_accuracy": np.mean(accuracy_scores), "std_accuracy": np.std(accuracy_scores), "random_state": random_state, "y_true": all_y_true, "y_pred": all_y_pred, "dates": all_dates } plt.figure(figsize=(10, 6)) plt.plot(all_dates, all_y_true, label='True Values', marker='o') plt.plot(all_dates, all_y_pred, label='Predicted Values', marker='x') plt.xlabel('Date') plt.ylabel('Values') plt.title('True vs. Predicted Values Over Time') plt.legend() plt.show() return classifier, scaler, results def train_and_predict(X, y, dates, weeks, k, classifier, random_state, test_size=0.2): if len(X.shape) == 3: n_samples, n_time_steps, n_features = X.shape X = X.reshape(n_samples, n_time_steps*n_features) scaler = StandardScaler() X = scaler.fit_transform(X) if k > len(X): return None, None, None mape_scores = [] all_y_true = [] all_y_pred = [] all_dates = [] for i in range(1,weeks): X_train, X_test = X[:len(X)-weeks], X[len(X)-i:len(X)-i+1] print(X_test) y_train = y[:len(y)] dates_test = dates[len(y)-i:len(y)-i+1] classifier.fit(X_train, y_train) y_pred = classifier.predict(X_test) all_y_pred.extend(y_pred) plt.figure(figsize=(10, 6)) plt.plot( all_y_pred, label='Predicted Values', marker='x') plt.xlabel('Date') plt.ylabel('Values') plt.title('Predicted Values Over Time') plt.legend() plt.show() </code>

def prepare_data(df, weeks_ahead, features, history):
    matching_features = find_matching_columns(df, features)
    
    X = []
    y = []
    dates = []
    
    closing_price_col = get_close_matches('Kurs zamknięcia', df.columns, n=1, cutoff=0.6)[0]
    date_col = get_close_matches('Data', df.columns, n=1, cutoff=0.6)[0]

    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
    for i in range(len(df) - max(history, weeks_ahead)):
        if i + history + weeks_ahead >= len(df):
            X.append(df[matching_features].iloc[i:i+history].values)
        else:
            X.append(df[matching_features].iloc[i:i+history].values)
            y.append(df[closing_price_col].iloc[i+history+weeks_ahead-1])
            dates.append(df[date_col].iloc[i+history+weeks_ahead-1])
    
    
    return np.array(X), np.array(y), np.array(dates)

def test_and_predict(X, y, dates, weeks, k, classifier, random_state, test_size=0.2):
    
    if len(X.shape) == 3:
        n_samples, n_time_steps, n_features = X.shape
        X = X.reshape(n_samples, n_time_steps*n_features)  
   
    x_testing = X[:len(X)-weeks]
    y_testing = y[:len(y)]
    
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    if k > len(X):
        
        return None, None, None

    mape_scores = []
    all_y_true = []
    all_y_pred = []
    all_dates = []
    
   
    lenX = len(x_testing)
    lenY = len(y_testing)
    for i in range(1,weeks):
        X_train, X_test = x_testing[:lenX-i], x_testing[lenX-i:lenX-i+1] 
        y_train, y_test = y_testing[:lenY-i], y_testing[lenY-i:lenY-i+1]
        dates_test = dates[lenX-i:lenX-i+1]
        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(X_test)
        all_y_true.extend(y_test)
        all_y_pred.extend(y_pred)
        all_dates.extend(dates_test)
      
  
    mape = mean_absolute_percentage_error(y_test, y_pred)
    mape_scores.append(mape)
        

    accuracy_scores = [1 - mape for mape in mape_scores]  

    results = {
        "weeks": weeks,
        "cross_val_scores": accuracy_scores,
        "accuracy_scores": accuracy_scores,
        "mean_accuracy": np.mean(accuracy_scores),
        "std_accuracy": np.std(accuracy_scores),
        "random_state": random_state,
        "y_true": all_y_true,
        "y_pred": all_y_pred,
        "dates": all_dates
    }

    plt.figure(figsize=(10, 6))
    plt.plot(all_dates, all_y_true, label='True Values', marker='o')
    plt.plot(all_dates, all_y_pred, label='Predicted Values', marker='x')
    plt.xlabel('Date')
    plt.ylabel('Values')
    plt.title('True vs. Predicted Values Over Time')
    plt.legend()
    plt.show()

    return classifier, scaler, results

def train_and_predict(X, y, dates, weeks, k, classifier, random_state, test_size=0.2):
    if len(X.shape) == 3:
        n_samples, n_time_steps, n_features = X.shape
        X = X.reshape(n_samples, n_time_steps*n_features)  
    

    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    if k > len(X):
    
        return None, None, None

    mape_scores = []
    all_y_true = []
    all_y_pred = []
    all_dates = []
    
    for i in range(1,weeks):
        X_train, X_test = X[:len(X)-weeks], X[len(X)-i:len(X)-i+1] 
        print(X_test)
        y_train = y[:len(y)]
        dates_test = dates[len(y)-i:len(y)-i+1]
        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(X_test)
      
        all_y_pred.extend(y_pred)
        
    plt.figure(figsize=(10, 6))

    plt.plot( all_y_pred, label='Predicted Values', marker='x')
    plt.xlabel('Date')
    plt.ylabel('Values')
    plt.title('Predicted Values Over Time')
    plt.legend()
    plt.show()

Charts: The first graph shows the results of testing, the second graph shows the results of forecasting:
enter image description here
enter image description here

Thiết kế website giá rẻ

Danh mục

Python KNNRegressor, why my forecasts look so unnatrual