I’m attempting to get a PyTorch Forecasting Temporal Fusion Transformer working, following the example here:
https://pytorch-forecasting.readthedocs.io/en/stable/tutorials/stallion.html
I keep getting “KeyError: 0”, even though my data contains no 0s at all. I first hit this error with my actual dataset, so I switched to the minimal dataset and simplified code below, but the error still occurs. It happens whenever I pass a GroupNormalizer as the target_normalizer when defining the TimeSeriesDataSet.
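Concretely, the argument I'm referring to is this one, pulled out of the full cell below for emphasis (the column names come from my minimal dataset):

target_normalizer=GroupNormalizer(
    groups=["symbol", "day_of_epoch"], transformation="softplus"
)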
I am running the below code in a PaperSpace Jupyter notebook cell with a Python 3.9.13 container:
!pip install numpy==1.24.4
!pip install pandas
!pip install --ignore-installed PyYAML==5.4.1
!pip install torch
!pip install pytorch-lightning
!pip install pytorch-forecasting
!pip install statsmodels
!pip install scipy
!pip install matplotlib
import pandas as pd
from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import NaNLabelEncoder, GroupNormalizer
# Create a small sample dataframe
sample_data = pd.DataFrame({
    'symbol': ['AAPL', 'AAPL', 'AAPL', 'GOOG', 'GOOG', 'GOOG'],
    'period': [1, 2, 3, 1, 2, 3],
    'close': [150.0, 152.0, 151.0, 2800.0, 2820.0, 2810.0],
    'day_of_epoch': [19844, 19844, 19844, 19845, 19845, 19845]
})
# Ensure 'period' column is treated as integer index
sample_data['period'] = sample_data['period'].astype(int)
# Print the dataframe to verify
print(sample_data)
# Define the maximum prediction length
max_prediction_length = 1
max_encoder_length = 2
# Define the training dataset with minimal configuration
training = TimeSeriesDataSet(
    sample_data,
    time_idx="period",
    target="close",
    group_ids=["symbol", "day_of_epoch"],
    min_encoder_length=1,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["symbol"],
    time_varying_known_categoricals=[],
    time_varying_known_reals=["period", "day_of_epoch"],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=["close"],
    target_normalizer=GroupNormalizer(
        groups=["symbol", "day_of_epoch"], transformation="softplus"
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    categorical_encoders={
        'symbol': NaNLabelEncoder(add_nan=True)
    },
)
It produces the following output (after the installations finish):
  symbol  period   close  day_of_epoch
0   AAPL       1   150.0         19844
1   AAPL       2   152.0         19844
2   AAPL       3   151.0         19844
3   GOOG       1  2800.0         19845
4   GOOG       2  2820.0         19845
5   GOOG       3  2810.0         19845
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /usr/local/lib/python3.9/dist-packages/pytorch_forecasting/data/encoders.py:331, in NaNLabelEncoder.transform(self, y, return_norm, target_scale, ignore_na)
330 try:
--> 331 encoded = [self.classes_[v] for v in y]
332 except KeyError as e:
File /usr/local/lib/python3.9/dist-packages/pytorch_forecasting/data/encoders.py:331, in <listcomp>(.0)
330 try:
--> 331 encoded = [self.classes_[v] for v in y]
332 except KeyError as e:
KeyError: 0
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
Input In [1], in <cell line: 34>()
31 max_encoder_length = 2
33 # Define the training dataset with minimal configuration
---> 34 training = TimeSeriesDataSet(
35 sample_data,
36 time_idx="period",
37 target="close",
38 group_ids=["symbol", "day_of_epoch"],
39 min_encoder_length=1,
40 max_encoder_length=max_encoder_length,
41 min_prediction_length=1,
42 max_prediction_length=max_prediction_length,
43 static_categoricals=["symbol"],
44 time_varying_known_categoricals=[],
45 time_varying_known_reals=["period", "day_of_epoch"],
46 time_varying_unknown_categoricals=[],
47 time_varying_unknown_reals=["close"],
48 target_normalizer=GroupNormalizer(
49 groups=["symbol", "day_of_epoch"], transformation="softplus"
50 ),
51 add_relative_time_idx=True,
52 add_target_scales=True,
53 add_encoder_length=True,
54 categorical_encoders={
55 'symbol': NaNLabelEncoder(add_nan=True)
56 },
57 )
File /usr/local/lib/python3.9/dist-packages/pytorch_forecasting/data/timeseries.py:476, in TimeSeriesDataSet.__init__(self, data, time_idx, target, group_ids, weight, max_encoder_length, min_encoder_length, min_prediction_idx, min_prediction_length, max_prediction_length, static_categoricals, static_reals, time_varying_known_categoricals, time_varying_known_reals, time_varying_unknown_categoricals, time_varying_unknown_reals, variable_groups, constant_fill_strategy, allow_missing_timesteps, lags, add_relative_time_idx, add_target_scales, add_encoder_length, target_normalizer, categorical_encoders, scalers, randomize_length, predict_mode)
473 data = data.sort_values(self.group_ids + [self.time_idx])
475 # preprocess data
--> 476 data = self._preprocess_data(data)
477 for target in self.target_names:
478 assert target not in self.scalers, "Target normalizer is separate and not in scalers."
File /usr/local/lib/python3.9/dist-packages/pytorch_forecasting/data/timeseries.py:837, in TimeSeriesDataSet._preprocess_data(self, data)
831 transformer = self.get_transformer(name)
832 if (
833 name not in self.target_names
834 and transformer is not None
835 and not isinstance(transformer, EncoderNormalizer)
836 ):
--> 837 data[name] = self.transform_values(name, data[name], data=data, inverse=False)
839 # encode lagged categorical targets
840 for name in self.lagged_targets:
841 # normalizer only now available
File /usr/local/lib/python3.9/dist-packages/pytorch_forecasting/data/timeseries.py:935, in TimeSeriesDataSet.transform_values(self, name, values, data, inverse, group_id, **kwargs)
933 # remaining categories
934 if name in self.flat_categoricals + self.group_ids + self._group_ids:
--> 935 return transform(values, **kwargs)
937 # reals
938 elif name in self.reals:
File /usr/local/lib/python3.9/dist-packages/sklearn/utils/_set_output.py:313, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
311 @wraps(f)
312 def wrapped(self, X, *args, **kwargs):
--> 313 data_to_wrap = f(self, X, *args, **kwargs)
314 if isinstance(data_to_wrap, tuple):
315 # only wrap the first output for cross decomposition
316 return_tuple = (
317 _wrap_data_with_container(method, data_to_wrap[0], X, self),
318 *data_to_wrap[1:],
319 )
File /usr/local/lib/python3.9/dist-packages/pytorch_forecasting/data/encoders.py:333, in NaNLabelEncoder.transform(self, y, return_norm, target_scale, ignore_na)
331 encoded = [self.classes_[v] for v in y]
332 except KeyError as e:
--> 333 raise KeyError(
334 f"Unknown category '{e.args[0]}' encountered. Set `add_nan=True` to allow unknown categories"
335 )
337 if isinstance(y, torch.Tensor):
338 encoded = torch.tensor(encoded, dtype=torch.long, device=y.device)
KeyError: "Unknown category '0' encountered. Set `add_nan=True` to allow unknown categories"
Any insights would be greatly appreciated.