Skip to content

timecopilot.gift_eval

GIFTEval

GIFTEval(
    dataset_name: str,
    term: str,
    output_path: Path | str | None = None,
    storage_path: Path | str | None = None,
)

Evaluation utility for GIFTEval.

This class loads a time series dataset, sets up evaluation metrics, and provides methods to evaluate GluonTS predictors on the dataset, saving results to CSV if desired.

Initialize a GIFTEval instance for a specific dataset and evaluation term.

Parameters:

Name Type Description Default
dataset_name str

Name of the dataset to evaluate on.

required
term str

Evaluation term (e.g., 'short', 'medium', 'long').

required
output_path str | Path | None

Directory to save results CSV, or None to skip saving.

None
storage_path Path | str | None

Path where the dataset is stored.

None
Example
import pandas as pd
from timecopilot.gift_eval.eval import GIFTEval
from timecopilot.gift_eval.gluonts_predictor import GluonTSPredictor
from timecopilot.models.stats import SeasonalNaive

storage_path = "./gift_eval_data"
GIFTEval.download_data(storage_path)

predictor = GluonTSPredictor(
    # you can use any forecaster from TimeCopilot
    # and create your own forecaster by subclassing 
    # [Forecaster][timecopilot.models.utils.forecaster.Forecaster]
    forecaster=SeasonalNaive(),
    batch_size=512,
)
gift_eval = GIFTEval(
    dataset_name="m4_weekly",
    term="short",
    output_path="./seasonal_naive",
    storage_path=storage_path,
)
gift_eval.evaluate_predictor(
    predictor,
    batch_size=512,
)
eval_df = pd.read_csv("./seasonal_naive/all_results.csv")

Raises:

Type Description
ValueError

If the dataset is not compatible with the specified term.

Source code in timecopilot/gift_eval/eval.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
def __init__(
    self,
    dataset_name: str,
    term: str,
    output_path: Path | str | None = None,
    storage_path: Path | str | None = None,
):
    # fmt: off
    """
    Set up evaluation for one GIFTEval dataset/term combination.

    Fetches the dataset-properties mapping, resolves the canonical dataset
    key and frequency, and loads the GluonTS dataset (converted to
    univariate series when the raw data is multivariate).

    Args:
        dataset_name (str): Name of the dataset to evaluate on. May be
            given as ``"name"`` or ``"name/freq"``.
        term (str): Evaluation term (e.g., 'short', 'medium', 'long').
        output_path (str | Path | None): Directory to save results CSV, or
            None to skip saving.
        storage_path (Path | str | None): Path where the dataset is stored.

    Example:
        ```python
        import pandas as pd
        from timecopilot.gift_eval.eval import GIFTEval
        from timecopilot.gift_eval.gluonts_predictor import GluonTSPredictor
        from timecopilot.models.stats import SeasonalNaive

        storage_path = "./gift_eval_data"
        GIFTEval.download_data(storage_path)

        predictor = GluonTSPredictor(
            # you can use any forecaster from TimeCopilot,
            # or write your own by subclassing
            # [Forecaster][timecopilot.models.utils.forecaster.Forecaster]
            forecaster=SeasonalNaive(),
            batch_size=512,
        )
        gift_eval = GIFTEval(
            dataset_name="m4_weekly",
            term="short",
            output_path="./seasonal_naive",
            storage_path=storage_path,
        )
        gift_eval.evaluate_predictor(predictor, batch_size=512)
        eval_df = pd.read_csv("./seasonal_naive/all_results.csv")
        ```

    Raises:
        ValueError: If the dataset is not compatible with the specified term.

    """
    # fmt: on
    response = requests.get(DATASET_PROPERTIES_URL)
    response.raise_for_status()  # surface HTTP failures immediately
    self.dataset_properties_map = response.json()
    # canonical keys for datasets published under alternate names
    pretty_names = {
        "saugeenday": "saugeen",
        "temperature_rain_with_missing": "temperature_rain",
        "kdd_cup_2018_with_missing": "kdd_cup_2018",
        "car_parts_with_missing": "car_parts",
    }
    if term in ("medium", "long") and dataset_name not in MED_LONG_DATASETS:
        raise ValueError(f"Dataset {dataset_name} is not a medium or long dataset")
    if "/" in dataset_name:
        # explicit "name/freq" form: the frequency is part of the name
        parts = dataset_name.split("/")
        raw_key, ds_freq = parts[0].lower(), parts[1]
        ds_key = pretty_names.get(raw_key, raw_key)
    else:
        # bare name: look the frequency up in the properties map
        raw_key = dataset_name.lower()
        ds_key = pretty_names.get(raw_key, raw_key)
        ds_freq = self.dataset_properties_map[ds_key]["frequency"]
    self.ds_config = f"{ds_key}/{ds_freq}/{term}"
    self.ds_key = ds_key

    # Probe the raw dataset once to decide whether it needs to be split
    # into univariate series (i.e. target_dim != 1).
    probe = Dataset(
        name=dataset_name,
        term=term,
        to_univariate=False,
        storage_path=storage_path,
    )
    self.dataset = Dataset(
        name=dataset_name,
        term=term,
        to_univariate=probe.target_dim != 1,
        storage_path=storage_path,
    )
    self.dataset_name = dataset_name
    self.seasonality = get_seasonality(self.dataset.freq)
    self.output_path = output_path

download_data staticmethod

download_data(storage_path: Path | str | None = None)

Download the GIFTEval dataset from Hugging Face.

Parameters:

Name Type Description Default
storage_path Path | str | None

Path to store the dataset.

None
Source code in timecopilot/gift_eval/eval.py
57
58
59
60
61
62
63
64
65
66
67
68
69
@staticmethod
def download_data(storage_path: Path | str | None = None):
    """
    Fetch the GIFTEval dataset snapshot from the Hugging Face Hub.

    Args:
        storage_path (Path | str | None): Local directory in which to store
            the downloaded dataset.
    """
    snapshot_download(
        local_dir=storage_path,
        repo_id="Salesforce/GiftEval",
        repo_type="dataset",
    )

evaluate_predictor

evaluate_predictor(
    predictor: RepresentablePredictor | GluonTSPredictor,
    batch_size: int | None = None,
    overwrite_results: bool = False,
)

Evaluate a GluonTS predictor on the loaded dataset and save results.

Parameters:

Name Type Description Default
predictor RepresentablePredictor | GluonTSPredictor

The predictor to evaluate.

required
batch_size int | None

Batch size for evaluation. If None, uses predictor's default.

None
overwrite_results bool

Whether to overwrite the existing results CSV file.

False
Source code in timecopilot/gift_eval/eval.py
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
def evaluate_predictor(
    self,
    predictor: RepresentablePredictor | GluonTSPredictor,
    batch_size: int | None = None,
    overwrite_results: bool = False,
):
    """
    Evaluate a GluonTS predictor on the loaded dataset and save results.

    Runs the configured metric suite over the dataset's test split and,
    when an output path was configured, appends (or overwrites) a row in
    ``all_results.csv``.

    Args:
        predictor (RepresentablePredictor | GluonTSPredictor): The predictor
            to evaluate.
        batch_size (int | None): Batch size for evaluation. If None, uses
            predictor's default.
        overwrite_results (bool): Whether to overwrite the existing results
            CSV file.
    """
    if batch_size is None:
        # GluonTSPredictor carries its own default; otherwise fall back to 512
        batch_size = (
            predictor.batch_size
            if isinstance(predictor, GluonTSPredictor)
            else 512
        )
    res = evaluate_model(
        predictor,
        test_data=self.dataset.test_data,
        metrics=METRICS,
        batch_size=batch_size,
        axis=None,
        mask_invalid_label=True,
        allow_nan_forecast=False,
        seasonality=self.seasonality,
    )

    # GluonTSPredictor exposes a human-readable alias; plain predictors
    # are identified by their class name.
    if isinstance(predictor, GluonTSPredictor):
        model_name = predictor.alias
    else:
        model_name = predictor.__class__.__name__

    metric_keys = (
        "MSE[mean]",
        "MSE[0.5]",
        "MAE[0.5]",
        "MASE[0.5]",
        "MAPE[0.5]",
        "sMAPE[0.5]",
        "MSIS",
        "RMSE[mean]",
        "NRMSE[mean]",
        "ND[0.5]",
        "mean_weighted_sum_quantile_loss",
    )
    # Build the single result row; dict insertion order fixes the
    # CSV column order.
    row = {"dataset": self.ds_config, "model": model_name}
    for key in metric_keys:
        row[f"eval_metrics/{key}"] = res[key][0]
    properties = self.dataset_properties_map[self.ds_key]
    row["domain"] = properties["domain"]
    row["num_variates"] = properties["num_variates"]

    results_df = pd.DataFrame([row])
    if self.output_path is not None:
        csv_file_path = Path(self.output_path) / "all_results.csv"
        csv_file_path.parent.mkdir(parents=True, exist_ok=True)
        if csv_file_path.exists() and not overwrite_results:
            # append to previous results rather than clobbering them
            results_df = pd.concat([pd.read_csv(csv_file_path), results_df])
        results_df.to_csv(csv_file_path, index=False)

        logger.info(
            f"Results for {self.dataset_name} have been written to {csv_file_path}"
        )

GluonTSPredictor

GluonTSPredictor(
    forecaster: Forecaster,
    h: int | None = None,
    freq: str | None = None,
    level: list[int | float] | None = None,
    quantiles: list[float] | None = None,
    max_length: int | None = None,
    imputation_method: MissingValueImputation | None = None,
    batch_size: int | None = 1024,
)

Bases: RepresentablePredictor

Adapter to use a TimeCopilot Forecaster as a GluonTS Predictor.

This class wraps a TimeCopilot Forecaster and exposes the GluonTS Predictor interface, allowing it to be used with GluonTS evaluation and processing utilities.

Initialize a GluonTSPredictor.

Parameters:

Name Type Description Default
forecaster Forecaster

The TimeCopilot forecaster to wrap. You can use any forecaster from TimeCopilot, and create your own forecaster by subclassing Forecaster.

required
h int | None

Forecast horizon. If None (default), the horizon is inferred from the dataset.

None
freq str | None

Frequency string (e.g., 'D', 'H'). If None (default), the frequency is inferred from the dataset.

None
level list[int | float] | None

Not supported; use quantiles instead.

None
quantiles list[float] | None

Quantiles to forecast. If None (default), the default quantiles [0.1, 0.2, ..., 0.9] are used.

None
max_length int | None

Maximum length of input series.

None
imputation_method MissingValueImputation | None

Imputation method for missing values. If None (default), the last value is used with LastValueImputation().

None
batch_size int | None

Batch size for prediction.

1024

Raises:

Type Description
NotImplementedError

If level is provided (use quantiles instead).

Source code in timecopilot/gift_eval/gluonts_predictor.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def __init__(
    self,
    forecaster: Forecaster,
    h: int | None = None,
    freq: str | None = None,
    level: list[int | float] | None = None,
    quantiles: list[float] | None = None,
    max_length: int | None = None,
    imputation_method: MissingValueImputation | None = None,
    batch_size: int | None = 1024,
):
    """
    Initialize a GluonTSPredictor.

    Wraps a TimeCopilot forecaster so it can be driven through the GluonTS
    Predictor interface.

    Args:
        forecaster (Forecaster): The TimeCopilot forecaster to wrap.
            You can use any forecaster from TimeCopilot, and create your own
            forecaster by subclassing
            [Forecaster][timecopilot.models.utils.forecaster.Forecaster].
        h (int | None): Forecast horizon. If None (default), the horizon is
            inferred from the dataset.
        freq (str | None): Frequency string (e.g., 'D', 'H').
            If None (default), the frequency is inferred from the dataset.
        level (list[int | float] | None): Not supported; use quantiles instead.
        quantiles (list[float] | None): Quantiles to forecast. If None
            (default), the default quantiles [0.1, 0.2, ..., 0.9] are used.
        max_length (int | None): Maximum length of input series.
        imputation_method (MissingValueImputation | None): Imputation method
            for missing values. If None (default), the last value is used
            with LastValueImputation().
        batch_size (int | None): Batch size for prediction.

    Raises:
        NotImplementedError: If level is provided (use quantiles instead).
    """
    # reject unsupported options up front
    if level is not None:
        raise NotImplementedError("level is not supported, use quantiles instead")
    self.forecaster = forecaster
    self.h = h
    self.freq = freq
    self.level = level
    self.quantiles = quantiles if quantiles else QUANTILE_LEVELS
    self.max_length = max_length
    self.imputation_method = (
        imputation_method if imputation_method else LastValueImputation()
    )
    self.batch_size = batch_size
    self.alias = forecaster.alias

predict

predict(dataset: Dataset, **kwargs: Any) -> list[Forecast]

Predict forecasts for a GluonTS Dataset.

Parameters:

Name Type Description Default
dataset Dataset

GluonTS Dataset to forecast.

required
**kwargs Any

Additional keyword arguments (unused).

{}

Returns:

Type Description
list[Forecast]

list[Forecast]: List of GluonTS Forecast objects for the dataset.

Source code in timecopilot/gift_eval/gluonts_predictor.py
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
def predict(self, dataset: Dataset, **kwargs: Any) -> list[Forecast]:
    """
    Predict forecasts for a GluonTS Dataset.

    Entries are accumulated into batches of ``self.batch_size`` and
    forwarded to ``_predict_batch``; any remainder is flushed at the end.

    Args:
        dataset (Dataset): GluonTS Dataset to forecast.
        **kwargs: Additional keyword arguments (unused).

    Returns:
        list[Forecast]: List of GluonTS Forecast objects for the dataset.

    Raises:
        ValueError: If no horizon can be determined, or if the frequency
            cannot be inferred for a non-empty final batch.
    """
    fcsts: list[Forecast] = []
    # batch holds GluonTS data entries (dicts), not Dataset objects
    batch: list[dict] = []
    h = self.h or dataset.test_data.prediction_length
    if h is None:
        raise ValueError("horizon `h` must be provided")
    freq = self.freq
    for entry in tqdm.tqdm(dataset, total=len(dataset)):
        if freq is None:
            # infer the frequency from the first entry that carries one
            freq = entry["freq"]
        batch.append(entry)
        if len(batch) == self.batch_size:
            fcsts.extend(self._predict_batch(batch=batch, h=h, freq=freq))
            batch = []
    if batch:
        if freq is None:
            raise ValueError("frequency `freq` must be provided")
        fcsts.extend(self._predict_batch(batch=batch, h=h, freq=freq))
    return fcsts