Skip to content

guidellm.utils.hf_datasets

save_dataset_to_file(dataset, output_path)

Saves a HuggingFace Dataset to file in a supported format.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| dataset | Dataset | Dataset to save. | required |
| output_path | str \| Path | Output file path (.json, .jsonl, .csv, .parquet). | required |

Raises:

| Type | Description |
| --- | --- |
| ValueError | If the file extension is not supported. |

Source code in src/guidellm/utils/hf_datasets.py
def save_dataset_to_file(dataset: Dataset, output_path: str | Path) -> None:
    """
    Saves a HuggingFace Dataset to file in a supported format.

    The writer is selected from the lower-cased file extension of
    ``output_path``. Missing parent directories are created, but only after
    the extension has been validated, so a rejected path leaves the
    filesystem untouched.

    :param dataset: Dataset to save.
    :param output_path: Output file path (.json, .jsonl, .csv, .parquet).
    :raises ValueError: If the file extension is not supported.
    """
    output_path = Path(output_path)
    suffix = output_path.suffix.lower()

    # Resolve the writer first so an unsupported suffix fails before any
    # directory is created on disk.
    if suffix == ".csv":
        writer = dataset.to_csv
    elif suffix in {".json", ".jsonl"}:
        # NOTE(review): both suffixes use the same to_json call with no extra
        # options, so ".json" and ".jsonl" files get the same on-disk layout.
        writer = dataset.to_json
    elif suffix == ".parquet":
        writer = dataset.to_parquet
    else:
        raise ValueError(
            f"Unsupported file suffix '{suffix}' in output_path '{output_path}'."
            f" Only {SUPPORTED_TYPES} are supported."
        )

    output_path.parent.mkdir(parents=True, exist_ok=True)
    writer(output_path)