`guidellm.data.schemas.preprocess`

`PreprocessDatasetConfig`

Bases: StandardBaseModel
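Configuration model for dataset preprocessing. It specifies the average number of text tokens retained or added to prompts and to outputs (both required), optional standard deviation, minimum, and maximum values for each, and an optional maximum number of text tokens left in the prefixes.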
Source code in src/guidellm/data/schemas/preprocess.py
class PreprocessDatasetConfig(StandardBaseModel):
    """Model for preprocess dataset configuration."""

    # Every field below is an integer constrained to be strictly positive
    # (gt=0). The two averages are required; all other fields default to None.

    # --- Prompt token counts -------------------------------------------
    prompt_tokens: int = Field(
        gt=0,
        examples=[100],
        description=(
            "The average number of text tokens retained or added to prompts."
        ),
    )
    prompt_tokens_stdev: int | None = Field(
        default=None,
        gt=0,
        examples=[10],
        description=(
            "The standard deviation of the number of tokens "
            "retained in or added to prompts."
        ),
    )
    prompt_tokens_min: int | None = Field(
        default=None,
        gt=0,
        examples=[100],
        description=(
            "The minimum number of text tokens retained or added to prompts."
        ),
    )
    prompt_tokens_max: int | None = Field(
        default=None,
        gt=0,
        examples=[100],
        description=(
            "The maximum number of text tokens retained or added to prompts."
        ),
    )

    # --- Output token counts -------------------------------------------
    output_tokens: int = Field(
        gt=0,
        examples=[100],
        description=(
            "The average number of text tokens retained or added to outputs."
        ),
    )
    output_tokens_stdev: int | None = Field(
        default=None,
        gt=0,
        examples=[10],
        description=(
            "The standard deviation of the number of tokens "
            "retained or added to outputs."
        ),
    )
    output_tokens_min: int | None = Field(
        default=None,
        gt=0,
        examples=[100],
        description=(
            "The minimum number of text tokens retained or added to outputs."
        ),
    )
    output_tokens_max: int | None = Field(
        default=None,
        gt=0,
        examples=[100],
        description=(
            "The maximum number of text tokens retained or added to outputs."
        ),
    )

    # --- Prefix token limit --------------------------------------------
    # NOTE(review): no cross-field check (e.g. min <= max) is declared in
    # this class; confirm whether that is enforced elsewhere.
    prefix_tokens_max: int | None = Field(
        default=None,
        gt=0,
        examples=[100],
        description="The maximum number of text tokens left in the prefixes.",
    )