Skip to content

guidellm.schemas.response

Backend response models for request and response handling.

Provides standardized response models for generation operations that capture output text, usage metrics, and compilation of request statistics. Ensures consistent data handling and statistics aggregation across different backend implementations.

GenerationResponse

Bases: StandardBaseModel

Response model for backend generation operations.

Captures the output and metrics from a generation request, providing structured data for text output, tool call payloads, token usage statistics, and compilation of detailed request statistics for analysis and monitoring purposes.

Example:

    response = GenerationResponse(
        request_id="req-123",
        request_args=None,
        text="Generated response text",
        input_metrics=UsageMetrics(token_count=50),
        output_metrics=UsageMetrics(token_count=25),
    )
    stats = response.compile_stats(request, info)

Source code in src/guidellm/schemas/response.py
class GenerationResponse(StandardBaseModel):
    """
    Response model for backend generation operations.

    Captures the output and metrics from a generation request, providing structured
    data for text output, tool call payloads, token usage statistics, and compilation
    of detailed request statistics for analysis and monitoring purposes.

    Example:
    ::
        response = GenerationResponse(
            request_id="req-123",
            request_args=None,
            text="Generated response text",
            input_metrics=UsageMetrics(token_count=50),
            output_metrics=UsageMetrics(token_count=25)
        )
        stats = response.compile_stats(request, info)
    """

    # --- identification -----------------------------------------------------
    request_id: str = Field(
        description="Unique identifier matching the original GenerationRequest."
    )
    response_id: str | None = Field(
        default=None,
        description="Unique identifier matching the original vLLM Response ID.",
    )
    # No default is declared here, so the value must always be supplied
    # (``None`` is accepted).
    request_args: str | None = Field(
        description="Arguments passed to the backend for request processing."
    )

    # --- generated payload --------------------------------------------------
    text: str | None = Field(
        default=None,
        description="The generated response text.",
    )
    reasoning_text: str | None = Field(
        default=None,
        description=(
            "Reasoning/chain-of-thought text emitted before the content. "
            "Not included in multi-turn history unless explicitly enabled."
        ),
    )
    tool_calls: list[ToolCall] | None = Field(
        default=None,
        description=(
            "Raw tool call payloads from the model response, each containing "
            "id, type, and function (name + arguments) in OpenAI format."
        ),
    )

    # --- usage metrics (a fresh UsageMetrics per instance by default) --------
    input_metrics: UsageMetrics = Field(
        default_factory=UsageMetrics,
        description="Token usage statistics from the input prompt.",
    )
    output_metrics: UsageMetrics = Field(
        default_factory=UsageMetrics,
        description="Token usage statistics from the generated output.",
    )

    def compile_stats(
        self,
        request: GenerationRequest,
        info: RequestInfo,
        prefer_response: bool = True,
    ) -> GenerativeRequestStats:
        """
        Build the statistics record for one request/response pair.

        Request-side and response-side usage metrics are merged field by field:
        one side supplies the baseline values and every non-None field of the
        other side overwrites them. With ``prefer_response=True`` the response
        metrics are the overriding side; otherwise the request metrics are.

        Side effect: when ``info.status`` is not ``"completed"``, the
        ``output_metrics`` attribute of the passed-in ``request`` is replaced
        with a fresh ``UsageMetrics`` (with ``tool_call_count`` set to 0 when
        the request expected a tool call).

        :param request: The original generation request containing input data
        :param info: Metadata and timing information for the request execution
        :param prefer_response: Whether response metrics win over request
            metrics when both provide a value
        :return: A GenerativeRequestStats object containing the merged metrics
            together with this response's text, reasoning text and tool calls
        :raises ValueError: When the request or info ID differs from this
            response's ``request_id``
        """

        def _overlay(baseline: UsageMetrics, winner: UsageMetrics) -> dict:
            # Start from the baseline dump and let every non-None field of the
            # winning side replace the baseline value.
            combined = baseline.model_dump()
            combined.update(
                {
                    field: val
                    for field, val in winner.model_dump().items()
                    if val is not None
                }
            )
            return combined

        if request.request_id != self.request_id:
            raise ValueError("Mismatched request IDs between request and response.")

        if info.request_id != self.request_id:
            raise ValueError("Mismatched request IDs between info and response.")

        if info.status != "completed":
            # Output metrics recorded on the request are not valid for a
            # request that did not complete, so they are reset. The
            # expectation flag is read *before* the reset and, when set, a
            # zero tool_call_count is written back so these requests still
            # show up in the tool call metrics table.
            wanted_tool_call = request.expects_tool_call
            request.output_metrics = UsageMetrics()
            if wanted_tool_call:
                request.output_metrics.tool_call_count = 0

        # Decide which side is the baseline and which side overrides it.
        if prefer_response:
            base_input, override_input = request.input_metrics, self.input_metrics
            base_output, override_output = (
                request.output_metrics,
                self.output_metrics,
            )
        else:
            base_input, override_input = self.input_metrics, request.input_metrics
            base_output, override_output = (
                self.output_metrics,
                request.output_metrics,
            )

        merged_input = _overlay(base_input, override_input)
        merged_output = _overlay(base_output, override_output)

        # A tool call was expected but neither side recorded a count (the
        # request errored, or the model produced none with ignore_continue):
        # report 0 so the metrics table still appears.
        if request.expects_tool_call:
            if merged_output.get("tool_call_count") is None:
                merged_output["tool_call_count"] = 0

        input_usage = UsageMetrics(**merged_input)
        output_usage = UsageMetrics(**merged_output)

        return GenerativeRequestStats(
            request_id=self.request_id,
            response_id=self.response_id,
            request_args=self.request_args,
            output=self.text,
            reasoning_output=self.reasoning_text,
            tool_calls=self.tool_calls,
            info=info,
            input_metrics=input_usage,
            output_metrics=output_usage,
        )

compile_stats(request, info, prefer_response=True)

Compile and return comprehensive request statistics.

Merges metrics from the request and response objects to create a complete statistical record, with preference given to response-level metrics when available to ensure accuracy of actual execution data.

Parameters:

Name Type Description Default
request GenerationRequest

The original generation request containing input data

required
info RequestInfo

Metadata and timing information for the request execution

required
prefer_response bool

Whether to prefer response metrics over request metrics when both are available

True

Returns:

Type Description
GenerativeRequestStats

A GenerativeRequestStats object containing detailed statistics

Raises:

Type Description
ValueError

When request IDs don't match between objects

Source code in src/guidellm/schemas/response.py
def compile_stats(
    self,
    request: GenerationRequest,
    info: RequestInfo,
    prefer_response: bool = True,
) -> GenerativeRequestStats:
    """
    Build the statistics record for one request/response pair.

    Request-side and response-side usage metrics are merged field by field:
    one side supplies the baseline values and every non-None field of the
    other side overwrites them. With ``prefer_response=True`` the response
    metrics are the overriding side; otherwise the request metrics are.

    Side effect: when ``info.status`` is not ``"completed"``, the
    ``output_metrics`` attribute of the passed-in ``request`` is replaced
    with a fresh ``UsageMetrics`` (with ``tool_call_count`` set to 0 when
    the request expected a tool call).

    :param request: The original generation request containing input data
    :param info: Metadata and timing information for the request execution
    :param prefer_response: Whether response metrics win over request
        metrics when both provide a value
    :return: A GenerativeRequestStats object containing the merged metrics
        together with this response's text, reasoning text and tool calls
    :raises ValueError: When the request or info ID differs from this
        response's ``request_id``
    """

    def _overlay(baseline: UsageMetrics, winner: UsageMetrics) -> dict:
        # Start from the baseline dump and let every non-None field of the
        # winning side replace the baseline value.
        combined = baseline.model_dump()
        combined.update(
            {
                field: val
                for field, val in winner.model_dump().items()
                if val is not None
            }
        )
        return combined

    if request.request_id != self.request_id:
        raise ValueError("Mismatched request IDs between request and response.")

    if info.request_id != self.request_id:
        raise ValueError("Mismatched request IDs between info and response.")

    if info.status != "completed":
        # Output metrics recorded on the request are not valid for a request
        # that did not complete, so they are reset. The expectation flag is
        # read *before* the reset and, when set, a zero tool_call_count is
        # written back so these requests still show up in the tool call
        # metrics table.
        wanted_tool_call = request.expects_tool_call
        request.output_metrics = UsageMetrics()
        if wanted_tool_call:
            request.output_metrics.tool_call_count = 0

    # Decide which side is the baseline and which side overrides it.
    if prefer_response:
        base_input, override_input = request.input_metrics, self.input_metrics
        base_output, override_output = request.output_metrics, self.output_metrics
    else:
        base_input, override_input = self.input_metrics, request.input_metrics
        base_output, override_output = self.output_metrics, request.output_metrics

    merged_input = _overlay(base_input, override_input)
    merged_output = _overlay(base_output, override_output)

    # A tool call was expected but neither side recorded a count (the request
    # errored, or the model produced none with ignore_continue): report 0 so
    # the metrics table still appears.
    if request.expects_tool_call:
        if merged_output.get("tool_call_count") is None:
            merged_output["tool_call_count"] = 0

    input_usage = UsageMetrics(**merged_input)
    output_usage = UsageMetrics(**merged_output)

    return GenerativeRequestStats(
        request_id=self.request_id,
        response_id=self.response_id,
        request_args=self.request_args,
        output=self.text,
        reasoning_output=self.reasoning_text,
        tool_calls=self.tool_calls,
        info=info,
        input_metrics=input_usage,
        output_metrics=output_usage,
    )

ToolCall

Bases: BaseModel

A single tool call from an OpenAI-compatible API response.

Source code in src/guidellm/schemas/tool_call.py
class ToolCall(BaseModel):
    """A single tool call from an OpenAI-compatible API response."""

    # Every field declares a default, so ToolCall() can be built with no
    # arguments.

    # Identifier of the tool call; empty string when not provided.
    id: str = ""
    # Kind of tool call; "function" unless another value is supplied.
    type: str = "function"
    # Name and arguments of the invoked function. default_factory builds a
    # fresh ToolCallFunction for each instance that does not supply one.
    function: ToolCallFunction = Field(default_factory=ToolCallFunction)

ToolCallFunction

Bases: BaseModel

Function name and arguments for a single tool call.

Source code in src/guidellm/schemas/tool_call.py
class ToolCallFunction(BaseModel):
    """Function name and arguments for a single tool call."""

    # Name of the function being called; empty string when not provided.
    name: str = ""
    # Arguments for the call, held as a single string; empty when not provided.
    # NOTE(review): presumably a JSON-encoded object as in the OpenAI
    # tool-call format — confirm against the backend that populates it.
    arguments: str = ""