ResultRecorder

An abstract base class for recording experiment results, including configuration, metrics, and model outputs.

This class defines the interface for different result recording implementations, such as saving to a local directory, uploading to wandb, or integrating with MLflow.

Source code in flexeval/core/result_recorder/base.py
class ResultRecorder(ABC):
    """
    An abstract base class for recording experiment results, including configuration,
    metrics, and model outputs.

    This class defines the interface for different result recording implementations,
    such as saving to a local directory, uploading to wandb, or integrating with MLflow.
    """

    @abstractmethod
    def record_config(self, config: dict[str, Any], group: str | None = None) -> None:
        """
        Record the configuration parameters of the experiment.

        Args:
            config: A dictionary containing the configuration
                parameters of the evaluation.
            group: An optional group name to organize the configuration.
        """

    @abstractmethod
    def record_metrics(self, metrics: dict[str, Any], group: str | None = None) -> None:
        """
        Record the evaluation metrics of the experiment.

        Args:
            metrics: A dictionary containing the evaluation metrics,
                where keys are metric names and values are the corresponding results.
            group: An optional group name to organize the metrics.
        """

    @abstractmethod
    def record_model_outputs(self, model_outputs: list[dict[str, Any]], group: str | None = None) -> None:
        """
        Record the outputs generated by the model during evaluation.

        Args:
            model_outputs: A list of dictionaries, where each
                dictionary represents a single model output. The structure of these
                dictionaries may vary depending on the specific model and task.
            group: An optional group name to organize the model outputs.
        """

record_config abstractmethod

record_config(
    config: dict[str, Any], group: str | None = None
) -> None

Record the configuration parameters of the experiment.

Parameters:

  • config (dict[str, Any]) – A dictionary containing the configuration parameters of the evaluation.
  • group (str | None, default: None) – An optional group name to organize the configuration.

Source code in flexeval/core/result_recorder/base.py
@abstractmethod
def record_config(self, config: dict[str, Any], group: str | None = None) -> None:
    """
    Record the configuration parameters of the experiment.

    Args:
        config: A dictionary containing the configuration
            parameters of the evaluation.
        group: An optional group name to organize the configuration.
    """

record_metrics abstractmethod

record_metrics(
    metrics: dict[str, Any], group: str | None = None
) -> None

Record the evaluation metrics of the experiment.

Parameters:

  • metrics (dict[str, Any]) – A dictionary containing the evaluation metrics, where keys are metric names and values are the corresponding results.
  • group (str | None, default: None) – An optional group name to organize the metrics.

Source code in flexeval/core/result_recorder/base.py
@abstractmethod
def record_metrics(self, metrics: dict[str, Any], group: str | None = None) -> None:
    """
    Record the evaluation metrics of the experiment.

    Args:
        metrics: A dictionary containing the evaluation metrics,
            where keys are metric names and values are the corresponding results.
        group: An optional group name to organize the metrics.
    """

record_model_outputs abstractmethod

record_model_outputs(
    model_outputs: list[dict[str, Any]],
    group: str | None = None,
) -> None

Record the outputs generated by the model during evaluation.

Parameters:

  • model_outputs (list[dict[str, Any]]) – A list of dictionaries, where each dictionary represents a single model output. The structure of these dictionaries may vary depending on the specific model and task.
  • group (str | None, default: None) – An optional group name to organize the model outputs.

Source code in flexeval/core/result_recorder/base.py
@abstractmethod
def record_model_outputs(self, model_outputs: list[dict[str, Any]], group: str | None = None) -> None:
    """
    Record the outputs generated by the model during evaluation.

    Args:
        model_outputs: A list of dictionaries, where each
            dictionary represents a single model output. The structure of these
            dictionaries may vary depending on the specific model and task.
        group: An optional group name to organize the model outputs.
    """

LocalRecorder

A class to record the results as JSON files in a local directory.

Parameters:

  • output_dir (str) – The directory to save the results.
  • force (bool, default: False) – If True, existing result files are overwritten instead of raising a FileExistsError.

Source code in flexeval/core/result_recorder/local_recorder.py
class LocalRecorder(ResultRecorder):
    """
    A class to record the results as JSON files in a local directory.

    Args:
        output_dir: The directory to save the results.
        force: If `True`, overwrite existing result files instead of raising a `FileExistsError`.
    """

    def __init__(self, output_dir: str, force: bool = False) -> None:
        self.output_dir = Path(output_dir)
        self.force = force

    @staticmethod
    def _check_output_dir_exists(output_dir: str | PathLike[str], checked_files: list[str]) -> None:
        output_dir = Path(output_dir)
        for file_name in checked_files:
            if (output_dir / file_name).exists():
                msg = (
                    f"`{output_dir / file_name}` already exists. If you want to overwrite it, "
                    f"please specify `--force true` from CLI or `force=True` when initializing the recorder."
                )
                raise FileExistsError(msg)

    def record_config(self, config: dict[str, Any], group: str | None = None) -> None:
        output_dir = self.output_dir
        if group is not None:
            output_dir = self.output_dir / group

        if not self.force:
            self._check_output_dir_exists(output_dir, [CONFIG_FILE_NAME])

        save_json(config, output_dir / CONFIG_FILE_NAME)
        logger.info(f"Saved the config to {output_dir / CONFIG_FILE_NAME}")

    def record_metrics(self, metrics: dict[str, Any], group: str | None = None) -> None:
        output_dir = self.output_dir
        if group is not None:
            output_dir = self.output_dir / group

        if not self.force:
            self._check_output_dir_exists(output_dir, [METRIC_FILE_NAME])

        save_json(metrics, output_dir / METRIC_FILE_NAME)
        logger.info(f"Saved the metrics to {output_dir / METRIC_FILE_NAME}")

    def record_model_outputs(self, model_outputs: list[dict[str, Any]], group: str | None = None) -> None:
        output_dir = self.output_dir
        if group is not None:
            output_dir = output_dir / group

        if not self.force:
            self._check_output_dir_exists(output_dir, [OUTPUTS_FILE_NAME])

        save_jsonl(model_outputs, output_dir / OUTPUTS_FILE_NAME)
        logger.info(f"Saved the outputs to {output_dir / OUTPUTS_FILE_NAME}")
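
A minimal usage sketch (the directory name, config values, and output record below are placeholders; the import path follows the source location shown above):

from flexeval.core.result_recorder.local_recorder import LocalRecorder

recorder = LocalRecorder("results/run1", force=True)
recorder.record_config({"model": "my-model", "max_tokens": 128})
recorder.record_metrics({"accuracy": 0.87}, group="test")
recorder.record_model_outputs(
    [{"input": "What is 2 + 2?", "output": "4"}],
    group="test",
)

With group="test", the metrics and outputs are written under results/run1/test/, while the ungrouped config goes directly into results/run1/. With the default force=False, a second run against the same directory raises FileExistsError instead of silently overwriting existing files.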

output_dir instance-attribute

output_dir = Path(output_dir)

force instance-attribute

force = force

__init__

__init__(output_dir: str, force: bool = False) -> None
Source code in flexeval/core/result_recorder/local_recorder.py
def __init__(self, output_dir: str, force: bool = False) -> None:
    self.output_dir = Path(output_dir)
    self.force = force

record_config

record_config(
    config: dict[str, Any], group: str | None = None
) -> None
Source code in flexeval/core/result_recorder/local_recorder.py
def record_config(self, config: dict[str, Any], group: str | None = None) -> None:
    output_dir = self.output_dir
    if group is not None:
        output_dir = self.output_dir / group

    if not self.force:
        self._check_output_dir_exists(output_dir, [CONFIG_FILE_NAME])

    save_json(config, output_dir / CONFIG_FILE_NAME)
    logger.info(f"Saved the config to {output_dir / CONFIG_FILE_NAME}")

record_metrics

record_metrics(
    metrics: dict[str, Any], group: str | None = None
) -> None
Source code in flexeval/core/result_recorder/local_recorder.py
def record_metrics(self, metrics: dict[str, Any], group: str | None = None) -> None:
    output_dir = self.output_dir
    if group is not None:
        output_dir = self.output_dir / group

    if not self.force:
        self._check_output_dir_exists(output_dir, [METRIC_FILE_NAME])

    save_json(metrics, output_dir / METRIC_FILE_NAME)
    logger.info(f"Saved the metrics to {output_dir / METRIC_FILE_NAME}")

record_model_outputs

record_model_outputs(
    model_outputs: list[dict[str, Any]],
    group: str | None = None,
) -> None
Source code in flexeval/core/result_recorder/local_recorder.py
def record_model_outputs(self, model_outputs: list[dict[str, Any]], group: str | None = None) -> None:
    output_dir = self.output_dir
    if group is not None:
        output_dir = output_dir / group

    if not self.force:
        self._check_output_dir_exists(output_dir, [OUTPUTS_FILE_NAME])

    save_jsonl(model_outputs, output_dir / OUTPUTS_FILE_NAME)
    logger.info(f"Saved the outputs to {output_dir / OUTPUTS_FILE_NAME}")

WandBRecorder

A class to record the results to Weights & Biases.

Parameters:

  • init_kwargs (dict[str, Any] | None, default: None) – The arguments for the wandb.init function. Please refer to the official documentation (https://docs.wandb.ai/ref/python/init) for the details.

Source code in flexeval/core/result_recorder/wandb_recorder.py
class WandBRecorder(ResultRecorder):
    """
    A class to record the results to Weights & Biases.

    Args:
        init_kwargs: The arguments for the `wandb.init` function.
            Please refer to [the official documentation](https://docs.wandb.ai/ref/python/init) for the details.
    """

    def __init__(
        self,
        init_kwargs: dict[str, Any] | None = None,
    ) -> None:
        import wandb

        self._wandb = wandb
        init_kwargs = init_kwargs or {}
        self._wandb.init(**init_kwargs)

    def record_config(self, config: dict[str, Any], group: str | None = None) -> None:
        if group:
            self._wandb.config.update({group: config})
        else:
            self._wandb.config.update(config)

    def record_metrics(self, metrics: dict[str, Any], group: str | None = None) -> None:
        if group:
            self._wandb.summary.update({group: metrics})
        else:
            self._wandb.summary.update(metrics)

    def record_model_outputs(self, model_outputs: list[dict[str, Any]], group: str | None = None) -> None:
        table = self._wandb.Table(columns=list(model_outputs[0].keys()))

        for output in model_outputs:
            table.add_data(*output.values())

        table_name = "model_outputs" if group is None else f"{group}/model_outputs"
        self._wandb.log({table_name: table})

    def __del__(self) -> None:
        self._wandb.finish()
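
A minimal usage sketch (the project name and recorded values are placeholders; init_kwargs is forwarded unchanged to wandb.init, so any of its arguments can be used):

from flexeval.core.result_recorder.wandb_recorder import WandBRecorder

recorder = WandBRecorder(init_kwargs={"project": "my-eval-project"})
recorder.record_config({"model": "my-model"})
recorder.record_metrics({"accuracy": 0.87}, group="test")
recorder.record_model_outputs([{"input": "What is 2 + 2?", "output": "4"}])

When a group is given, the config and metrics are nested under that key in wandb.config and wandb.summary, and the model outputs are logged as a wandb.Table named "{group}/model_outputs". The run is finished automatically via wandb.finish() when the recorder is garbage-collected.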

__init__

__init__(init_kwargs: dict[str, Any] | None = None) -> None
Source code in flexeval/core/result_recorder/wandb_recorder.py
def __init__(
    self,
    init_kwargs: dict[str, Any] | None = None,
) -> None:
    import wandb

    self._wandb = wandb
    init_kwargs = init_kwargs or {}
    self._wandb.init(**init_kwargs)

record_config

record_config(
    config: dict[str, Any], group: str | None = None
) -> None
Source code in flexeval/core/result_recorder/wandb_recorder.py
def record_config(self, config: dict[str, Any], group: str | None = None) -> None:
    if group:
        self._wandb.config.update({group: config})
    else:
        self._wandb.config.update(config)

record_metrics

record_metrics(
    metrics: dict[str, Any], group: str | None = None
) -> None
Source code in flexeval/core/result_recorder/wandb_recorder.py
def record_metrics(self, metrics: dict[str, Any], group: str | None = None) -> None:
    if group:
        self._wandb.summary.update({group: metrics})
    else:
        self._wandb.summary.update(metrics)

record_model_outputs

record_model_outputs(
    model_outputs: list[dict[str, Any]],
    group: str | None = None,
) -> None
Source code in flexeval/core/result_recorder/wandb_recorder.py
def record_model_outputs(self, model_outputs: list[dict[str, Any]], group: str | None = None) -> None:
    table = self._wandb.Table(columns=list(model_outputs[0].keys()))

    for output in model_outputs:
        table.add_data(*output.values())

    table_name = "model_outputs" if group is None else f"{group}/model_outputs"
    self._wandb.log({table_name: table})

__del__

__del__() -> None
Source code in flexeval/core/result_recorder/wandb_recorder.py
def __del__(self) -> None:
    self._wandb.finish()