Skip to content

GenerationDataset

GenerationDataset

A dataset holding GenerationInstance.

Source code in flexeval/core/generation_dataset/base.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class GenerationDataset(Sequence[GenerationInstance], ABC):
    """
    Abstract sequence of `GenerationInstance` objects.

    Subclasses provide `__len__` and `__getitem__`; a default `__repr__`
    showing the class name and the number of instances is supplied here.
    """

    @abstractmethod
    def __len__(self) -> int:
        """
        Returns how many instances the dataset holds.
        """
        raise NotImplementedError

    @abstractmethod
    def __getitem__(self, i: int) -> GenerationInstance:
        """
        Returns the instance stored at index `i`.
        """
        raise NotImplementedError

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        size = len(self)
        return f"{class_name}(num_instances={size})"

__len__ abstractmethod

__len__() -> int

Returns the number of instances in the dataset.

Source code in flexeval/core/generation_dataset/base.py
29
30
31
32
33
34
@abstractmethod
def __len__(self) -> int:
    """
    Returns how many instances the dataset holds.

    Abstract: this base implementation only raises `NotImplementedError`.
    """
    raise NotImplementedError

__getitem__ abstractmethod

__getitem__(i: int) -> GenerationInstance

Returns the i-th instance.

Source code in flexeval/core/generation_dataset/base.py
36
37
38
39
40
41
@abstractmethod
def __getitem__(self, i: int) -> GenerationInstance:
    """
    Returns the instance stored at index `i`.

    Abstract: this base implementation only raises `NotImplementedError`.
    """
    raise NotImplementedError

__repr__

__repr__() -> str
Source code in flexeval/core/generation_dataset/base.py
43
44
def __repr__(self) -> str:
    """Return `<ClassName>(num_instances=<len(self)>)`."""
    class_name = self.__class__.__name__
    size = len(self)
    return f"{class_name}(num_instances={size})"

GenerationInstance dataclass

A dataclass representing a single input-output pair of a generation task.

Source code in flexeval/core/generation_dataset/base.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
@dataclass
class GenerationInstance:
    """
    One example of a generation task: its inputs paired with its reference outputs.
    """

    inputs: dict[str, Any]
    """
    Input fields of the example.
    `PromptTemplate` embeds them into the prompt given to the language model.
    """
    references: list[str] = field(default_factory=list)
    """
    Reference outputs of the example (a fresh empty list when omitted).
    `Metric` evaluates the model's output against these references.
    """

inputs instance-attribute

inputs: dict[str, Any]

Inputs of the generation task. This will be embedded into the prompt for the language model in PromptTemplate.

references class-attribute instance-attribute

references: list[str] = field(default_factory=list)

Reference outputs for the generation task. The model's output will be evaluated against these references in Metric.

__init__

__init__(
    inputs: dict[str, Any], references: list[str] = list()
) -> None

HFGenerationDataset

Load GenerationInstances from a huggingface dataset.

Parameters:

  • path (str) –

    The path to the Hugging Face dataset.

  • split (str) –

    The split of the dataset.

  • subset (str | None, default: None ) –

    The subset of the dataset.

  • dataset_kwargs (dict[str, Any] | None, default: None ) –

    The additional keyword arguments for loading the dataset.

Source code in flexeval/core/generation_dataset/template_based.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
class HFGenerationDataset(TemplateGenerationDataset):
    """
    Load GenerationInstances from a huggingface dataset.

    Every record of the loaded split is converted to a plain dict and handed to
    `TemplateGenerationDataset`; the template, range and condition arguments are
    forwarded to it unchanged.

    Args:
        path: The path to the Hugging Face dataset.
        split: The split of the dataset.
        subset: The subset of the dataset.
        dataset_kwargs: The additional keyword arguments for loading the dataset.
    """

    def __init__(
        self,
        path: str,
        split: str,
        subset: str | None = None,
        dataset_kwargs: dict[str, Any] | None = None,
        reference_template: str | None = None,
        reference_list_template: str | None = None,
        input_templates: dict[str, str] | None = None,
        data_range: tuple[int, int] | None = None,
        keep_conditions: dict[str, str] | None = None,
        remove_conditions: dict[str, str] | None = None,
    ) -> None:
        # A missing (or empty) `dataset_kwargs` means "no extra loader arguments".
        loader_kwargs = dataset_kwargs if dataset_kwargs else {}
        hf_dataset = datasets.load_dataset(path, name=subset, split=split, **loader_kwargs)
        rows = [dict(row) for row in hf_dataset]

        super().__init__(
            items=rows,
            reference_template=reference_template,
            reference_list_template=reference_list_template,
            input_templates=input_templates,
            data_range=data_range,
            keep_conditions=keep_conditions,
            remove_conditions=remove_conditions,
        )

__init__

__init__(
    path: str,
    split: str,
    subset: str | None = None,
    dataset_kwargs: dict[str, Any] | None = None,
    reference_template: str | None = None,
    reference_list_template: str | None = None,
    input_templates: dict[str, str] | None = None,
    data_range: tuple[int, int] | None = None,
    keep_conditions: dict[str, str] | None = None,
    remove_conditions: dict[str, str] | None = None,
) -> None
Source code in flexeval/core/generation_dataset/template_based.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def __init__(
    self,
    path: str,
    split: str,
    subset: str | None = None,
    dataset_kwargs: dict[str, Any] | None = None,
    reference_template: str | None = None,
    reference_list_template: str | None = None,
    input_templates: dict[str, str] | None = None,
    data_range: tuple[int, int] | None = None,
    keep_conditions: dict[str, str] | None = None,
    remove_conditions: dict[str, str] | None = None,
) -> None:
    """
    Load the requested split with `datasets.load_dataset`, turn each record into
    a plain dict, and forward the records plus the remaining arguments to the
    parent initializer.
    """
    # A missing (or empty) `dataset_kwargs` means "no extra loader arguments".
    loader_kwargs = dataset_kwargs if dataset_kwargs else {}
    hf_dataset = datasets.load_dataset(path, name=subset, split=split, **loader_kwargs)
    rows = [dict(row) for row in hf_dataset]

    super().__init__(
        items=rows,
        reference_template=reference_template,
        reference_list_template=reference_list_template,
        input_templates=input_templates,
        data_range=data_range,
        keep_conditions=keep_conditions,
        remove_conditions=remove_conditions,
    )

JsonlGenerationDataset

Load GenerationInstances from a JSONL file.

Parameters:

  • path (str) –

    The path to the JSONL file.

Source code in flexeval/core/generation_dataset/template_based.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
class JsonlGenerationDataset(TemplateGenerationDataset):
    """
    Load GenerationInstances from a JSONL file.

    Each non-blank line of the file is parsed as one JSON value and becomes one
    item; the template, range and condition arguments are forwarded unchanged to
    `TemplateGenerationDataset`.

    Args:
        path: The path to the JSONL file. The file is read as UTF-8.
    """

    def __init__(
        self,
        path: str,
        reference_template: str | None = None,
        reference_list_template: str | None = None,
        input_templates: dict[str, str] | None = None,
        data_range: tuple[int, int] | None = None,
        keep_conditions: dict[str, str] | None = None,
        remove_conditions: dict[str, str] | None = None,
    ) -> None:
        # Read as UTF-8 explicitly: the platform default encoding is locale-dependent
        # and would corrupt or reject non-ASCII text on non-UTF-8 locales.
        with open(path, encoding="utf-8") as f:
            # Skip whitespace-only lines (e.g. a trailing empty line), which are
            # not JSON values and would make `json.loads` raise.
            items = [json.loads(line) for line in f if line.strip()]

        super().__init__(
            items=items,
            reference_template=reference_template,
            reference_list_template=reference_list_template,
            input_templates=input_templates,
            data_range=data_range,
            keep_conditions=keep_conditions,
            remove_conditions=remove_conditions,
        )

__init__

__init__(
    path: str,
    reference_template: str | None = None,
    reference_list_template: str | None = None,
    input_templates: dict[str, str] | None = None,
    data_range: tuple[int, int] | None = None,
    keep_conditions: dict[str, str] | None = None,
    remove_conditions: dict[str, str] | None = None,
) -> None
Source code in flexeval/core/generation_dataset/template_based.py
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
def __init__(
    self,
    path: str,
    reference_template: str | None = None,
    reference_list_template: str | None = None,
    input_templates: dict[str, str] | None = None,
    data_range: tuple[int, int] | None = None,
    keep_conditions: dict[str, str] | None = None,
    remove_conditions: dict[str, str] | None = None,
) -> None:
    """
    Parse each non-blank line of the UTF-8 file at `path` as one JSON value and
    forward the parsed items plus the remaining arguments to the parent initializer.
    """
    # Read as UTF-8 explicitly: the platform default encoding is locale-dependent
    # and would corrupt or reject non-ASCII text on non-UTF-8 locales.
    with open(path, encoding="utf-8") as f:
        # Skip whitespace-only lines (e.g. a trailing empty line), which are
        # not JSON values and would make `json.loads` raise.
        items = [json.loads(line) for line in f if line.strip()]

    super().__init__(
        items=items,
        reference_template=reference_template,
        reference_list_template=reference_list_template,
        input_templates=input_templates,
        data_range=data_range,
        keep_conditions=keep_conditions,
        remove_conditions=remove_conditions,
    )

TemplateGenerationDataset

Build GenerationInstances from a list of dict items, using Jinja2 templates to render the inputs and references.

Parameters:

  • items (list[dict[str, Any]]) –

    A list of dict items.

  • reference_template (str | None, default: None ) –

    Specify the Jinja2 template to render the reference string if the dataset has a single reference.

  • reference_list_template (str | None, default: None ) –

    Specify the Jinja2 template to render a list of reference strings if the dataset has multiple references.

  • input_templates (dict[str, str] | None, default: None ) –

    A dictionary of Jinja2 templates for the inputs.

  • data_range (tuple[int, int] | None, default: None ) –

    The range of data to use.

  • keep_conditions (dict[str, str] | None, default: None ) –

    A dictionary to indicate the condition to filter certain items. The key is a Jinja2 template string to embed the item into a string, and the value is the value to keep.

  • remove_conditions (dict[str, str] | None, default: None ) –

    A dictionary to indicate the condition to remove certain items. The key is a Jinja2 template string to embed the item into a string, and the value is the value to remove.

Source code in flexeval/core/generation_dataset/template_based.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
class TemplateGenerationDataset(GenerationDataset):
    """
    Build GenerationInstances from a list of dict items using Jinja2 templates.

    Args:
        items: A list of dict items.
        reference_template: Specify the Jinja2 template to render the reference string
            if the dataset has a single reference.
        reference_list_template: Specify the Jinja2 template to render a list of reference strings
            if the dataset has multiple references.
        input_templates: A dictionary of Jinja2 templates for the inputs.
        data_range: The range of data to use, as `(start, end)`; applied as the slice
            `items[start:end]` before the keep/remove conditions.
        keep_conditions: A dictionary to indicate the condition to filter certain items.
            The key is a Jinja2 template string to embed the item into a string, and the value is the value to keep.
        remove_conditions: A dictionary to indicate the condition to remove certain items.
            The key is a Jinja2 template string to embed the item into a string, and the value is the value to remove.

    Raises:
        ValueError: If both `reference_template` and `reference_list_template` are set.
    """

    def __init__(
        self,
        items: list[dict[str, Any]],
        reference_template: str | None = None,
        reference_list_template: str | None = None,
        input_templates: dict[str, str] | None = None,
        data_range: tuple[int, int] | None = None,
        keep_conditions: dict[str, str] | None = None,
        remove_conditions: dict[str, str] | None = None,
    ) -> None:
        if reference_template and reference_list_template:
            msg = "Only one of reference_template and reference_list_template can be set."
            raise ValueError(msg)

        if data_range:
            start, end = data_range
            items = items[start:end]

        # Keep only the items whose rendered key equals the requested value, for every condition.
        keep_conditions = keep_conditions or {}
        for template_str, value_to_keep in keep_conditions.items():
            key_template = JINJA2_ENV.from_string(template_str)
            items = [item for item in items if key_template.render(**item) == value_to_keep]
        # Then drop the items whose rendered key equals the value to remove.
        remove_conditions = remove_conditions or {}
        for template_str, value_to_remove in remove_conditions.items():
            key_template = JINJA2_ENV.from_string(template_str)
            items = [item for item in items if key_template.render(**item) != value_to_remove]

        self.items = items
        input_templates = input_templates or {}
        self.input_templates: dict[str, Template] = {k: JINJA2_ENV.from_string(v) for k, v in input_templates.items()}
        self.reference_template = JINJA2_ENV.from_string(reference_template) if reference_template else None
        self.reference_list_template = (
            JINJA2_ENV.from_string(reference_list_template) if reference_list_template else None
        )

    def __len__(self) -> int:
        """Return the number of items kept after range selection and filtering."""
        return len(self.items)

    def __getitem__(self, i: int) -> GenerationInstance:
        """
        Build the i-th instance.

        The inputs are the item's own fields, updated with the rendered `input_templates`
        (a template key equal to an item key overrides that field). The references come
        from `reference_template` (one string) or `reference_list_template` (a rendered
        list literal whose elements are converted to `str`).

        Raises:
            ValueError: If `reference_list_template` renders to text that is not
                enclosed in `[` and `]`.
        """
        item = self.items[i]
        inputs = dict(item)
        inputs.update({k: v.render(**item) for k, v in self.input_templates.items()})

        reference_list: list[str] = []
        if self.reference_template:
            reference_string = self.reference_template.render(**item)
            reference_list.append(reference_string)
        if self.reference_list_template:
            # Surrounding whitespace (e.g. newlines left by a multi-line template) is not
            # part of the list literal, so it must not fail the bracket check below.
            reference_list_string = self.reference_list_template.render(**item).strip()
            if not (reference_list_string.startswith("[") and reference_list_string.endswith("]")):
                msg = (
                    f"The reference_list_template should render a list of strings "
                    f"but we got `{reference_list_string}`."
                )
                raise ValueError(msg)
            reference_list.extend([str(ref) for ref in literal_eval(reference_list_string)])
        return GenerationInstance(inputs=inputs, references=reference_list)

items instance-attribute

items = items

input_templates instance-attribute

input_templates: dict[str, Template] = {
    k: from_string(v) for (k, v) in items()
}

reference_template instance-attribute

reference_template = (
    from_string(reference_template)
    if reference_template
    else None
)

reference_list_template instance-attribute

reference_list_template = (
    from_string(reference_list_template)
    if reference_list_template
    else None
)

__init__

__init__(
    items: list[dict[str, Any]],
    reference_template: str | None = None,
    reference_list_template: str | None = None,
    input_templates: dict[str, str] | None = None,
    data_range: tuple[int, int] | None = None,
    keep_conditions: dict[str, str] | None = None,
    remove_conditions: dict[str, str] | None = None,
) -> None
Source code in flexeval/core/generation_dataset/template_based.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def __init__(
    self,
    items: list[dict[str, Any]],
    reference_template: str | None = None,
    reference_list_template: str | None = None,
    input_templates: dict[str, str] | None = None,
    data_range: tuple[int, int] | None = None,
    keep_conditions: dict[str, str] | None = None,
    remove_conditions: dict[str, str] | None = None,
) -> None:
    """
    Select and filter `items`, then compile the Jinja2 templates.

    The `data_range` slice is applied first, then every keep condition, then
    every remove condition.

    Raises:
        ValueError: If both `reference_template` and `reference_list_template` are set.
    """
    if reference_template and reference_list_template:
        msg = "Only one of reference_template and reference_list_template can be set."
        raise ValueError(msg)

    if data_range:
        first, last = data_range
        items = items[first:last]

    # Keep only the rows whose rendered key equals the expected value.
    for condition, expected in (keep_conditions or {}).items():
        compiled = JINJA2_ENV.from_string(condition)
        items = [row for row in items if compiled.render(**row) == expected]

    # Drop the rows whose rendered key equals the unwanted value.
    for condition, unwanted in (remove_conditions or {}).items():
        compiled = JINJA2_ENV.from_string(condition)
        items = [row for row in items if compiled.render(**row) != unwanted]

    self.items = items

    self.input_templates: dict[str, Template] = {
        name: JINJA2_ENV.from_string(source) for name, source in (input_templates or {}).items()
    }

    if reference_template:
        self.reference_template = JINJA2_ENV.from_string(reference_template)
    else:
        self.reference_template = None

    if reference_list_template:
        self.reference_list_template = JINJA2_ENV.from_string(reference_list_template)
    else:
        self.reference_list_template = None

__len__

__len__() -> int
Source code in flexeval/core/generation_dataset/template_based.py
69
70
def __len__(self) -> int:
    """Return the number of entries in `self.items`."""
    kept_items = self.items
    return len(kept_items)

__getitem__

__getitem__(i: int) -> GenerationInstance
Source code in flexeval/core/generation_dataset/template_based.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def __getitem__(self, i: int) -> GenerationInstance:
    """
    Build the i-th instance.

    The inputs are the item's own fields, updated with the rendered `input_templates`
    (a template key equal to an item key overrides that field). The references come
    from `reference_template` (one string) or `reference_list_template` (a rendered
    list literal whose elements are converted to `str`).

    Raises:
        ValueError: If `reference_list_template` renders to text that is not
            enclosed in `[` and `]`.
    """
    item = self.items[i]
    inputs = dict(item)
    inputs.update({k: v.render(**item) for k, v in self.input_templates.items()})

    reference_list: list[str] = []
    if self.reference_template:
        reference_string = self.reference_template.render(**item)
        reference_list.append(reference_string)
    if self.reference_list_template:
        # Surrounding whitespace (e.g. newlines left by a multi-line template) is not
        # part of the list literal, so it must not fail the bracket check below.
        reference_list_string = self.reference_list_template.render(**item).strip()
        if not (reference_list_string.startswith("[") and reference_list_string.endswith("]")):
            msg = (
                f"The reference_list_template should render a list of strings "
                f"but we got `{reference_list_string}`."
            )
            raise ValueError(msg)
        reference_list.extend([str(ref) for ref in literal_eval(reference_list_string)])
    return GenerationInstance(inputs=inputs, references=reference_list)

SacreBleuDataset

Load datasets from the sacrebleu library. The available datasets are defined in sacrebleu.DATASETS.

Source code in flexeval/core/generation_dataset/sacrebleu_dataset.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
class SacreBleuDataset(GenerationDataset):
    """Load datasets from the [sacrebleu](https://github.com/mjpost/sacrebleu) library.
    The available datasets are defined in sacrebleu.DATASETS.

    Each instance exposes its source text under the `"source"` input key together
    with the whitespace-stripped references for that source.
    """

    def __init__(self, name: str, langpair: str) -> None:
        corpus = sacrebleu.DATASETS[name]
        self._source_list: list[str] = list(corpus.source(langpair))

        stripped_references: list[list[str]] = []
        for refs in corpus.references(langpair):
            stripped_references.append([r.strip() for r in refs])
        self._references_list: list[list[str]] = stripped_references

        # Sources and references are paired by index, so their counts must match.
        if len(self._source_list) != len(self._references_list):
            msg = "The number of source and reference pairs should be the same."
            raise ValueError(msg)

    def __len__(self) -> int:
        return len(self._source_list)

    def __getitem__(self, i: int) -> GenerationInstance:
        source_text = self._source_list[i]
        reference_texts = self._references_list[i]
        return GenerationInstance(inputs={"source": source_text}, references=reference_texts)

__init__

__init__(name: str, langpair: str) -> None
Source code in flexeval/core/generation_dataset/sacrebleu_dataset.py
11
12
13
14
15
16
17
18
19
def __init__(self, name: str, langpair: str) -> None:
    """
    Read the sources and references of `sacrebleu.DATASETS[name]` for `langpair`.

    Raises:
        ValueError: If the number of sources differs from the number of reference groups.
    """
    corpus = sacrebleu.DATASETS[name]
    self._source_list: list[str] = list(corpus.source(langpair))

    stripped_references: list[list[str]] = []
    for refs in corpus.references(langpair):
        stripped_references.append([r.strip() for r in refs])
    self._references_list: list[list[str]] = stripped_references

    # Sources and references are paired by index, so their counts must match.
    if len(self._source_list) != len(self._references_list):
        msg = "The number of source and reference pairs should be the same."
        raise ValueError(msg)

__len__

__len__() -> int
Source code in flexeval/core/generation_dataset/sacrebleu_dataset.py
21
22
def __len__(self) -> int:
    """Return the number of entries in `self._source_list`."""
    sources = self._source_list
    return len(sources)

__getitem__

__getitem__(i: int) -> GenerationInstance
Source code in flexeval/core/generation_dataset/sacrebleu_dataset.py
24
25
26
27
28
def __getitem__(self, i: int) -> GenerationInstance:
    """Pair the i-th source (under the `"source"` input key) with its references."""
    source_text = self._source_list[i]
    reference_texts = self._references_list[i]
    return GenerationInstance(inputs={"source": source_text}, references=reference_texts)