RewardBenchDataset

Source code in flexeval/core/reward_bench_dataset/base.py
class RewardBenchDataset(Sequence[RewardBenchInstance], ABC):
    @abstractmethod
    def __len__(self) -> int:
        """Returns the number of instances in the dataset."""
        raise NotImplementedError

    @abstractmethod
    def __getitem__(self, i: int) -> RewardBenchInstance:
        """Returns the i-th instance."""
        raise NotImplementedError

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(num_instances={len(self)})"

__len__ abstractmethod

__len__() -> int

Returns the number of instances in the dataset.

Source code in flexeval/core/reward_bench_dataset/base.py
@abstractmethod
def __len__(self) -> int:
    """Returns the number of instances in the dataset."""
    raise NotImplementedError

__getitem__ abstractmethod

__getitem__(i: int) -> RewardBenchInstance

Returns the i-th instance.

Source code in flexeval/core/reward_bench_dataset/base.py
@abstractmethod
def __getitem__(self, i: int) -> RewardBenchInstance:
    """Returns the i-th instance."""
    raise NotImplementedError

__repr__

__repr__() -> str
Source code in flexeval/core/reward_bench_dataset/base.py
def __repr__(self) -> str:
    return f"{self.__class__.__name__}(num_instances={len(self)})"

RewardBenchInstance dataclass

A dataclass representing a (prompt, chosen, rejected) triplet for a reward bench task.

Source code in flexeval/core/reward_bench_dataset/base.py
@dataclass
class RewardBenchInstance:
    """A dataclass representing a triplet (prompt, chosen, rejected) of a
    reward bench task."""

    prompt: list[dict[str, str]]
    """
    The prompt for chosen/rejected responses.
    The format is a list of dictionaries, where each dictionary represents an OpenAI-format chat message,
    such as `{"role": "user", "content": "Hello!"}`.
    """
    chosen: list[dict[str, str]]
    """
    The chosen response to the prompt.
    The format is the same as `prompt`.
    """
    rejected: list[dict[str, str]]
    """
    The rejected response to the prompt.
    The format is the same as `prompt`.
    """
    category_key: str | None = None
    """
    A key to compute category-wise average accuracies.
    """
    extra_info: dict[str, Any] = field(default_factory=dict)
    """
    Extra information that can be passed to a `Metric`.
    """

prompt instance-attribute

prompt: list[dict[str, str]]

The prompt for chosen/rejected responses. The format is a list of dictionaries, where each dictionary represents an OpenAI-format chat message, such as {"role": "user", "content": "Hello!"}.

chosen instance-attribute

chosen: list[dict[str, str]]

The chosen response to the prompt. The format is the same as prompt.

rejected instance-attribute

rejected: list[dict[str, str]]

The rejected response to the prompt. The format is the same as prompt.

category_key class-attribute instance-attribute

category_key: str | None = None

A key to compute category-wise average accuracies.

extra_info class-attribute instance-attribute

extra_info: dict[str, Any] = field(default_factory=dict)

Extra information that can be passed to a Metric.

__init__

__init__(
    prompt: list[dict[str, str]],
    chosen: list[dict[str, str]],
    rejected: list[dict[str, str]],
    category_key: str | None = None,
    extra_info: dict[str, Any] = dict(),
) -> None
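
For illustration, an instance can be constructed directly; the messages below are made up:

instance = RewardBenchInstance(
    prompt=[{"role": "user", "content": "What is the capital of France?"}],
    chosen=[{"role": "assistant", "content": "The capital of France is Paris."}],
    rejected=[{"role": "assistant", "content": "It is Berlin."}],
    category_key="geography",  # optional: used to group per-category accuracy
)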

HFRewardBenchDataset

Load RewardBenchInstances from a Hugging Face dataset.

Parameters:

  • path (str) –

    The path to the Hugging Face dataset.

  • split (str) –

    The split of the dataset.

  • subset (str | None, default: None ) –

    The subset of the dataset.

  • dataset_kwargs (dict[str, Any] | None, default: None ) –

    The keyword arguments to pass to datasets.load_dataset.

Source code in flexeval/core/reward_bench_dataset/template_based.py
class HFRewardBenchDataset(TemplateRewardBenchDataset):
    """
    Load RewardBenchInstances from a Hugging Face dataset.

    Args:
        path: The path to the Hugging Face dataset.
        split: The split of the dataset.
        subset: The subset of the dataset.
        dataset_kwargs: The keyword arguments to pass to datasets.load_dataset.
    """

    def __init__(
        self,
        path: str,
        split: str,
        subset: str | None = None,
        dataset_kwargs: dict[str, Any] | None = None,
        prompt_template: str = "{{ prompt }}",
        chosen_template: str = "{{ chosen }}",
        rejected_template: str = "{{ rejected }}",
        category_template: str | None = None,
        extra_info_templates: dict[str, str] | None = None,
        data_range: tuple[int, int] | None = None,
        keep_conditions: dict[str, str] | None = None,
        remove_conditions: dict[str, str] | None = None,
    ) -> None:
        dataset_kwargs = dataset_kwargs or {}
        dataset = datasets.load_dataset(path, name=subset, split=split, **dataset_kwargs)
        items = [dict(item) for item in dataset]

        super().__init__(
            items=items,
            prompt_template=prompt_template,
            chosen_template=chosen_template,
            rejected_template=rejected_template,
            category_template=category_template,
            extra_info_templates=extra_info_templates,
            data_range=data_range,
            keep_conditions=keep_conditions,
            remove_conditions=remove_conditions,
        )

__init__

__init__(
    path: str,
    split: str,
    subset: str | None = None,
    dataset_kwargs: dict[str, Any] | None = None,
    prompt_template: str = "{{ prompt }}",
    chosen_template: str = "{{ chosen }}",
    rejected_template: str = "{{ rejected }}",
    category_template: str | None = None,
    extra_info_templates: dict[str, str] | None = None,
    data_range: tuple[int, int] | None = None,
    keep_conditions: dict[str, str] | None = None,
    remove_conditions: dict[str, str] | None = None,
) -> None
Source code in flexeval/core/reward_bench_dataset/template_based.py
def __init__(
    self,
    path: str,
    split: str,
    subset: str | None = None,
    dataset_kwargs: dict[str, Any] | None = None,
    prompt_template: str = "{{ prompt }}",
    chosen_template: str = "{{ chosen }}",
    rejected_template: str = "{{ rejected }}",
    category_template: str | None = None,
    extra_info_templates: dict[str, str] | None = None,
    data_range: tuple[int, int] | None = None,
    keep_conditions: dict[str, str] | None = None,
    remove_conditions: dict[str, str] | None = None,
) -> None:
    dataset_kwargs = dataset_kwargs or {}
    dataset = datasets.load_dataset(path, name=subset, split=split, **dataset_kwargs)
    items = [dict(item) for item in dataset]

    super().__init__(
        items=items,
        prompt_template=prompt_template,
        chosen_template=chosen_template,
        rejected_template=rejected_template,
        category_template=category_template,
        extra_info_templates=extra_info_templates,
        data_range=data_range,
        keep_conditions=keep_conditions,
        remove_conditions=remove_conditions,
    )
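
As a usage sketch, the {{ ... }} templates are rendered against each row of the loaded dataset to build the prompt, chosen, and rejected messages. The dataset id, split, and column names below are assumptions for illustration, not values fixed by flexeval:

dataset = HFRewardBenchDataset(
    path="allenai/reward-bench",  # hypothetical Hugging Face dataset id
    split="filtered",             # hypothetical split name
    prompt_template="{{ prompt }}",
    chosen_template="{{ chosen }}",
    rejected_template="{{ rejected }}",
    category_template="{{ subset }}",  # hypothetical column for per-category accuracy
)
print(len(dataset))
print(dataset[0].prompt)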

JsonlRewardBenchDataset

Load RewardBenchInstances from a JSONL file.

Parameters:

  • path (str) –

    The path to the JSONL file.

Source code in flexeval/core/reward_bench_dataset/template_based.py
class JsonlRewardBenchDataset(TemplateRewardBenchDataset):
    """
    Load RewardBenchInstances from a JSONL file.

    Args:
        path: The path to the JSONL file.
    """

    def __init__(
        self,
        path: str,
        prompt_template: str = "{{ prompt }}",
        chosen_template: str = "{{ chosen }}",
        rejected_template: str = "{{ rejected }}",
        category_template: str | None = None,
        extra_info_templates: dict[str, str] | None = None,
        data_range: tuple[int, int] | None = None,
        keep_conditions: dict[str, str] | None = None,
        remove_conditions: dict[str, str] | None = None,
    ) -> None:
        with open(path) as f:
            items = [json.loads(line) for line in f]

        super().__init__(
            items=items,
            prompt_template=prompt_template,
            chosen_template=chosen_template,
            rejected_template=rejected_template,
            category_template=category_template,
            extra_info_templates=extra_info_templates,
            data_range=data_range,
            keep_conditions=keep_conditions,
            remove_conditions=remove_conditions,
        )

__init__

__init__(
    path: str,
    prompt_template: str = "{{ prompt }}",
    chosen_template: str = "{{ chosen }}",
    rejected_template: str = "{{ rejected }}",
    category_template: str | None = None,
    extra_info_templates: dict[str, str] | None = None,
    data_range: tuple[int, int] | None = None,
    keep_conditions: dict[str, str] | None = None,
    remove_conditions: dict[str, str] | None = None,
) -> None
Source code in flexeval/core/reward_bench_dataset/template_based.py
def __init__(
    self,
    path: str,
    prompt_template: str = "{{ prompt }}",
    chosen_template: str = "{{ chosen }}",
    rejected_template: str = "{{ rejected }}",
    category_template: str | None = None,
    extra_info_templates: dict[str, str] | None = None,
    data_range: tuple[int, int] | None = None,
    keep_conditions: dict[str, str] | None = None,
    remove_conditions: dict[str, str] | None = None,
) -> None:
    with open(path) as f:
        items = [json.loads(line) for line in f]

    super().__init__(
        items=items,
        prompt_template=prompt_template,
        chosen_template=chosen_template,
        rejected_template=rejected_template,
        category_template=category_template,
        extra_info_templates=extra_info_templates,
        data_range=data_range,
        keep_conditions=keep_conditions,
        remove_conditions=remove_conditions,
    )
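
As a sketch under assumed keys, each line of the JSONL file is a single JSON object, and the templates pull values from its fields. The path and keys below are made up for illustration:

# data.jsonl, one object per line, e.g.:
# {"prompt": "2 + 2 = ?", "chosen": "4", "rejected": "5", "category": "math"}
dataset = JsonlRewardBenchDataset(
    path="data.jsonl",
    category_template="{{ category }}",  # hypothetical key in each object
    data_range=(0, 100),                 # assumed to keep items with index in [0, 100)
)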