English generation

babi

A synthetic question-answering dataset with reasoning questions.

References:

  • Hugging Face Dataset
  • Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks
    local dataset_base_args = {
      class_path: 'HFGenerationDataset',
      init_args: {
        path: 'Muennighoff/babi',
        reference_template: '{{ answer }}',
      },
    };
    
    {
      class_path: 'Generation',
      init_args: {
        eval_dataset: dataset_base_args { init_args+: { split: 'validation' } },
        few_shot_generator: {
          class_path: 'RandomFewShotGenerator',
          init_args: {
            dataset: dataset_base_args { init_args+: { split: 'train' } },
            num_shots: 3,
          },
        },
        prompt_template: |||
          {% for item in few_shot_data %}
          Passage: {{ item.passage | trim }}
          Question: {{ item.question }}
          Answer: "{{ item.references[0] }}"
          {% endfor %}
          Passage: {{ passage | trim }}
          Question: {{ question }}
        ||| + 'Answer: "',
        metrics: [
          { class_path: 'CharF1' },
          { class_path: 'ExactMatch' },
        ],
        gen_kwargs: { max_new_tokens: 32, stop_sequences: ['"'] },
      },
    }
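
The prompt_template above is a Jinja2 template embedded in a Jsonnet string. A minimal sketch of how it might render, using plain jinja2 with illustrative field values (the real loader fills passage, question, and references from the dataset):

    from jinja2 import Template

    # Same template as in the config above, minus the Jsonnet ||| block syntax.
    template = Template(
        '{% for item in few_shot_data %}\n'
        'Passage: {{ item.passage | trim }}\n'
        'Question: {{ item.question }}\n'
        'Answer: "{{ item.references[0] }}"\n'
        '{% endfor %}\n'
        'Passage: {{ passage | trim }}\n'
        'Question: {{ question }}\n'
        'Answer: "'
    )

    few_shot_data = [{
        'passage': 'Mary moved to the bathroom. John went to the hallway.',
        'question': 'Where is Mary?',
        'references': ['bathroom'],
    }]
    print(template.render(
        few_shot_data=few_shot_data,
        passage='Daniel went back to the garden.',
        question='Where is Daniel?',
    ))

The trailing 'Answer: "' combined with stop_sequences: ['"'] makes the model complete a quoted answer and stop at the closing quote.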
    

commonsense_qa

CommonsenseQA is a multiple-choice question answering dataset that requires different types of commonsense knowledge to predict the correct answers. This setup generates the answer text from the choices provided.

References:

  • Hugging Face Dataset
  • CommonsenseQA: A Question Answering Challenge Targeting Commonsense Knowledge
    local dataset_base_args = {
      class_path: 'HFGenerationDataset',
      init_args: {
        path: 'tau/commonsense_qa',
        reference_template: '{% set answer_index = choices.label.index(answerKey) %}{{ choices.text[answer_index] }}',
      },
    };
    
    {
      class_path: 'Generation',
      init_args: {
        eval_dataset: dataset_base_args { init_args+: { split: 'validation' } },
        few_shot_generator: {
          class_path: 'RandomFewShotGenerator',
          init_args: {
            dataset: dataset_base_args { init_args+: { split: 'train' } },
            num_shots: 2,
          },
        },
        prompt_template: |||
          Choose the correct answer from the choices.
          {% for item in few_shot_data %}
          Choices:
          0. "{{ item.choices.text[0] }}"
          1. "{{ item.choices.text[1] }}"
          2. "{{ item.choices.text[2] }}"
          3. "{{ item.choices.text[3] }}"
          4. "{{ item.choices.text[4] }}"
          Question: {{ item.question }}
          Answer: "{{ item.references[0] }}"
          {% endfor %}
          Choices:
          0. "{{ choices.text[0] }}"
          1. "{{ choices.text[1] }}"
          2. "{{ choices.text[2] }}"
          3. "{{ choices.text[3] }}"
          4. "{{ choices.text[4] }}"
          Question: {{ question }}
        ||| + 'Answer: "',
        metrics: [
          { class_path: 'ExactMatch' },
        ],
        gen_kwargs: { max_new_tokens: 40, stop_sequences: ['"'] },
      },
    }
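
The reference_template here does a small computation: it finds the position of answerKey in choices.label and returns the choice text at that position. A minimal sketch of the equivalent Python, with illustrative values:

    # Equivalent of the Jinja logic in reference_template above.
    choices = {
        'label': ['A', 'B', 'C', 'D', 'E'],
        'text': ['bank', 'library', 'department store', 'mall', 'new york'],
    }
    answerKey = 'A'

    answer_index = choices['label'].index(answerKey)  # {% set answer_index = ... %}
    print(choices['text'][answer_index])  # -> bank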
    

gsm8k

GSM8K (Grade School Math 8K) is a dataset of 8.5K high-quality, linguistically diverse grade school math word problems. The dataset was created to support the task of question answering on basic mathematical problems that require multi-step reasoning.

References:

  • [Hugging Face Dataset](https://huggingface.co/datasets/gsm8k)
  • Training Verifiers to Solve Math Word Problems
    local dataset_base_args = {
      class_path: 'HFGenerationDataset',
      init_args: {
        path: 'gsm8k',
        subset: 'main',
        reference_template: '{{ answer | regex_replace("<<.*?>>", "") }}',
      },
    };
    
    {
      class_path: 'Generation',
      init_args: {
        eval_dataset: dataset_base_args { init_args+: { split: 'test' } },
        few_shot_generator: {
          class_path: 'RandomFewShotGenerator',
          init_args: {
            dataset: dataset_base_args { init_args+: { split: 'train' } },
            num_shots: 4,
          },
        },
        prompt_template: |||
          {% for item in few_shot_data %}
          Q: {{ item.question }}
          A: {{ item.references[0] }}
          {% endfor %}
          Q: {{ question }}
        ||| + 'A:',
        metrics: [
          {
            class_path: 'ExactMatch',
            init_args: {
              lm_output_processor: { class_path: 'RegexExtractor', init_args: { pattern: '-?[0-9.,]+' } },
              reference_processor: { class_path: 'RegexExtractor', init_args: { pattern: '-?[0-9.,]+' } },
            },
          },
        ],
        gen_kwargs: { max_new_tokens: 256, stop_sequences: ['Q:'] },
      },
    }
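
Two regex steps do the scoring here. The regex_replace filter strips calculator annotations like <<48/2=24>> from the reference, and RegexExtractor reduces both the reference and the model output to a numeric span before ExactMatch compares them. A minimal sketch with standard re, assuming the extractor keeps the last match (the real class may choose differently):

    import re

    # Reference cleanup: regex_replace("<<.*?>>", "") removes calculator annotations.
    answer = 'Natalia sold 48/2 = <<48/2=24>>24 clips in May.\n#### 72'
    reference = re.sub(r'<<.*?>>', '', answer)

    def extract_number(text: str) -> str:
        # RegexExtractor with pattern '-?[0-9.,]+'; keeping the final match
        # is an assumption made for this sketch.
        matches = re.findall(r'-?[0-9.,]+', text)
        return matches[-1] if matches else ''

    lm_output = 'She sold 24 + 48 = 72 clips, so the answer is 72'
    print(extract_number(lm_output) == extract_number(reference))  # -> True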
    

squad_v1

Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span, from the corresponding reading passage.

References:

  • Hugging Face Dataset
  • SQuAD: 100,000+ Questions for Machine Comprehension of Text
    local dataset_base_args = {
      class_path: 'HFGenerationDataset',
      init_args: {
        path: 'rajpurkar/squad',
        reference_list_template: '{{ answers.text }}',
      },
    };
    
    {
      class_path: 'Generation',
      init_args: {
        eval_dataset: dataset_base_args { init_args+: { split: 'validation' } },
        few_shot_generator: {
          class_path: 'RandomFewShotGenerator',
          init_args: {
            dataset: dataset_base_args { init_args+: { split: 'train' } },
            num_shots: 2,
          },
        },
        prompt_template: |||
          {% for item in few_shot_data %}
          Context: {{ item.context | trim }}
          Question: {{ item.question }}
          Answer: "{{ item.references[0] }}"
          {% endfor %}
          Context: {{ context | trim }}
          Question: {{ question }}
        ||| + 'Answer: "',
        metrics: [
          { class_path: 'CharF1' },
          { class_path: 'ExactMatch' },
        ],
        gen_kwargs: { max_new_tokens: 32, stop_sequences: ['"'] },
      },
    }
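
Note that this config uses reference_list_template rather than reference_template: {{ answers.text }} is a list of all annotated answer spans, so a prediction can be scored against the best-matching reference. The character F1 below is one common bag-of-characters definition, written only to illustrate max-over-references scoring; the framework's actual CharF1 may be defined differently:

    from collections import Counter

    def char_f1(prediction: str, reference: str) -> float:
        # Character-level F1 over multisets of characters (illustrative definition).
        pred, ref = Counter(prediction), Counter(reference)
        overlap = sum((pred & ref).values())
        if overlap == 0:
            return 0.0
        precision = overlap / sum(pred.values())
        recall = overlap / sum(ref.values())
        return 2 * precision * recall / (precision + recall)

    references = ['Denver Broncos', 'the Denver Broncos']  # from answers.text
    prediction = 'Denver Broncos'
    print(max(char_f1(prediction, ref) for ref in references))  # -> 1.0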
    

trivia_qa

TriviaQA is a reading comprehension dataset containing over 650K question-answer-evidence triples. TriviaQA includes 95K question-answer pairs authored by trivia enthusiasts and independently gathered evidence documents, six per question on average, that provide high-quality distant supervision for answering the questions.

References:

  • Hugging Face Dataset
  • TriviaQA: A Large Scale Distantly Supervised Challenge Dataset for Reading Comprehension
    local dataset_base_args = {
      class_path: 'HFGenerationDataset',
      init_args: {
        path: 'trivia_qa',
        subset: 'rc.nocontext',
        reference_list_template: '{{ answer.aliases }}',
      },
    };
    
    {
      class_path: 'Generation',
      init_args: {
        eval_dataset: dataset_base_args { init_args+: { split: 'validation' } },
        few_shot_generator: {
          class_path: 'RandomFewShotGenerator',
          init_args: {
            dataset: dataset_base_args { init_args+: { split: 'train' } },
            num_shots: 0,
          },
        },
        prompt_template: |||
          {% for item in few_shot_data %}
          Question: {{ item.question }}
          Answer: "{{ item.references[0] }}"
          {% endfor %}
          Question: {{ question }}
        ||| + 'Answer: "',
        metrics: [
          { class_path: 'CharF1' },
          { class_path: 'ExactMatch' },
        ],
        gen_kwargs: { max_new_tokens: 32, stop_sequences: ['"'] },
      },
    }
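
With num_shots set to 0 above, few_shot_data is empty, the Jinja loop contributes nothing, and the rendered prompt is effectively zero-shot. A minimal sketch with an illustrative question (not from the dataset):

    from jinja2 import Template

    template = Template(
        '{% for item in few_shot_data %}\n'
        'Question: {{ item.question }}\n'
        'Answer: "{{ item.references[0] }}"\n'
        '{% endfor %}\n'
        'Question: {{ question }}\n'
        'Answer: "'
    )
    print(template.render(few_shot_data=[], question='Who wrote Hamlet?'))
    # Renders (up to surrounding whitespace) as:
    # Question: Who wrote Hamlet?
    # Answer: "

Because reference_list_template is '{{ answer.aliases }}', every accepted spelling of the answer is available as a reference for CharF1 and ExactMatch.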
    

twitter_sentiment

TSATC: Twitter Sentiment Analysis Training Corpus. This dataset is a preprocessed version of the original corpus. See the Hugging Face dataset page for more information.

References:

  • Hugging Face Dataset
  • Twitter Sentiment Analysis Training Corpus (Dataset)
    local dataset_base_args = {
      class_path: 'HFGenerationDataset',
      init_args: {
        path: 'carblacac/twitter-sentiment-analysis',
        reference_template: "{{ ['Positive', 'Negative'][feeling] }}",
      },
    };
    
    {
      class_path: 'Generation',
      init_args: {
        eval_dataset: dataset_base_args { init_args+: { split: 'test' } },
        few_shot_generator: {
          class_path: 'BalancedFewShotGenerator',
          init_args: {
            dataset: dataset_base_args { init_args+: { split: 'train' } },
            num_shots: 4,
          },
        },
        prompt_template: |||
          Classify the sentiment of the following tweet.
          {% for item in few_shot_data %}
          Tweet: {{ item.text }}
          Sentiment: `{{ item.references[0] }}`
          {% endfor %}
          Tweet: {{ text }}
        ||| + 'Sentiment: `',
        metrics: [
          { class_path: 'ExactMatch' },
        ],
        gen_kwargs: { max_new_tokens: 8, stop_sequences: ['`'] },
      },
    }
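
Unlike the other setups, this one uses BalancedFewShotGenerator, which draws exemplars evenly across labels so the four shots cover both sentiments. A minimal sketch of that idea, not the framework's actual implementation:

    import random

    def balanced_sample(dataset: list[dict], num_shots: int,
                        label_key: str = 'label') -> list[dict]:
        # Group examples by label, then draw an equal number from each group.
        by_label: dict[str, list[dict]] = {}
        for example in dataset:
            by_label.setdefault(example[label_key], []).append(example)
        shots_per_label = num_shots // len(by_label)
        shots = []
        for examples in by_label.values():
            shots.extend(random.sample(examples, shots_per_label))
        random.shuffle(shots)
        return shots

    toy = [
        {'text': 'love it', 'label': 'Positive'},
        {'text': 'great day', 'label': 'Positive'},
        {'text': 'awful', 'label': 'Negative'},
        {'text': 'so bad', 'label': 'Negative'},
    ]
    print(balanced_sample(toy, num_shots=4))  # two Positive and two Negative shots

Here the backtick plays the role the double quote plays in the other setups: the prompt ends with 'Sentiment: `' and generation stops at the closing backtick.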