Code generation

jhumaneval

Zero-shot Python code generation task in Japanese.

References:

  • Hugging Face Dataset
  • LLM は日本語追加学習により言語間知識転移を起こすのか? (Does continued training in Japanese induce cross-lingual knowledge transfer in LLMs?)
{
  class_path: 'Generation',
  init_args: {
    eval_dataset: {
      class_path: 'HFGenerationDataset',
      init_args: {
        path: 'kogi-jwu/jhumaneval',
        split: 'test',
        reference_template: '{{ test }}\n\ncheck({{ entry_point }})\n',
      },
    },
    prompt_template: '{{ prompt }}',
    metrics: [
      { class_path: 'CodeEval', init_args: { code_template: '{{ prompt }}{{ lm_output }}' } },
    ],
    gen_kwargs: { max_new_tokens: 512, stop_sequences: ['\nclass', '\ndef', '\n#', '\n@', '\nprint', '\nif', '\n```'] },
    batch_size: 4,
  },
}
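
The three Jinja2-style templates above fully determine what the metric executes: prompt_template is what the model is given, code_template prepends the original prompt to the model's completion, and reference_template appends the unit tests plus a call to check(). Below is a minimal sketch of that expansion for a single example, assuming the dataset follows the HumanEval schema (prompt, canonical_solution, test, entry_point) and using the datasets and jinja2 packages.

from datasets import load_dataset
from jinja2 import Template

# Templates copied from the config above.
PROMPT_TEMPLATE = Template("{{ prompt }}")
CODE_TEMPLATE = Template("{{ prompt }}{{ lm_output }}")
REFERENCE_TEMPLATE = Template("{{ test }}\n\ncheck({{ entry_point }})\n")

row = load_dataset("kogi-jwu/jhumaneval", split="test")[0]

# What the model sees: the function signature and its Japanese docstring.
model_input = PROMPT_TEMPLATE.render(prompt=row["prompt"])

# Stand in for a model completion with the canonical solution body.
lm_output = row["canonical_solution"]

# What ends up being executed: prompt + completion, then the tests.
# (Joining code and reference with a newline here is only for illustration;
# the exact concatenation is up to the CodeEval implementation.)
script = CODE_TEMPLATE.render(prompt=row["prompt"], lm_output=lm_output)
script += "\n" + REFERENCE_TEMPLATE.render(test=row["test"], entry_point=row["entry_point"])
print(script)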
    

jhumaneval_tab_indent

Zero-shot Python code generation task in Japanese.

This is a version of jhumaneval preprocessed to replace indentation spaces with tabs. Some models (e.g., Llama) seem to have trouble with spaces in the prompt.

local original_config = import './jhumaneval.jsonnet';

original_config {
  init_args+: {
    eval_dataset+: {
      init_args+: {
        reference_template: '{{ test | replace("    ", "\t") }}\n\ncheck({{ entry_point }})\n',
      },
    },
    prompt_template: "{{ prompt | replace('    ', '\t') }}",
    metrics: [
      { class_path: 'CodeEval', init_args: { code_template: '{{ prompt | replace("    ", "\t") }}{{ lm_output }}' } },
    ],
  },
}
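
The +: fields merge these overrides into the imported jhumaneval config, so only the templates change: each one now pipes its text through Jinja2's replace filter to turn four-space indentation into tabs. A small sketch of what that filter does, using the jinja2 package and a hypothetical snippet of space-indented code:

from jinja2 import Template

# The same filter expression used in prompt_template above.
tab_template = Template("{{ prompt | replace('    ', '\t') }}")

space_indented = "def add(a, b):\n    return a + b\n"
tab_indented = tab_template.render(prompt=space_indented)

assert tab_indented == "def add(a, b):\n\treturn a + b\n"
# Each run of four spaces becomes one tab; code_template applies the same
# conversion, so the prompt and the model's completion stay consistently indented.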

mbpp

Mostly Basic Python Problems (MBPP) is a dataset of crowd-sourced programming problems.

References:

  • Hugging Face Dataset
  • Program Synthesis with Large Language Models
local dataset_base_args = {
  class_path: 'HFGenerationDataset',
  init_args: {
    path: 'mbpp',
    subset: 'sanitized',
    reference_list_template: '{{ test_list }}',
  },
};

{
  class_path: 'Generation',
  init_args: {
    eval_dataset: dataset_base_args { init_args+: { split: 'test' } },
    few_shot_generator: {
      class_path: 'RandomFewShotGenerator',
      init_args: {
        dataset: dataset_base_args { init_args+: { split: 'prompt' } },
        num_shots: 3,
      },
    },
    prompt_template: |||
      {% for item in few_shot_data %}
      ## Question
      {{ item.prompt }}
      ## Test cases
      ```python
      {{ item.test_list | join('\n') }}
      ```
      ## Code
      ```python
      {{ item.code }}
      ```
      {% endfor %}
      ## Question
      {{ prompt }}
      ## Test cases
      ```python
      {{ test_list | join('\n') }}
      ```
      ## Code
      ```python
    |||,
    metrics: [
      { class_path: 'CodeEval' },
    ],
    gen_kwargs: { max_new_tokens: 512, stop_sequences: ['```'] },
    batch_size: 4,
  },
}
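
With num_shots: 3, RandomFewShotGenerator samples three demonstrations from the prompt split, and the template lays each one out as a Question / Test cases / Code block before the question under evaluation, ending on an open ```python fence so the model keeps writing code until it emits the closing fence (the only stop sequence). A minimal sketch of how this renders, using the jinja2 package and one hypothetical demonstration in place of the sampled shots:

from jinja2 import Template

# The prompt_template above, written as a Python string.
PROMPT_TEMPLATE = Template(
    "{% for item in few_shot_data %}\n"
    "## Question\n"
    "{{ item.prompt }}\n"
    "## Test cases\n"
    "```python\n"
    "{{ item.test_list | join('\\n') }}\n"
    "```\n"
    "## Code\n"
    "```python\n"
    "{{ item.code }}\n"
    "```\n"
    "{% endfor %}\n"
    "## Question\n"
    "{{ prompt }}\n"
    "## Test cases\n"
    "```python\n"
    "{{ test_list | join('\\n') }}\n"
    "```\n"
    "## Code\n"
    "```python\n"
)

# Hypothetical demonstration; real shots come from the 'prompt' split of mbpp.
shot = {
    "prompt": "Write a function to return the square of a number.",
    "test_list": ["assert square(2) == 4", "assert square(-3) == 9"],
    "code": "def square(n):\n    return n * n",
}

rendered = PROMPT_TEMPLATE.render(
    few_shot_data=[shot],
    prompt="Write a function to reverse a string.",
    test_list=['assert rev("abc") == "cba"'],
)
print(rendered)  # ends with an open ```python block for the model to complete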
    

mbpp_tab_indent

Mostly Basic Python Problems (MBPP) is a dataset of crowd-sourced programming problems.

This is a version of mbpp preprocessed to replace indentation spaces with tabs. Some models (e.g., Llama) seem to have trouble with spaces in the prompt.

local original_config = import './mbpp.jsonnet';

original_config {
  init_args+: {
    prompt_template: |||
      {% for item in few_shot_data %}
      ## Question
      {{ item.prompt }}
      ## Test cases
      ```python
      {{ item.test_list | join('\n') }}
      ```
      ## Code
      ```python
      {{ item.code | replace('    ', '\t') }}
      ```
      {% endfor %}
      ## Question
      {{ prompt }}
      ## Test cases
      ```python
      {{ test_list | join('\n') }}
      ```
      ## Code
      ```python
    |||,
  },
}

openai_humaneval

Zero-shot Python code generation task developed by OpenAI.

References:

  • Hugging Face Dataset
  • Evaluating Large Language Models Trained on Code
{
  class_path: 'Generation',
  init_args: {
    eval_dataset: {
      class_path: 'HFGenerationDataset',
      init_args: {
        path: 'openai_humaneval',
        split: 'test',
        reference_template: '{{ test }}\n\ncheck({{ entry_point }})\n',
      },
    },
    prompt_template: '{{ prompt }}',
    metrics: [
      { class_path: 'CodeEval', init_args: { code_template: '{{ prompt }}{{ lm_output }}' } },
    ],
    gen_kwargs: { max_new_tokens: 512, stop_sequences: ['\nclass', '\ndef', '\n#', '\n@', '\nprint', '\nif', '\n```'] },
    batch_size: 4,
  },
}
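
Since the model continues the function body directly, its output has to be cut off before it starts a new top-level statement; that is what the stop_sequences above are for. CodeEval then runs the completion together with the prompt and the unit tests. Below is a rough sketch of that truncation-and-execution flow; the helper functions and subprocess handling are illustrative assumptions, not the actual CodeEval implementation.

import subprocess
import sys
import tempfile

from datasets import load_dataset

STOP_SEQUENCES = ["\nclass", "\ndef", "\n#", "\n@", "\nprint", "\nif", "\n```"]


def truncate_at_stop(text: str) -> str:
    """Cut the completion at the earliest stop sequence, if any occurs."""
    cut = len(text)
    for stop in STOP_SEQUENCES:
        idx = text.find(stop)
        if idx != -1:
            cut = min(cut, idx)
    return text[:cut]


def passes_tests(prompt: str, lm_output: str, test: str, entry_point: str) -> bool:
    """Render code_template + reference_template and run them in a fresh interpreter."""
    script = prompt + truncate_at_stop(lm_output)            # code_template
    script += "\n\n" + test + f"\n\ncheck({entry_point})\n"  # reference_template
    with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as f:
        f.write(script)
    try:
        result = subprocess.run([sys.executable, f.name], capture_output=True, timeout=10)
    except subprocess.TimeoutExpired:
        return False
    return result.returncode == 0


row = load_dataset("openai_humaneval", split="test")[0]
# Use the canonical solution as a stand-in for a model completion.
print(passes_tests(row["prompt"], row["canonical_solution"], row["test"], row["entry_point"]))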
    

openai_humaneval_tab_indent

Zero-shot Python code generation task developed by OpenAI.

This is a version of openai_humaneval preprocessed to replace indentation spaces with tabs. Some models (e.g., Llama) seem to have trouble with spaces in the prompt.

local original_config = import './openai_humaneval.jsonnet';

original_config {
  init_args+: {
    eval_dataset+: {
      init_args+: {
        reference_template: '{{ test | replace("    ", "\t") }}\n\ncheck({{ entry_point }})\n',
      },
    },
    prompt_template: '{{ prompt | replace("    ", "\t") }}',
    metrics: [
      { class_path: 'CodeEval', init_args: { code_template: '{{ prompt | replace("    ", "\t") }}{{ lm_output }}' } },
    ],
  },
}