StringProcessor

StringProcessor ¶

An interface class used to process the model's output before evaluation. Typically used in Metric.

Source code in flexeval/core/string_processor/base.py

class StringProcessor(ABC):
    """An interface class used to process the model's output before evaluation.
    Typically used in `Metric`.

    Subclasses implement `__call__`, which receives a string and returns the
    processed string.
    """

    @abstractmethod
    def __call__(self, text: str) -> str:
        """
        Process the input text.

        Args:
            text: The text to process.

        Returns:
            The processed text.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

__call__ `abstractmethod` ¶

__call__(text: str) -> str

Process the input text.

Parameters:

text (str) –

The text to process.

Source code in flexeval/core/string_processor/base.py

@abstractmethod
def __call__(self, text: str) -> str:
    """
    Process the input text.

    Args:
        text: The text to process.

    Returns:
        The processed text.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError

AIONormalizer ¶

StringProcessor used for the AI王 (AI King) question answering task. This is adapted from the official script.

Examples:

>>> from flexeval import AIONormalizer
>>> processor = AIONormalizer()
>>> text = "「蛹化(ようか)」"
>>> normalized_text = processor(text)
>>> print(normalized_text)
蛹化

Source code in flexeval/core/string_processor/aio.py

class AIONormalizer(StringProcessor):
    """Answer normalizer for the AI王 (AI King) question answering task.

    This is adapted from
    [the official script](https://github.com/cl-tohoku/aio4-bpr-baseline/blob/c5a226296b5e1c403268016dc7136147bbb515fe/compute_score.py).

    Examples:
        >>> from flexeval import AIONormalizer
        >>> processor = AIONormalizer()
        >>> text = "「蛹化(ようか)」"
        >>> normalized_text = processor(text)
        >>> print(normalized_text)
        蛹化
    """

    def __call__(self, text: str) -> str:
        """Normalize an answer string.

        Args:
            text: The text to normalize.

        Returns:
            The text after NFKC normalization and lowercasing, with
            kagi-kakko brackets unwrapped and with middle dots, `=`, `-`,
            all whitespace, and parenthesized spans removed.
        """
        # Swap this symbol by hand first; unicode normalization will not
        # perform this substitution.
        normalized = unicodedata.normalize("NFKC", text.replace("～", "〜")).lower()

        # Unwrap kagi-kakkos, keeping the text they enclose.
        for bracket_pattern in (r"「(.*?)」", r"『(.*?)』"):
            normalized = re.sub(bracket_pattern, r"\1", normalized)

        # Drop some punctuation marks.
        for mark in ("・", "=", "-"):
            normalized = normalized.replace(mark, "")

        # Remove every run of whitespace (not merely collapse it).
        normalized = re.sub(r"\s+", "", normalized).strip()

        # Remove parenthesized spans: 蛹化(ようか) → 蛹化
        return re.sub(r"\((.*?)\)", "", normalized)

__call__ ¶

__call__(text: str) -> str

Source code in flexeval/core/string_processor/aio.py

def __call__(self, text: str) -> str:
    """Normalize an answer string.

    Args:
        text: The text to normalize.

    Returns:
        The text after NFKC normalization and lowercasing, with kagi-kakko
        brackets unwrapped and with middle dots, `=`, `-`, all whitespace,
        and parenthesized spans removed.
    """
    # Swap this symbol by hand first; unicode normalization will not
    # perform this substitution.
    normalized = unicodedata.normalize("NFKC", text.replace("～", "〜")).lower()

    # Unwrap kagi-kakkos, keeping the text they enclose.
    for bracket_pattern in (r"「(.*?)」", r"『(.*?)』"):
        normalized = re.sub(bracket_pattern, r"\1", normalized)

    # Drop some punctuation marks.
    for mark in ("・", "=", "-"):
        normalized = normalized.replace(mark, "")

    # Remove every run of whitespace (not merely collapse it).
    normalized = re.sub(r"\s+", "", normalized).strip()

    # Remove parenthesized spans: 蛹化(ようか) → 蛹化
    return re.sub(r"\((.*?)\)", "", normalized)

LastLineExtractor ¶

Extract the last line from a string.

Examples:

>>> from flexeval import LastLineExtractor
>>> processor = LastLineExtractor()
>>> text = "Answer\nFUJI-YAMA"
>>> print(processor(text))
FUJI-YAMA

Source code in flexeval/core/string_processor/last_line.py

class LastLineExtractor(StringProcessor):
    """Return only the final line of a string.

    Lines are delimited by `"\\n"`. Text that ends with a newline therefore
    yields an empty string.

    Examples:
        >>> from flexeval import LastLineExtractor
        >>> processor = LastLineExtractor()
        >>> text = "Answer\\nFUJI-YAMA"
        >>> print(processor(text))
        FUJI-YAMA
    """

    def __call__(self, text: str) -> str:
        """Return whatever follows the last newline in `text`.

        Args:
            text: The text to process.

        Returns:
            The part after the final newline, or the whole text when it
            contains no newline.
        """
        _head, _newline, last_line = text.rpartition("\n")
        return last_line

__call__ ¶

__call__(text: str) -> str

Source code in flexeval/core/string_processor/last_line.py

def __call__(self, text: str) -> str:
    """Return whatever follows the last newline in `text`.

    Args:
        text: The text to process.

    Returns:
        The part after the final newline, or the whole text when it
        contains no newline.
    """
    _head, _newline, last_line = text.rpartition("\n")
    return last_line

StringLower ¶

This processor returns a lowercased string.

Examples:

>>> from flexeval import StringLower
>>> processor = StringLower()
>>> text = "ABCDefg"
>>> normalized_text = processor(text)
>>> print(normalized_text)
abcdefg

Source code in flexeval/core/string_processor/lower.py

class StringLower(StringProcessor):
    """This processor returns a lowercased string.

    Examples:
        >>> from flexeval import StringLower
        >>> processor = StringLower()
        >>> text = "ABCDefg"
        >>> normalized_text = processor(text)
        >>> print(normalized_text)
        abcdefg
    """

    def __call__(self, text: str) -> str:
        """Return `text` converted to lowercase with `str.lower`.

        Args:
            text: The text to process.

        Returns:
            The lowercased text.
        """
        return text.lower()

__call__ ¶

__call__(text: str) -> str

Source code in flexeval/core/string_processor/lower.py

def __call__(self, text: str) -> str:
    """Return `text` converted to lowercase with `str.lower`.

    Args:
        text: The text to process.

    Returns:
        The lowercased text.
    """
    return text.lower()

NFKCNormalizer ¶

This processor returns an NFKC-normalized string.

Examples:

>>> from flexeval import NFKCNormalizer
>>> processor = NFKCNormalizer()
>>> text = "０１２３ＡＢＣ"
>>> normalized_text = processor(text)
>>> print(normalized_text)
0123ABC

Source code in flexeval/core/string_processor/nfkc.py

class NFKCNormalizer(StringProcessor):
    """This processor returns an NFKC-normalized string.

    Examples:
        >>> from flexeval import NFKCNormalizer
        >>> processor = NFKCNormalizer()
        >>> text = "０１２３ＡＢＣ"
        >>> normalized_text = processor(text)
        >>> print(normalized_text)
        0123ABC
    """

    def __call__(self, text: str) -> str:
        """Apply Unicode NFKC normalization to `text`.

        Args:
            text: The text to process.

        Returns:
            The result of `unicodedata.normalize("NFKC", text)`.
        """
        return unicodedata.normalize("NFKC", text)

__call__ ¶

__call__(text: str) -> str

Source code in flexeval/core/string_processor/nfkc.py

def __call__(self, text: str) -> str:
    """Apply Unicode NFKC normalization to `text`.

    Args:
        text: The text to process.

    Returns:
        The result of `unicodedata.normalize("NFKC", text)`.
    """
    return unicodedata.normalize("NFKC", text)

RegexExtractor ¶

StringProcessor that extracts the last match of a regex pattern. Useful to extract an answer after a step-by-step derivation.

Parameters:

pattern (str) –

The regex pattern to extract.

Examples:

>>> from flexeval import RegexExtractor
>>> processor = RegexExtractor(r"Answer: (.*)")
>>> text = "Step 1: 3 + 2 = 5\nStep 2: 5 × 4 = 20\nAnswer: 20"
>>> print(processor(text))
20

Source code in flexeval/core/string_processor/regex.py

class RegexExtractor(StringProcessor):
    """
    StringProcessor that returns the last `findall` hit of a regex pattern.
    Useful to extract an answer after a step-by-step derivation.

    The pattern is compiled with `re.DOTALL`, so `.` also matches newlines.
    Results follow `re.findall` semantics: with no capturing group the whole
    match is returned, and with one group the group's content is returned.
    Note that with several groups `findall` yields tuples, so the returned
    value is then a tuple rather than a string.

    Args:
        pattern: The regex pattern to extract.

    Examples:
        >>> from flexeval import RegexExtractor
        >>> processor = RegexExtractor(r"Answer: (.*)")
        >>> text = "Step 1: 3 + 2 = 5\\nStep 2: 5 × 4 = 20\\nAnswer: 20"
        >>> print(processor(text))
        20
    """

    def __init__(self, pattern: str) -> None:
        # Compile once here so every call reuses the same pattern object.
        self._pattern = re.compile(pattern, flags=re.DOTALL)

    def __call__(self, text: str) -> str:
        """Return the last match found in `text`, or `""` when there is none.

        Args:
            text: The text to search.
        """
        matches = self._pattern.findall(text)
        return matches[-1] if matches else ""

__init__ ¶

__init__(pattern: str) -> None

Source code in flexeval/core/string_processor/regex.py

def __init__(self, pattern: str) -> None:
    """Compile and store the extraction pattern.

    Args:
        pattern: The regex pattern to extract. It is compiled with
            `re.DOTALL`, so `.` also matches newline characters.
    """
    self._pattern = re.compile(pattern, flags=re.DOTALL)

__call__ ¶

__call__(text: str) -> str

Source code in flexeval/core/string_processor/regex.py

def __call__(self, text: str) -> str:
    """Return the last `findall` hit of the stored pattern in `text`.

    Args:
        text: The text to search.

    Returns:
        The last element produced by `findall`, or `""` when the pattern
        does not match anywhere in `text`.
    """
    matches = self._pattern.findall(text)
    return matches[-1] if matches else ""

StringStrip ¶

Strip leading and trailing whitespace from a string.

Examples:

>>> from flexeval import StringStrip
>>> processor = StringStrip()
>>> text = " ABC"
>>> normalized_text = processor(text)
>>> print(normalized_text)
ABC

Source code in flexeval/core/string_processor/string_strip.py

class StringStrip(StringProcessor):
    """Strip leading and trailing whitespace from a string.

    Examples:
        >>> from flexeval import StringStrip
        >>> processor = StringStrip()
        >>> text = " ABC"
        >>> normalized_text = processor(text)
        >>> print(normalized_text)
        ABC
    """

    def __call__(self, text: str) -> str:
        """Return `text` with leading and trailing whitespace removed.

        Args:
            text: The text to process.

        Returns:
            The result of `text.strip()`.
        """
        return text.strip()

__call__ ¶

__call__(text: str) -> str

Source code in flexeval/core/string_processor/string_strip.py

def __call__(self, text: str) -> str:
    """Return `text` with leading and trailing whitespace removed.

    Args:
        text: The text to process.

    Returns:
        The result of `text.strip()`.
    """
    return text.strip()

TemplateRenderer ¶

Render a jinja2 template with a given string.

Examples:

>>> from flexeval import TemplateRenderer
>>> processor = TemplateRenderer("This is a {{text}}")
>>> text = "ABC"
>>> normalized_text = processor(text)
>>> print(normalized_text)
This is a ABC

Source code in flexeval/core/string_processor/template.py

class TemplateRenderer(StringProcessor):
    """Render a jinja2 template with a given string.

    The string being processed is passed to the template under the
    variable name `text`.

    Args:
        template: The template source, compiled through `JINJA2_ENV`.

    Examples:
        >>> from flexeval import TemplateRenderer
        >>> processor = TemplateRenderer("This is a {{text}}")
        >>> text = "ABC"
        >>> normalized_text = processor(text)
        >>> print(normalized_text)
        This is a ABC
    """

    def __init__(self, template: str) -> None:
        # Compile once so each call only has to render.
        self._template = JINJA2_ENV.from_string(template)

    def __call__(self, text: str) -> str:
        """Render the stored template with `text` bound to the `text` variable.

        Args:
            text: The text to insert into the template.

        Returns:
            The rendered template.
        """
        return self._template.render(text=text)

__init__ ¶

__init__(template: str) -> None

Source code in flexeval/core/string_processor/template.py

def __init__(self, template: str) -> None:
    """Compile and store the template.

    Args:
        template: The template source, compiled through `JINJA2_ENV`.
    """
    self._template = JINJA2_ENV.from_string(template)

__call__ ¶

__call__(text: str) -> str

Source code in flexeval/core/string_processor/template.py

def __call__(self, text: str) -> str:
    """Render the stored template with `text` bound to the `text` variable.

    Args:
        text: The text to insert into the template.

    Returns:
        The rendered template.
    """
    return self._template.render(text=text)

StringProcessor