Skip to content

StringProcessor

StringProcessor

An interface class used to process the model's output before evaluation. Typically used in Metric.

Source code in flexeval/core/string_processor/base.py
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
class StringProcessor(ABC):
    """Abstract base for callables that turn one string into another.

    Implementations post-process a model's output before it is evaluated,
    typically from within a `Metric`. Subclasses must implement `__call__`.
    """

    @abstractmethod
    def __call__(self, text: str) -> str:
        """Transform `text` and return the processed string.

        Args:
            text: The text to process.

        Returns:
            The processed text.

        Raises:
            NotImplementedError: When this base implementation is invoked.
        """
        raise NotImplementedError

__call__ abstractmethod

__call__(text: str) -> str

Process the input text.

Parameters:

  • text (str) –

    The text to process.

Source code in flexeval/core/string_processor/base.py
 9
10
11
12
13
14
15
16
17
@abstractmethod
def __call__(self, text: str) -> str:
    """Transform `text` and return the processed string.

    Args:
        text: The text to process.

    Returns:
        The processed text.

    Raises:
        NotImplementedError: When this base implementation is invoked.
    """
    raise NotImplementedError

AIONormalizer

StringProcessor used for the AI王 (AI King) question answering task. It is adapted from the official script.

Examples:

>>> from flexeval import AIONormalizer
>>> processor = AIONormalizer()
>>> text = "「蛹化(ようか)」"
>>> normalized_text = processor(text)
>>> print(normalized_text)
蛹化
Source code in flexeval/core/string_processor/aio.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class AIONormalizer(StringProcessor):
    """Answer normalizer for the AI王 (AI King) question answering task.

    The normalization steps are adapted from
    [the official script](https://github.com/cl-tohoku/aio4-bpr-baseline/blob/c5a226296b5e1c403268016dc7136147bbb515fe/compute_score.py).

    Examples:
        >>> from flexeval import AIONormalizer
        >>> processor = AIONormalizer()
        >>> text = "「蛹化(ようか)」"
        >>> normalized_text = processor(text)
        >>> print(normalized_text)
        蛹化
    """

    def __call__(self, text: str) -> str:
        """Normalize an answer string.

        Steps, in order: tilde to wave dash, NFKC normalization, lowercasing,
        unwrapping of 「…」 and 『…』, removal of "・", "=" and "-", removal of
        all whitespace, and removal of parenthesized spans.

        Args:
            text: The text to normalize.

        Returns:
            The normalized text.
        """
        # The tilde is not touched by unicode normalization, so map it by hand first.
        normalized = unicodedata.normalize("NFKC", text.replace("~", "〜"))

        # Case-fold alphabetical characters.
        normalized = normalized.lower()

        # Unwrap kagi-kakko quotations, keeping the quoted content.
        for bracket_pattern in (r"「(.*?)」", r"『(.*?)』"):
            normalized = re.sub(bracket_pattern, r"\1", normalized)

        # Drop punctuation marks that should not affect matching.
        for mark in ("・", "=", "-"):
            normalized = normalized.replace(mark, "")

        # Delete every whitespace run; nothing is left to strip afterwards.
        normalized = re.sub(r"\s+", "", normalized)

        # Discard parenthesized spans: 蛹化(ようか) → 蛹化
        return re.sub(r"\((.*?)\)", "", normalized)

__call__

__call__(text: str) -> str
Source code in flexeval/core/string_processor/aio.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __call__(self, text: str) -> str:
    """Normalize an answer string for the AI王 question answering task.

    Steps, in order: tilde to wave dash, NFKC normalization, lowercasing,
    unwrapping of 「…」 and 『…』, removal of "・", "=" and "-", removal of
    all whitespace, and removal of parenthesized spans.

    Args:
        text: The text to normalize.

    Returns:
        The normalized text.
    """
    # The tilde is not touched by unicode normalization, so swap it first.
    result = text.replace("~", "〜")
    result = unicodedata.normalize("NFKC", result).lower()

    # Strip kagi-kakko brackets but keep what they enclose.
    result = re.sub(r"『(.*?)』", r"\1", re.sub(r"「(.*?)」", r"\1", result))

    # Remove middle dots, equals signs and hyphens.
    result = result.replace("・", "").replace("=", "").replace("-", "")

    # Delete all whitespace; a later strip would have nothing to remove.
    result = re.sub(r"\s+", "", result)

    # Drop parenthesized spans: 蛹化(ようか) → 蛹化
    return re.sub(r"\((.*?)\)", "", result)

LastLineExtractor

Extract the last line from a string.

Examples:

>>> from flexeval import LastLineExtractor
>>> processor = LastLineExtractor()
>>> text = "Answer\nFUJI-YAMA"
>>> print(processor(text))
FUJI-YAMA
Source code in flexeval/core/string_processor/last_line.py
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
class LastLineExtractor(StringProcessor):
    """Return only the final line of a string.

    Lines are delimited by "\\n". A text that ends with "\\n" therefore
    yields an empty string, and a text without "\\n" is returned unchanged.

    Examples:
        >>> from flexeval import LastLineExtractor
        >>> processor = LastLineExtractor()
        >>> text = "Answer\\nFUJI-YAMA"
        >>> print(processor(text))
        FUJI-YAMA
    """

    def __call__(self, text: str) -> str:
        """Return whatever follows the last "\\n" in `text`.

        Args:
            text: The text to process.

        Returns:
            The last line of `text`.
        """
        # Only the final separator matters, so split once from the right.
        return text.rsplit("\n", 1)[-1]

__call__

__call__(text: str) -> str
Source code in flexeval/core/string_processor/last_line.py
15
16
def __call__(self, text: str) -> str:
    """Return whatever follows the last "\\n" in `text` (all of it if there is none)."""
    _, _, last_line = text.rpartition("\n")
    return last_line

StringLower

This processor returns a lowercased string.

Examples:

>>> from flexeval import StringLower
>>> processor = StringLower()
>>> text = "ABCDefg"
>>> normalized_text = processor(text)
>>> print(normalized_text)
abcdefg
Source code in flexeval/core/string_processor/lower.py
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
class StringLower(StringProcessor):
    """Convert a string to lowercase.

    Examples:
        >>> from flexeval import StringLower
        >>> processor = StringLower()
        >>> text = "ABCDefg"
        >>> normalized_text = processor(text)
        >>> print(normalized_text)
        abcdefg
    """

    def __call__(self, text: str) -> str:
        """Return a lowercased copy of `text`.

        Args:
            text: The text to process.

        Returns:
            `text` with its characters lowercased.
        """
        lowered = text.lower()
        return lowered

__call__

__call__(text: str) -> str
Source code in flexeval/core/string_processor/lower.py
16
17
def __call__(self, text: str) -> str:
    """Return a lowercased copy of `text`."""
    lowered = text.lower()
    return lowered

NFKCNormalizer

This processor returns a NFKC normalized string.

Examples:

>>> from flexeval import NFKCNormalizer
>>> processor = NFKCNormalizer()
>>> text = "０１２３ＡＢＣ"
>>> normalized_text = processor(text)
>>> print(normalized_text)
0123ABC
Source code in flexeval/core/string_processor/nfkc.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
class NFKCNormalizer(StringProcessor):
    """Apply Unicode NFKC normalization to a string.

    Examples:
        >>> from flexeval import NFKCNormalizer
        >>> processor = NFKCNormalizer()
        >>> text = "０１２３ＡＢＣ"
        >>> normalized_text = processor(text)
        >>> print(normalized_text)
        0123ABC
    """

    def __call__(self, text: str) -> str:
        """Return the NFKC normal form of `text`.

        Args:
            text: The text to process.

        Returns:
            The NFKC-normalized text.
        """
        normalized = unicodedata.normalize("NFKC", text)
        return normalized

__call__

__call__(text: str) -> str
Source code in flexeval/core/string_processor/nfkc.py
18
19
def __call__(self, text: str) -> str:
    """Return the NFKC normal form of `text`."""
    normalized = unicodedata.normalize("NFKC", text)
    return normalized

RegexExtractor

StringProcessor that extracts the last match of a regex pattern. Useful to extract an answer after a step-by-step derivation.

Parameters:

  • pattern (str) –

    The regex pattern to extract.

Examples:

>>> from flexeval import RegexExtractor
>>> processor = RegexExtractor(r"Answer: (.*)")
>>> text = "Step 1: 3 + 2 = 5\nStep 2: 5 × 4 = 20\nAnswer: 20"
>>> print(processor(text))
20
Source code in flexeval/core/string_processor/regex.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
class RegexExtractor(StringProcessor):
    """
    Extract the last match of a regex pattern from a string.
    Handy for pulling out the answer that follows a step-by-step derivation.

    The pattern is compiled with `re.DOTALL`, so `.` also matches newlines.
    Matches are collected with `findall`: a pattern with one capturing group
    yields that group's text, and a pattern without groups yields the whole match.
    NOTE: with several capturing groups `findall` produces tuples, so the
    returned value is then a tuple rather than a string.

    Args:
        pattern: The regex pattern to extract.

    Examples:
        >>> from flexeval import RegexExtractor
        >>> processor = RegexExtractor(r"Answer: (.*)")
        >>> text = "Step 1: 3 + 2 = 5\\nStep 2: 5 × 4 = 20\\nAnswer: 20"
        >>> print(processor(text))
        20
    """

    def __init__(self, pattern: str) -> None:
        # Compile once so repeated calls reuse the same pattern object.
        self._pattern = re.compile(pattern, re.DOTALL)

    def __call__(self, text: str) -> str:
        """Return the last match found in `text`, or "" when nothing matches.

        Args:
            text: The text to search.

        Returns:
            The last match, or an empty string if there is none.
        """
        matches = self._pattern.findall(text)
        return matches[-1] if matches else ""

__init__

__init__(pattern: str) -> None
Source code in flexeval/core/string_processor/regex.py
22
23
def __init__(self, pattern: str) -> None:
    """Compile `pattern` once, with `re.DOTALL` so that `.` also matches newlines.

    Args:
        pattern: The regex pattern to extract.
    """
    compiled = re.compile(pattern, flags=re.DOTALL)
    self._pattern = compiled

__call__

__call__(text: str) -> str
Source code in flexeval/core/string_processor/regex.py
25
26
27
28
29
def __call__(self, text: str) -> str:
    """Return the last match of the compiled pattern in `text`, or "" if none.

    Args:
        text: The text to search.

    Returns:
        The last element produced by `findall`, or an empty string.
    """
    matches = self._pattern.findall(text)
    if matches:
        return matches[-1]
    return ""

StringStrip

Strip leading and trailing whitespace from a string.

Examples:

>>> from flexeval import StringStrip
>>> processor = StringStrip()
>>> text = " ABC"
>>> normalized_text = processor(text)
>>> print(normalized_text)
ABC
Source code in flexeval/core/string_processor/string_strip.py
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
class StringStrip(StringProcessor):
    """Remove leading and trailing whitespace from a string.

    Examples:
        >>> from flexeval import StringStrip
        >>> processor = StringStrip()
        >>> text = " ABC"
        >>> normalized_text = processor(text)
        >>> print(normalized_text)
        ABC
    """

    def __call__(self, text: str) -> str:
        """Return `text` without surrounding whitespace.

        Args:
            text: The text to process.

        Returns:
            The stripped text.
        """
        stripped = text.strip()
        return stripped

__call__

__call__(text: str) -> str
Source code in flexeval/core/string_processor/string_strip.py
16
17
def __call__(self, text: str) -> str:
    """Return `text` without leading or trailing whitespace."""
    stripped = text.strip()
    return stripped

TemplateRenderer

Render a Jinja2 template, passing the given string to it as the `text` variable.

Examples:

>>> from flexeval import TemplateRenderer
>>> processor = TemplateRenderer("This is a {{text}}")
>>> text = "ABC"
>>> normalized_text = processor(text)
>>> print(normalized_text)
This is a ABC
Source code in flexeval/core/string_processor/template.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
class TemplateRenderer(StringProcessor):
    """Render a jinja2 template with the input string bound to `text`.

    Args:
        template: The jinja2 template source. The processed string is
            available inside it as the variable `text`.

    Examples:
        >>> from flexeval import TemplateRenderer
        >>> processor = TemplateRenderer("This is a {{text}}")
        >>> text = "ABC"
        >>> normalized_text = processor(text)
        >>> print(normalized_text)
        This is a ABC
    """

    def __init__(self, template: str) -> None:
        # Build the template object once so each call only has to render it.
        compiled = JINJA2_ENV.from_string(template)
        self._template = compiled

    def __call__(self, text: str) -> str:
        """Render the template with `text` passed as the `text` variable.

        Args:
            text: The string to insert into the template.

        Returns:
            The rendered template.
        """
        rendered = self._template.render(text=text)
        return rendered

__init__

__init__(template: str) -> None
Source code in flexeval/core/string_processor/template.py
18
19
def __init__(self, template: str) -> None:
    """Build the template object from its source string.

    Args:
        template: The jinja2 template source.
    """
    compiled = JINJA2_ENV.from_string(template)
    self._template = compiled

__call__

__call__(text: str) -> str
Source code in flexeval/core/string_processor/template.py
21
22
def __call__(self, text: str) -> str:
    """Render the stored template with `text` passed as the `text` variable."""
    rendered = self._template.render(text=text)
    return rendered