summaryrefslogtreecommitdiff
path: root/dts/framework/parser.py
blob: 7254c75b71b117ce306810f80137ce6d8c4fcb20 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2024 Arm Limited

"""Parsing utility module.

This module provides :class:`~TextParser`, a base class that lets any dataclass be populated by
parsing a block of text.
"""

import re
from abc import ABC
from dataclasses import MISSING, dataclass, fields
from functools import partial
from typing import Any, Callable, TypedDict, cast

from typing_extensions import Self

from framework.exception import InternalError


class ParserFn(TypedDict):
    """Dictionary wrapping a parser function, usable as :func:`dataclasses.field` metadata.

    :class:`TextParser` stores and looks up the parser function of a field under the single key
    declared here.
    """

    #: The parser function. It receives the text to parse and returns the parsed value.
    TextParser_fn: Callable[[str], Any]


@dataclass
class TextParser(ABC):
    r"""Abstract dataclass base that builds instances by parsing a block of text.

    Subclass a dataclass from :class:`TextParser` to give it text parsing abilities.

    The `parse` class method acts as a factory: it takes the text and, for every dataclass field
    that carries a parser function in its metadata, runs that function against the whole text. Any
    value other than None is assigned to the corresponding field of the new instance. Fields
    without a parser function, and fields whose parser function returned None, are left to their
    default value or default factory. A parser function returning None for a field that has no
    default raises an exception.

    A set of parser function factories is provided, together with `wrap` which chains an arbitrary
    function after a parser function. All of them return a dictionary meant to be passed as the
    field's metadata param. The `find` method, which searches the text with a regular expression,
    is expected to be the most commonly used one.

    Example:
        The following example makes use of and demonstrates every parser function available:

        .. code:: python

            from dataclasses import dataclass, field
            from enum import Enum
            from framework.parser import TextParser

            class Colour(Enum):
                BLACK = 1
                WHITE = 2

                @classmethod
                def from_str(cls, text: str):
                    match text:
                        case "black":
                            return cls.BLACK
                        case "white":
                            return cls.WHITE
                        case _:
                            return None # unsupported colour

                @classmethod
                def make_parser(cls):
                    # make a parser function that finds a match and
                    # then makes it a Colour object through Colour.from_str
                    return TextParser.wrap(TextParser.find(r"is a (\w+)"), cls.from_str)

            @dataclass
            class Animal(TextParser):
                kind: str = field(metadata=TextParser.find(r"is a \w+ (\w+)"))
                name: str = field(metadata=TextParser.find(r"^(\w+)"))
                colour: Colour = field(metadata=Colour.make_parser())
                age: int = field(metadata=TextParser.find_int(r"aged (\d+)"))

            steph = Animal.parse("Stephanie is a white cat aged 10")
            print(steph) # Animal(kind='cat', name='Stephanie', colour=<Colour.WHITE: 2>, age=10)
    """

    """============ BEGIN PARSER FUNCTIONS ============"""

    @staticmethod
    def wrap(parser_fn: ParserFn, wrapper_fn: Callable) -> ParserFn:
        """Chains `wrapper_fn` after an existing parser function.

        The resulting parser function first calls the function stored in `parser_fn`. If that
        returns None, None is returned straight away and `wrapper_fn` is never called. Any other
        value, including False, is handed over to `wrapper_fn` and its result is returned. In
        pseudo-code::

            intermediate_value := parser_fn(input)
            if intermediate_value is None then
                output := None
            else
                output := wrapper_fn(intermediate_value)

        Args:
            parser_fn: The dictionary storing the parser function to be wrapped.
            wrapper_fn: The function applied to the value returned by `parser_fn`.

        Returns:
            ParserFn: A dictionary for the `dataclasses.field` metadata argument containing the
                newly wrapped parser function.
        """
        # Looked up once here, so a malformed `parser_fn` fails when wrapping, not when parsing.
        wrapped = parser_fn["TextParser_fn"]

        def _apply_wrapper(text: str) -> Any:
            parsed = wrapped(text)
            return None if parsed is None else wrapper_fn(parsed)

        return ParserFn(TextParser_fn=_apply_wrapper)

    @staticmethod
    def find(
        pattern: str | re.Pattern[str],
        flags: re.RegexFlag = re.RegexFlag(0),
        named: bool = False,
    ) -> ParserFn:
        """Makes a parser function that searches the text with a regular expression.

        The value returned by the parser function depends on the pattern:

        * no capturing groups: True if the pattern matched, False otherwise;
        * at least one capturing group and no match: None;
        * exactly one capturing group and a match: the value of that group;
        * several capturing groups and a match: a tuple with the value of each group.

        Args:
            pattern: The regular expression pattern.
            flags: The regular expression flags. Ignored if the given pattern is already compiled.
            named: If set to True and the pattern has capturing groups, a match is returned as a
                dictionary of the named capturing groups only.

        Returns:
            ParserFn: A dictionary for the `dataclasses.field` metadata argument containing the find
                parser function.
        """
        regex = re.compile(pattern, flags) if isinstance(pattern, str) else pattern

        def _search(text: str) -> Any:
            found = regex.search(text)

            # Without capturing groups there is nothing to extract, only whether it matched.
            if regex.groups == 0:
                return found is not None

            if found is None:
                return None

            if named:
                return found.groupdict()

            captured = found.groups()
            # A lone group is unwrapped so callers get the value itself rather than a 1-tuple.
            return captured[0] if len(captured) == 1 else captured

        return ParserFn(TextParser_fn=_search)

    @staticmethod
    def find_int(
        pattern: str | re.Pattern[str],
        flags: re.RegexFlag = re.RegexFlag(0),
        int_base: int = 0,
    ) -> ParserFn:
        """Makes a parser function that converts the match of :meth:`~find` to int.

        The pattern must contain exactly one capturing group. The captured string is converted
        with the int built-in using `int_base` as the base. When there is no match, the parser
        function returns None and no conversion is attempted.

        Args:
            pattern: The regular expression pattern.
            flags: The regular expression flags. Ignored if the given pattern is already compiled.
            int_base: The base of the number to convert from. It is passed as is to the int
                built-in.

        Raises:
            InternalError: If the pattern does not have exactly one capturing group.

        Returns:
            ParserFn: A dictionary for the `dataclasses.field` metadata argument containing the
                :meth:`~find` parser function wrapped by the int built-in.
        """
        # Compiled up front so the number of capturing groups can be validated straight away.
        regex = re.compile(pattern, flags) if isinstance(pattern, str) else pattern

        if regex.groups != 1:
            raise InternalError("only one capturing group is allowed with this parser function")

        to_int = partial(int, base=int_base)
        return TextParser.wrap(TextParser.find(regex), to_int)

    """============ END PARSER FUNCTIONS ============"""

    @classmethod
    def parse(cls, text: str) -> Self:
        """Creates a new instance of the class from the given text.

        Each field carrying a parser function in its metadata is populated with the value that
        function returns for `text`. Fields without a parser function are not passed to the class
        initializer, and neither are fields whose parser function returned None; these are
        expected to have a default value, otherwise the class initialization will fail.

        Args:
            text: The text to parse.

        Raises:
            InternalError: If a parser function returned None and its field has neither a default
                value nor a default factory.

        Returns:
            A new instance of the class.
        """
        collected: dict[str, Any] = {}

        for attr in fields(cls):
            parser = cast(ParserFn, attr.metadata).get("TextParser_fn")
            if parser is None:
                # Not a parsed field, leave it to its default.
                continue

            outcome = parser(text)
            if outcome is not None:
                collected[attr.name] = outcome
                continue

            # Nothing was parsed: acceptable only if the dataclass can fall back to a default.
            if attr.default is MISSING and attr.default_factory is MISSING:
                raise InternalError(
                    f"parser for field {attr.name} returned None, but the field has no default"
                )

        return cls(**collected)