Skip to content

Value Checkers

is_nonempty_value

Check if a value is not empty.

Parameters:

Name Type Description Default
value Any

value to check

required

Returns:

Type Description
bool

True if it is not empty

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/value_checkers.py
14
15
16
17
18
19
20
21
22
23
24
25
26
def is_nonempty_value(value: Any) -> bool:
    """
    Tell whether a value counts as "not empty".

    A value is considered empty if it is a zero-length string
    or if `pd.isna` reports it as missing.
    Whitespace-only strings are *not* treated as empty by this check.

    Args:
        value: value to check

    Returns:
        True if it is not empty
    """
    is_empty_string = isinstance(value, str) and not value
    return False if is_empty_string else not pd.isna(value)

is_bool_like

Checks if a value is a bool or can be converted into a bool. It is case-insensitive, meaning that the words can also be capitalised.

Parameters:

Name Type Description Default
value Any

value to check

required

Returns:

Type Description
bool

True if it conforms

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/value_checkers.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def is_bool_like(value: Any) -> bool:
    """
    Tell whether a value is a bool or one of the recognised textual/numeric spellings of a bool.
    The comparison ignores case and surrounding whitespace.

    Args:
        value: value to check

    Returns:
        True if it conforms
    """
    falsy_words = ("false", "0", "0.0", "no", "non", "nein")
    truthy_words = ("true", "1", "1.0", "yes", "oui", "ja")
    normalized = str(value).lower().strip()
    return normalized in falsy_words or normalized in truthy_words

is_color

Checks if a value is a color value.

Parameters:

Name Type Description Default
value Any

value to check

required

Returns:

Type Description
bool

True if it conforms

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/value_checkers.py
48
49
50
51
52
53
54
55
56
57
58
59
def is_color(value: Any) -> bool:
    """
    Tell whether a value is a hexadecimal color code of the form `#rrggbb`.
    Surrounding whitespace is ignored, and the hex digits may be upper or lower case.

    Args:
        value: value to check

    Returns:
        True if it conforms
    """
    candidate = str(value).strip()
    hex_color_match = regex.search(r"^#[0-9a-f]{6}$", candidate, flags=regex.IGNORECASE)
    return hex_color_match is not None

is_date

Checks if a value is a date value.

Parameters:

Name Type Description Default
value Any

value to check

required

Returns:

Type Description
bool

True if it conforms

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/value_checkers.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def is_date(value: Any) -> bool:
    """
    Tell whether a value has the shape of a date or date range.

    The accepted shape is `[CALENDAR:][ERA:]date[:ERA][:date]`,
    where a date is `y[-m[-d]]` with 1-4 digits for the year and 1-2 digits for month and day.
    Only the format is checked, not whether the numbers form a real calendar date.

    Args:
        value: value to check

    Returns:
        True if it conforms
    """
    # A single date: year, optionally followed by month, optionally followed by day.
    single_date = r"\d{1,4}(?:-\d{1,2}){0,2}"
    pattern_parts = (
        "^",
        r"((GREGORIAN|JULIAN|ISLAMIC):)?",  # optional calendar prefix
        r"((CE|BCE|BC|AD):)?",  # optional era of the first date
        rf"({single_date})",  # mandatory first date
        r"(:(CE|BCE|BC|AD))?",  # optional era of the second date
        rf"(:{single_date})?",  # optional second date
        "$",
    )
    return regex.search("".join(pattern_parts), str(value)) is not None

is_geoname

Checks if a value is a geoname value.

Parameters:

Name Type Description Default
value Any

value to check

required

Returns:

Type Description
bool

True if it conforms

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/value_checkers.py
85
86
87
88
89
90
91
92
93
94
95
def is_geoname(value: Any) -> bool:
    """
    Tell whether a value is a geoname value.
    The check is delegated entirely to `is_integer`.

    Args:
        value: value to check

    Returns:
        True if it conforms
    """
    conforms = is_integer(value)
    return conforms

is_decimal

Checks if a value is a float, an integer, or a string which can be converted into a float.

Parameters:

Name Type Description Default
value Any

value to check

required

Returns:

Type Description
bool

True if the value conforms to the above-mentioned criteria.

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/value_checkers.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
def is_decimal(value: Any) -> bool:
    """
    Checks if a value is a float, an integer, or a string which can be converted into a float.

    Args:
        value: value to check

    Returns:
        True if conforms to the above-mentioned criteria.
    """
    if pd.isna(value):
        return False

    match value:
        case bool():
            return False
        case int() | float():
            return True
    try:
        float(value)
        return True
    except ValueError:
        return False

is_integer

Checks if a value is an integer or a string which can be converted into an integer.

Parameters:

Name Type Description Default
value Any

value to check

required

Returns:

Type Description
bool

True if the value conforms to the above-mentioned criteria.

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/value_checkers.py
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def is_integer(value: Any) -> bool:
    """
    Tell whether a value is an integer, or a string consisting only of digits.
    Booleans, floats, and any other types are rejected.

    Args:
        value: value to check

    Returns:
        True if conforms to the above-mentioned criteria.
    """
    # bool is a subclass of int, so it has to be excluded first
    if isinstance(value, bool):
        return False
    if isinstance(value, int):
        return True
    if isinstance(value, str):
        return regex.search(r"^\d+$", value) is not None
    return False

is_string_like

Checks if a value is a string.

Parameters:

Name Type Description Default
value Any

value to check

required

Returns:

Type Description
bool

True if it is a string

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/value_checkers.py
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
def is_string_like(value: Any) -> bool:
    """
    Tell whether a value, once converted to a string, contains at least one non-whitespace character.
    Missing values (as reported by `pd.isna`) are rejected.

    Args:
        value: value to check

    Returns:
        True if it is a string
    """
    if pd.isna(value):
        return False
    stripped = str(value).strip()
    has_visible_char = regex.search(r"\S", stripped, flags=regex.UNICODE) is not None
    return bool(stripped) and has_visible_char

is_timestamp

Checks if a value is a valid timestamp.

Parameters:

Name Type Description Default
value Any

value to check

required

Returns:

Type Description
bool

True if it conforms

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/value_checkers.py
162
163
164
165
166
167
168
169
170
171
172
173
def is_timestamp(value: Any) -> bool:
    """
    Tell whether a value has the shape of a timestamp:
    `YYYY-MM-DDThh:mm:ss`, optionally followed by 1-12 fractional digits,
    and terminated by a mandatory time zone (`Z` or an offset like `+01:00`).
    Only the format is checked, not whether the numbers form a real point in time.

    Args:
        value: value to check

    Returns:
        True if it conforms
    """
    candidate = str(value)
    timestamp_match = regex.search(
        r"^\d{4}-[0-1]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d(\.\d{1,12})?(Z|[+-][0-1]\d:[0-5]\d)$",
        candidate,
    )
    return timestamp_match is not None

find_geometry_problem

Validates if a value is a valid geometry object.

Parameters:

Name Type Description Default
value Any

geometry object

required

Returns:

Type Description
str

String with the validation message if it fails, else an empty string.

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/value_checkers.py
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
def find_geometry_problem(value: Any) -> str:
    """
    Look for problems in a value that is supposed to be a JSON geometry object.

    The value is converted to a string and parsed as JSON.
    Its `type` must be one of `rectangle`, `circle`, `polygon`, and its `points` must be a list.
    Every problem found contributes one message, each starting with a line break.

    Args:
        value: geometry object

    Returns:
        String with the validation message if it fails, else an empty string.
    """
    problems: list[str] = []
    try:
        geometry = json.loads(str(value))
        if geometry["type"] not in ["rectangle", "circle", "polygon"]:
            problems.append(
                "\nThe 'type' of the JSON geometry object must be 'rectangle', 'circle', or 'polygon'."
            )
        if not isinstance(geometry["points"], list):
            problems.append("\nThe 'points' of the JSON geometry object must be a list of points.")
    except (json.JSONDecodeError, TypeError, IndexError, KeyError, AssertionError):
        # Unparsable JSON, a non-object JSON value, or a missing key:
        # messages collected so far are kept, and this one is added.
        problems.append(f"\n'{value}' is not a valid JSON geometry object.")
    return "".join(problems)

is_dsp_iri

Check if a value is a valid internal DSP IRI.

Parameters:

Name Type Description Default
value Any

IRI

required

Returns:

Type Description
bool

True if it is valid, else false

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/value_checkers.py
199
200
201
202
203
204
205
206
207
208
209
def is_dsp_iri(value: Any) -> bool:
    """
    Check if a value is a valid internal DSP IRI.

    Only the beginning of the IRI is inspected:
    it must start with `http://rdfh.ch/`, followed by a four-character project shortcode and a slash.
    The shortcode is matched as four hexadecimal characters (e.g. `0801` or `082E`),
    so that IRIs of projects whose shortcode contains the letters A-F are accepted, too.

    Args:
        value: IRI

    Returns:
        True if it is valid, else false
    """
    return bool(regex.search(r"^http://rdfh\.ch/[0-9A-Fa-f]{4}/", str(value)))

is_dsp_ark

Checks if a value is a valid ARK.

Parameters:

Name Type Description Default
value Any

ARK

required

Returns:

Type Description
bool

True if it is valid, else false

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/value_checkers.py
212
213
214
215
216
217
218
219
220
221
222
def is_dsp_ark(value: Any) -> bool:
    """
    Checks if a value is a valid ARK.

    Only the prefix is inspected: the string representation of the value must start with `ark:/`.

    Args:
        value: ARK

    Returns:
        True if it is valid, else false
    """
    # A plain prefix test is equivalent to the anchored literal pattern and needs no regex engine.
    return str(value).startswith("ark:/")

check_richtext_syntax

DSP richtexts must be convertible into valid XML. This checker escapes the reserved characters <, > and &, but only if they are not part of a standard standoff tag or escape sequence. Then, it tries to parse the resulting XML.

Note: Only DSP standard standoff tags are allowed in richtexts. They are documented at https://docs.dasch.swiss/latest/DSP-API/03-endpoints/api-v2/text/standard-standoff/.

Parameters:

Name Type Description Default
richtext str

richtext to check

required

Warns:

Type Description
DspToolsUserWarning

if the input contains XML syntax problems

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/value_checkers.py
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
def check_richtext_syntax(richtext: str) -> None:
    """
    Verify that a DSP richtext can be turned into valid XML.

    The reserved characters `<`, `>` and `&` are escaped first,
    unless they belong to a standard standoff tag or an escape sequence.
    The result is then wrapped in a root element and parsed as XML.
    If parsing fails, a warning is emitted; no exception is raised for syntax problems.

    Note: Only DSP standard standoff tags are allowed in richtexts. They are documented
    [here](https://docs.dasch.swiss/latest/DSP-API/03-endpoints/api-v2/text/standard-standoff/).

    Args:
        richtext: richtext to check

    Warns:
        DspToolsUserWarning: if the input contains XML syntax problems
    """
    # Named entities (=character references) are rewritten as numeric ones, e.g. &nbsp; -> &#160;
    with_numeric_entities = numeric_entities(_escape_reserved_chars(richtext))
    # A single root element is needed so that the fragment can be parsed as a document.
    wrapped = f"<text>{with_numeric_entities}</text>"
    try:
        etree.fromstring(wrapped)
    except etree.XMLSyntaxError as parse_error:
        problem = IllegalTagProblem(orig_err_msg=parse_error.msg, pseudo_xml=wrapped)
        warnings.warn(DspToolsUserWarning(problem.execute_error_protocol()))