Helpers

`ListLookup`

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

@dataclass
class ListLookup:
    """
    Resolves list node labels (in one language) to list node names,
    and property names to the name of the list they use.

    Instances are normally built with `ListLookup.create_new()`.
    """

    # list name -> (node label -> node name)
    _lookup: dict[str, dict[str, str]]
    # property name -> list name
    _prop_to_list_name: dict[str, str]
    # language of the labels stored in `_lookup`; only used in warning messages here
    _label_language: str

    @staticmethod
    def create_new(project_json_path: str | Path, language_of_label: str, default_ontology: str) -> ListLookup:
        """
        Creates a list lookup based on list labels in a specified language and returning list node names.
        Works for all lists in a project.json

        Args:
            project_json_path: path to a JSON project file (a.k.a. ontology)
            language_of_label: label language used for the list
            default_ontology: ontology prefix which is defined as default in the XML file

        Returns:
            `ListLookup` for a project

        Examples:
            ```python
            list_lookup = xmllib.ListLookup.create_new(
                project_json_path="project.json",
                language_of_label="en",
                default_ontology="default-onto",
            )
            ```
        """
        with open(project_json_path, encoding="utf-8") as f:
            json_file = json.load(f)
        label_to_list_node_lookup = _get_label_to_node_all_lists(json_file["project"]["lists"], language_of_label)
        prop_to_list_mapper = _get_property_to_list_name_mapping(json_file["project"]["ontologies"], default_ontology)
        return ListLookup(
            _lookup=label_to_list_node_lookup,
            _prop_to_list_name=prop_to_list_mapper,
            _label_language=language_of_label,
        )

    def get_node_via_list_name(self, list_name: str, node_label: str) -> str:
        """
        Returns the list node name based on a label.
        The language of the label was specified when creating the `ListLookup`.
        The label is matched exactly first; if that fails, it is matched ignoring capitalisation.
        If the list or the label is not found, a warning is emitted and an empty string is returned.

        Args:
            list_name: name of the list
            node_label: label of the node

        Returns:
            node name, or an empty string if nothing was found

        Examples:
            ```python
            node_name = list_lookup.get_node_via_list_name(
                list_name="list1",
                node_label="Label 1"  # or: "label 1" (capitalisation is not relevant)
            )
            # node_name == "node1"
            ```
        """
        if not (list_lookup := self._lookup.get(list_name)):
            emit_xmllib_input_warning(
                MessageInfo(f"The entered list name '{list_name}' was not found. An empty string is returned.")
            )
            return ""
        found_node = list_lookup.get(node_label)
        if not found_node and isinstance(node_label, str):
            # Fallback that makes the documented "capitalisation is not relevant" hold,
            # independent of the capitalisation in which the labels are stored.
            # If several labels differ only by case, the first one in the lookup wins.
            wanted = node_label.casefold()
            found_node = next(
                (
                    node
                    for label, node in list_lookup.items()
                    if isinstance(label, str) and label.casefold() == wanted
                ),
                "",
            )
        if not found_node:
            emit_xmllib_input_warning(
                MessageInfo(
                    f"'{node_label}' was not recognised as label of the list '{list_name}'. "
                    f"This ListLookup is configured for '{self._label_language}' labels. An empty string is returned."
                )
            )
            return ""
        return found_node

    def get_list_name_and_node_via_property(self, prop_name: str, node_label: str) -> tuple[str, str]:
        """
        Returns the list name and the node name based on a property that is used with the list and the label of a node.
        The language of the label was specified when creating the `ListLookup`.
        The list name needs to be referenced in the XML file.
        If the property is not found, two empty strings are returned.

        Args:
            prop_name: name of the property
            node_label: label of the node

        Returns:
            list name and node name

        Examples:
            ```python
            list_name, node_name = list_lookup.get_list_name_and_node_via_property(
                prop_name=":hasList",  # or: "default-onto:hasList"
                node_label="label 1"
            )
            # list_name == "list1"
            # node_name == "node1"
            ```
        """
        if not (list_name := self.get_list_name_via_property(prop_name)):
            return "", ""
        return list_name, self.get_node_via_list_name(list_name, node_label)

    def get_list_name_via_property(self, prop_name: str) -> str:
        """
        Returns the list name as specified in the ontology for a property.
        The list name needs to be referenced in the XML file.
        If the property is not found, a warning is emitted and an empty string is returned.

        Args:
            prop_name: name of the property

        Returns:
            Name of the list

        Examples:
            ```python
            list_name = list_lookup.get_list_name_via_property(
                prop_name=":hasList",  # or: "default-onto:hasList"
            )
            # list_name == "list1"
            ```
        """
        if not (list_name := self._prop_to_list_name.get(prop_name)):
            emit_xmllib_input_warning(
                MessageInfo(f"The entered property '{prop_name}' was not found. An empty string is returned.")
            )
            return ""
        return list_name

`create_new`

Creates a list lookup based on list labels in a specified language and returning list node names. Works for all lists in a project.json

Parameters:

Name	Type	Description	Default
`project_json_path`	`str \| Path`	path to a JSON project file (a.k.a. ontology)	required
`language_of_label`	`str`	label language used for the list	required
`default_ontology`	`str`	ontology prefix which is defined as default in the XML file	required

Returns:

Type	Description
`ListLookup`	`ListLookup` for a project

Examples:

list_lookup = xmllib.ListLookup.create_new(
    project_json_path="project.json",
    language_of_label="en",
    default_ontology="default-onto",
)

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

@staticmethod
def create_new(project_json_path: str | Path, language_of_label: str, default_ontology: str) -> ListLookup:
    """
    Builds a `ListLookup` from a JSON project file.
    The lookup covers all lists of the project and maps labels in the given language to list node names.

    Args:
        project_json_path: path to a JSON project file (a.k.a. ontology)
        language_of_label: label language used for the list
        default_ontology: ontology prefix which is defined as default in the XML file

    Returns:
        `ListLookup` for a project

    Examples:
        ```python
        list_lookup = xmllib.ListLookup.create_new(
            project_json_path="project.json",
            language_of_label="en",
            default_ontology="default-onto",
        )
        ```
    """
    with open(project_json_path, encoding="utf-8") as project_file:
        project_section = json.load(project_file)["project"]
    # labels -> node names, per list
    labels_per_list = _get_label_to_node_all_lists(project_section["lists"], language_of_label)
    # property names -> list names
    lists_per_property = _get_property_to_list_name_mapping(project_section["ontologies"], default_ontology)
    return ListLookup(
        _lookup=labels_per_list,
        _prop_to_list_name=lists_per_property,
        _label_language=language_of_label,
    )

`get_node_via_list_name`

Returns the list node name based on a label. The language of the label was specified when creating the ListLookup.

Parameters:

Name	Type	Description	Default
`list_name`	`str`	name of the list	required
`node_label`	`str`	label of the node	required

Returns:

Type	Description
`str`	node name

Examples:

node_name = list_lookup.get_node_via_list_name(
    list_name="list1",
    node_label="Label 1"  # or: "label 1" (capitalisation is not relevant)
)
# node_name == "node1"

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def get_node_via_list_name(self, list_name: str, node_label: str) -> str:
    """
    Returns the list node name based on a label.
    The language of the label was specified when creating the `ListLookup`.
    The label is matched exactly first; if that fails, it is matched ignoring capitalisation.
    If the list or the label is not found, a warning is emitted and an empty string is returned.

    Args:
        list_name: name of the list
        node_label: label of the node

    Returns:
        node name, or an empty string if nothing was found

    Examples:
        ```python
        node_name = list_lookup.get_node_via_list_name(
            list_name="list1",
            node_label="Label 1"  # or: "label 1" (capitalisation is not relevant)
        )
        # node_name == "node1"
        ```
    """
    if not (list_lookup := self._lookup.get(list_name)):
        emit_xmllib_input_warning(
            MessageInfo(f"The entered list name '{list_name}' was not found. An empty string is returned.")
        )
        return ""
    found_node = list_lookup.get(node_label)
    if not found_node and isinstance(node_label, str):
        # Fallback that makes the documented "capitalisation is not relevant" hold,
        # independent of the capitalisation in which the labels are stored.
        # If several labels differ only by case, the first one in the lookup wins.
        wanted = node_label.casefold()
        found_node = next(
            (
                node
                for label, node in list_lookup.items()
                if isinstance(label, str) and label.casefold() == wanted
            ),
            "",
        )
    if not found_node:
        emit_xmllib_input_warning(
            MessageInfo(
                f"'{node_label}' was not recognised as label of the list '{list_name}'. "
                f"This ListLookup is configured for '{self._label_language}' labels. An empty string is returned."
            )
        )
        return ""
    return found_node

`get_list_name_and_node_via_property`

Returns the list name and the node name based on a property that is used with the list and the label of a node. The language of the label was specified when creating the ListLookup. The list name needs to be referenced in the XML file.

Parameters:

Name	Type	Description	Default
`prop_name`	`str`	name of the property	required
`node_label`	`str`	label of the node	required

Returns:

Type	Description
`tuple[str, str]`	list name and node name

Examples:

list_name, node_name = list_lookup.get_list_name_and_node_via_property(
    prop_name=":hasList",  # or: "default-onto:hasList"
    node_label="label 1"
)
# list_name == "list1"
# node_name == "node1"

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def get_list_name_and_node_via_property(self, prop_name: str, node_label: str) -> tuple[str, str]:
    """
    Looks up the list that a property uses, then resolves a node label within that list.
    The language of the label was specified when creating the `ListLookup`.
    The list name needs to be referenced in the XML file.
    If no list name is found for the property, two empty strings are returned.

    Args:
        prop_name: name of the property
        node_label: label of the node

    Returns:
        list name and node name

    Examples:
        ```python
        list_name, node_name = list_lookup.get_list_name_and_node_via_property(
            prop_name=":hasList",  # or: "default-onto:hasList"
            node_label="label 1"
        )
        # list_name == "list1"
        # node_name == "node1"
        ```
    """
    resolved_list = self.get_list_name_via_property(prop_name)
    if resolved_list:
        resolved_node = self.get_node_via_list_name(resolved_list, node_label)
        return resolved_list, resolved_node
    # no list found for the property: the node is not looked up at all
    return "", ""

`get_list_name_via_property`

Returns the list name as specified in the ontology for a property. The list name needs to be referenced in the XML file.

Parameters:

Name	Type	Description	Default
`prop_name`	`str`	name of the property	required

Returns:

Type	Description
`str`	Name of the list

Examples:

list_name = list_lookup.get_list_name_via_property(
    prop_name=":hasList",  # or: "default-onto:hasList"
)
# list_name == "list1"

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def get_list_name_via_property(self, prop_name: str) -> str:
    """
    Looks up the name of the list that is specified in the ontology for a property.
    The list name needs to be referenced in the XML file.
    If the property is not found, a warning is emitted and an empty string is returned.

    Args:
        prop_name: name of the property

    Returns:
        Name of the list

    Examples:
        ```python
        list_name = list_lookup.get_list_name_via_property(
            prop_name=":hasList",  # or: "default-onto:hasList"
        )
        # list_name == "list1"
        ```
    """
    resolved_list = self._prop_to_list_name.get(prop_name)
    if resolved_list:
        return resolved_list
    warning = MessageInfo(f"The entered property '{prop_name}' was not found. An empty string is returned.")
    emit_xmllib_input_warning(warning)
    return ""

`create_footnote_string`

Takes the text for a footnote, and returns a string with the correct formatting. You can use this if you want to add the footnote to a string. Currently, the newline replacement options are restricted to LINEBREAK and NONE. The reserved characters <, > and & will be escaped temporarily, but they will be correctly displayed in DSP-APP.

Attention

The text in the footnote may be richtext, i.e. contain XML tags.
Not all tags supported in ordinary richtext are currently implemented.
The allowed tags are:
- <br> (break line)
- <strong> (bold)
- <em> (italic)
- <u> (underline)
- <strike> (strike through)
- <a href="URI"> (link to a URI)
- <a class="salsah-link" href="Knora IRI"> (link to a resource)

Parameters:

Name	Type	Description	Default
`footnote_text`	`str`	Text for the footnote	required
`newline_replacement_option`	`NewlineReplacement`	options to replace newlines	`LINEBREAK`

Returns:

Type	Description
`str`	The footnote as a string

Examples:

result = xmllib.create_footnote_string("Text")
# result == '<footnote content="Text"/>'

result = xmllib.create_footnote_string("Text\nSecond Line")
# result == '<footnote content="Text&lt;br/&gt;Second Line"/>'

result = xmllib.create_footnote_string("Already escaped &lt;&gt;")
# already escaped characters will not be escaped again
# result == '<footnote content="Already escaped &lt;&gt;"/>'

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def create_footnote_string(
    footnote_text: str, newline_replacement_option: NewlineReplacement = NewlineReplacement.LINEBREAK
) -> str:
    """
    Builds a footnote from the given text and returns it serialised as a string.
    Use this if you want to embed the footnote in a string.
    Currently, the newline replacement options are restricted to `LINEBREAK` and `NONE`.
    The reserved characters `<`, `>` and `&` will be escaped temporarily,
    but they will be correctly displayed in DSP-APP.

    Attention:
        - The text in the footnote may be richtext, i.e. contain XML tags.
        - Not all tags supported in ordinary richtext are currently implemented.
        - The allowed tags are:
            - `<br>` (break line)
            - `<strong>` (bold)
            - `<em>` (italic)
            - `<u>` (underline)
            - `<strike>` (strike through)
            - `<a href="URI">` (link to a URI)
            - `<a class="salsah-link" href="Knora IRI">` (link to a resource)

    Args:
        footnote_text: Text for the footnote
        newline_replacement_option: options to replace newlines

    Raises:
        XmllibInputError: If the text is empty, or if a newline replacement which is not implemented is entered

    Returns:
        The footnote as a string

    Examples:
        ```python
        result = xmllib.create_footnote_string("Text")
        # result == '<footnote content="Text"/>'
        ```

        ```python
        result = xmllib.create_footnote_string("Text\\nSecond Line")
        # result == '<footnote content="Text&lt;br/&gt;Second Line"/>'
        ```

        ```python
        result = xmllib.create_footnote_string("Already escaped &lt;&gt;")
        # already escaped characters will not be escaped again
        # result == '<footnote content="Already escaped &lt;&gt;"/>'
        ```
    """
    # validation and element construction are delegated; only serialisation happens here
    return etree.tostring(
        create_footnote_element(footnote_text, newline_replacement_option),
        encoding="unicode",
    )

`create_footnote_element`

Takes the text for a footnote, and returns an etree.Element. You can use this if you are working with lxml. Currently, the newline replacement options are restricted to LINEBREAK and NONE.

Attention

The text in the footnote may be richtext, i.e. contain XML tags.
Not all tags supported in ordinary richtext are currently implemented.
The allowed tags are:
- <br> (break line)
- <strong> (bold)
- <em> (italic)
- <u> (underline)
- <strike> (strike through)
- <a href="URI"> (link to a URI)
- <a class="salsah-link" href="Knora IRI"> (link to a resource)

Parameters:

Name	Type	Description	Default
`footnote_text`	`str`	Text for the footnote	required
`newline_replacement_option`	`NewlineReplacement`	options to replace newlines	`LINEBREAK`

Returns:

Type	Description
`_Element`	The footnote as an `etree` element

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def create_footnote_element(
    footnote_text: str, newline_replacement_option: NewlineReplacement = NewlineReplacement.LINEBREAK
) -> etree._Element:
    """
    Builds a `footnote` element (`etree.Element`) whose `content` attribute holds the footnote text.
    Use this if you are working with `lxml`.
    Currently, the newline replacement options are restricted to `LINEBREAK` and `NONE`.

    Attention:
        - The text in the footnote may be richtext, i.e. contain XML tags.
        - Not all tags supported in ordinary richtext are currently implemented.
        - The allowed tags are:
            - `<br>` (break line)
            - `<strong>` (bold)
            - `<em>` (italic)
            - `<u>` (underline)
            - `<strike>` (strike through)
            - `<a href="URI">` (link to a URI)
            - `<a class="salsah-link" href="Knora IRI">` (link to a resource)

    Args:
        footnote_text: Text for the footnote
        newline_replacement_option: options to replace newlines

    Raises:
        XmllibInputError: If the text is empty, or if a newline replacement which is not implemented is entered

    Returns:
        The footnote as an `etree` element
    """
    supported_replacements = {NewlineReplacement.LINEBREAK, NewlineReplacement.NONE}
    if newline_replacement_option not in supported_replacements:
        unsupported_msg = MessageInfo("Currently the only supported newline replacement is linebreak (<br/>) or None.")
        raise_xmllib_input_error(unsupported_msg)
    if not is_nonempty_value_internal(footnote_text):
        raise_xmllib_input_error(MessageInfo("The input value is empty."))
    # the input is converted with str() before the newlines are replaced
    with_replaced_newlines = replace_newlines_with_tags(str(footnote_text), newline_replacement_option)
    content = unescape_reserved_xml_chars(with_replaced_newlines)
    return etree.Element("footnote", attrib={"content": content})

`create_standoff_link_to_resource`

Creates a standoff link to a resource.

Parameters:

Name	Type	Description	Default
`resource_id`	`str`	ID of the resource that is linked	required
`displayed_text`	`str`	text to display for the embedded link	required

Returns:

Type	Description
`str`	A standoff link in string form.

Examples:

result = xmllib.create_standoff_link_to_resource("resource_id", "Text")
# result == '<a class="salsah-link" href="IRI:resource_id:IRI">Text</a>'

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def create_standoff_link_to_resource(resource_id: str, displayed_text: str) -> str:
    """
    Builds a standoff link (`<a class="salsah-link">`) that points to a resource.

    Args:
        resource_id: ID of the resource that is linked
        displayed_text: text to display for the embedded link

    Returns:
        A standoff link in string form.

    Raises:
        XmllibInputError: if the resource ID or the displayed text are empty

    Examples:
        ```python
        result = xmllib.create_standoff_link_to_resource("resource_id", "Text")
        # result == '<a class="salsah-link" href="IRI:resource_id:IRI">Text</a>'
        ```
    """
    # both inputs are always checked, the error is raised if at least one is empty
    id_is_filled = is_nonempty_value_internal(resource_id)
    text_is_filled = is_nonempty_value_internal(displayed_text)
    if not (id_is_filled and text_is_filled):
        raise_xmllib_input_error(
            MessageInfo(
                "The entered resource ID and displayed text may not be empty. "
                f"Your input: resource_id '{resource_id}' / displayed_text '{displayed_text}'"
            )
        )
    link = etree.Element("a", attrib={"class": "salsah-link", "href": f"IRI:{resource_id}:IRI"})
    link.text = displayed_text
    return etree.tostring(link, encoding="unicode")

`create_standoff_link_to_uri`

Creates a standoff link to a URI.

Parameters:

Name	Type	Description	Default
`uri`	`str`	the target URI that should be linked to	required
`displayed_text`	`str`	text to display for the embedded link	required

Returns:

Type	Description
`str`	A standoff link in string form.

Examples:

result = xmllib.create_standoff_link_to_uri("https://www.dasch.swiss/", "This is DaSCH")
# result == '<a href="https://www.dasch.swiss/">This is DaSCH</a>'

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def create_standoff_link_to_uri(uri: str, displayed_text: str) -> str:
    """
    Builds a standoff link (`<a href="...">`) that points to a URI.

    Args:
        uri: the target URI that should be linked to
        displayed_text: text to display for the embedded link

    Returns:
        A standoff link in string form.

    Raises:
        XmllibInputError: if the URI or the displayed text are empty

    Examples:
        ```python
        result = xmllib.create_standoff_link_to_uri("https://www.dasch.swiss/", "This is DaSCH")
        # result == '<a href="https://www.dasch.swiss/">This is DaSCH</a>'
        ```
    """
    # both inputs are always checked, the error is raised if at least one is empty
    uri_is_filled = is_nonempty_value_internal(uri)
    text_is_filled = is_nonempty_value_internal(displayed_text)
    if not (uri_is_filled and text_is_filled):
        raise_xmllib_input_error(
            MessageInfo(
                "The entered URI and displayed text may not be empty. "
                f"Your input: uri '{uri}' / displayed_text '{displayed_text}'"
            )
        )
    link = etree.Element("a", attrib={"href": uri})
    link.text = displayed_text
    return etree.tostring(link, encoding="unicode")

`get_list_nodes_from_string_via_list_name`

Resolves list labels to node names.

Parameters:

Name	Type	Description	Default
`string_with_list_labels`	`str`	the string containing list labels	required
`label_separator`	`str`	separator in the string that contains the labels	required
`list_name`	`str`	name of the list	required
`list_lookup`	`ListLookup`	`ListLookup` of the project	required

Returns:

Type	Description
`list[str]`	A list of node names. If the string is empty, it returns an empty list.

Examples:

string_with_list_labels = "Label 1; Label 2"
nodes = xmllib.get_list_nodes_from_string_via_list_name(
    string_with_list_labels=string_with_list_labels,
    label_separator=";",
    list_name="list1",
    list_lookup=list_lookup,
)
# nodes == ["node1", "node2"]

string_with_list_labels = ""
nodes = xmllib.get_list_nodes_from_string_via_list_name(
    string_with_list_labels=string_with_list_labels,
    label_separator=";",
    list_name="list1",
    list_lookup=list_lookup,
)
# nodes == []

string_with_list_labels = pd.NA
nodes = xmllib.get_list_nodes_from_string_via_list_name(
    string_with_list_labels=string_with_list_labels,
    label_separator=";",
    list_name="list1",
    list_lookup=list_lookup,
)
# nodes == []

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def get_list_nodes_from_string_via_list_name(
    string_with_list_labels: str, label_separator: str, list_name: str, list_lookup: ListLookup
) -> list[str]:
    """
    Splits a string into list labels and resolves each label to its node name.

    Args:
        string_with_list_labels: the string containing list labels
        label_separator: separator in the string that contains the labels
        list_name: name of the list
        list_lookup: `ListLookup` of the project

    Returns:
        A list of node names. If the string is empty, it returns an empty list.

    Examples:
        ```python
        string_with_list_labels = "Label 1; Label 2"
        nodes = xmllib.get_list_nodes_from_string_via_list_name(
            string_with_list_labels=string_with_list_labels,
            label_separator=";",
            list_name="list1",
            list_lookup=list_lookup,
        )
        # nodes == ["node1", "node2"]
        ```

        ```python
        string_with_list_labels = ""
        nodes = xmllib.get_list_nodes_from_string_via_list_name(
            string_with_list_labels=string_with_list_labels,
            label_separator=";",
            list_name="list1",
            list_lookup=list_lookup,
        )
        # nodes == []
        ```

        ```python
        string_with_list_labels = pd.NA
        nodes = xmllib.get_list_nodes_from_string_via_list_name(
            string_with_list_labels=string_with_list_labels,
            label_separator=";",
            list_name="list1",
            list_lookup=list_lookup,
        )
        # nodes == []
        ```
    """
    resolved_nodes: list[str] = []
    if is_nonempty_value_internal(string_with_list_labels):
        for single_label in create_list_from_string(string_with_list_labels, label_separator):
            resolved_nodes.append(list_lookup.get_node_via_list_name(list_name, single_label))
    return resolved_nodes

`get_list_nodes_from_string_via_property`

Takes a string containing list labels, the separator by which they can be split, a property name and the list lookup. Resolves the labels and returns the list name to be referenced in the XML file and a list of node names. If the string is empty, it returns an empty string as list name and an empty list of node names.

Parameters:

Name	Type	Description	Default
`string_with_list_labels`	`str`	the string containing the labels	required
`label_separator`	`str`	separator in the string that contains the labels	required
`property_name`	`str`	name of the property	required
`list_lookup`	`ListLookup`	`ListLookup` of the project	required

Returns:

Type	Description
`tuple[str, list[str]]`	The name of the list and a list of node names.

Examples:

string_with_list_labels = "Label 1; Label 2"
list_name, nodes = xmllib.get_list_nodes_from_string_via_property(
    string_with_list_labels=string_with_list_labels,
    label_separator=";",
    property_name=":hasList",
    list_lookup=list_lookup,
)
# list_name == "list1"
# nodes == ["node1", "node2"]

string_with_list_labels = ""
list_name, nodes = xmllib.get_list_nodes_from_string_via_property(
    string_with_list_labels=string_with_list_labels,
    label_separator=";",
    property_name=":hasList",
    list_lookup=list_lookup,
)
# list_name == ""
# nodes == []

string_with_list_labels = pd.NA
list_name, nodes = xmllib.get_list_nodes_from_string_via_property(
    string_with_list_labels=string_with_list_labels,
    label_separator=";",
    property_name=":hasList",
    list_lookup=list_lookup,
)
# list_name == ""
# nodes == []

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def get_list_nodes_from_string_via_property(
    string_with_list_labels: str, label_separator: str, property_name: str, list_lookup: ListLookup
) -> tuple[str, list[str]]:
    """
    Splits a string into list labels and resolves them through the list that a property uses.
    Returns the list name to be referenced in the XML file together with the node names.
    If the string is empty, it returns an empty string as list name and an empty list of node names.

    Args:
        string_with_list_labels: the string containing the labels
        label_separator: separator in the string that contains the labels
        property_name: name of the property
        list_lookup: `ListLookup` of the project

    Returns:
        The name of the list and a list of node names.

    Examples:
        ```python
        string_with_list_labels = "Label 1; Label 2"
        list_name, nodes = xmllib.get_list_nodes_from_string_via_property(
            string_with_list_labels=string_with_list_labels,
            label_separator=";",
            property_name=":hasList",
            list_lookup=list_lookup,
        )
        # list_name == "list1"
        # nodes == ["node1", "node2"]
        ```

        ```python
        string_with_list_labels = ""
        list_name, nodes = xmllib.get_list_nodes_from_string_via_property(
            string_with_list_labels=string_with_list_labels,
            label_separator=";",
            property_name=":hasList",
            list_lookup=list_lookup,
        )
        # list_name == ""
        # nodes == []
        ```

        ```python
        string_with_list_labels = pd.NA
        list_name, nodes = xmllib.get_list_nodes_from_string_via_property(
            string_with_list_labels=string_with_list_labels,
            label_separator=";",
            property_name=":hasList",
            list_lookup=list_lookup,
        )
        # list_name == ""
        # nodes == []
        ```
    """
    if not is_nonempty_value_internal(string_with_list_labels):
        return "", []
    # one (list name, node name) pair per label, in the order of the labels
    resolved_pairs = [
        list_lookup.get_list_name_and_node_via_property(property_name, single_label)
        for single_label in create_list_from_string(string_with_list_labels, label_separator)
    ]
    # the list name of the last resolved label is returned; without any label it stays empty
    resolved_list = resolved_pairs[-1][0] if resolved_pairs else ""
    return resolved_list, [node_name for _, node_name in resolved_pairs]

`escape_reserved_xml_characters`

From richtext strings (encoding="xml"), escape the reserved characters <, > and &, but only if they are not part of a standard standoff tag or escape sequence.

See the documentation for the standard standoff tags allowed by DSP-API, which will not be escaped.

Parameters:

Name	Type	Description	Default
`text`	`str`	the richtext string to be escaped	required

Returns:

Type	Description
`str`	The escaped richtext string

Examples:

result = xmllib.escape_reserved_xml_characters("Text <unknownTag>")
# result == "Text &lt;unknownTag&gt;"

result = xmllib.escape_reserved_xml_characters("Text <br/> text after")
# result == "Text <br/> text after"

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def escape_reserved_xml_characters(text: str) -> str:
    """
    Escapes the reserved characters `<`, `>` and `&` in richtext strings (encoding="xml"),
    unless they belong to a standard standoff tag or to an escape sequence.

    [See the documentation for the standard standoff tags allowed by DSP-API,
    which will not be escaped.](https://docs.dasch.swiss/latest/DSP-API/03-endpoints/api-v2/text/standard-standoff/)

    Args:
        text: the richtext string to be escaped

    Returns:
        The escaped richtext string

    Examples:
        ```python
        result = xmllib.escape_reserved_xml_characters("Text <unknownTag>")
        # result == "Text &lt;unknownTag&gt;"
        ```

        ```python
        result = xmllib.escape_reserved_xml_characters("Text <br/> text after")
        # result == "Text <br/> text after"
        ```
    """
    known_tags = "|".join(KNOWN_XML_TAG_REGEXES)
    # "<" that does not start a known opening/closing/self-closing tag
    stray_lt = rf"<(?!/?({known_tags})/?>)"
    # ">" that does not end a known tag
    stray_gt = rf"(?<!</?({known_tags})/?)>"
    # "&" that does not start an escape sequence
    stray_amp = r"&(?![#a-zA-Z0-9]+;)"
    # order matters: "&" comes last; the entities inserted before are escape sequences and stay untouched
    for pattern, entity in ((stray_lt, "&lt;"), (stray_gt, "&gt;"), (stray_amp, "&amp;")):
        text = regex.sub(pattern, entity, text)
    return text

`reformat_date`

Reformats a date string into the DSP format.

If the input cannot be reformatted according to the configuration, or if the result is not a valid DSP date, a warning is emitted and the original input is returned.
If the input is empty, a warning is emitted and an empty string is returned.
If the input is already a correctly formatted DSP-date, the original input is returned.

Parameters:

Name	Type	Description	Default
`date`	`str \| int`	date string to be reformatted	required
`date_precision_separator`	`str \| None`	the separation between the day, month and year	required
`date_range_separator`	`str \| None`	the separation between two dates	required
`date_format`	`DateFormat`	the format of the date, see `DateFormat` for options	required
`calendar`	`Calendar`	the calendar of the date, see `Calendar` for options	`GREGORIAN`
`era`	`Era \| None`	the era of the date, see `Era` for options	`CE`
`resource_id`	`str \| None`	the ID of the associated resource, this is to improve the error message	`None`

Returns:

Type	Description
`str`	A reformatted date or the original input if the reformatted result is not a valid DSP date

Examples:

# default configuration, starting with the day
result = xmllib.reformat_date(
    date="1.11.2000",
    date_precision_separator=".",
    date_range_separator=None,
    date_format=xmllib.DateFormat.DD_MM_YYYY
)
# result == "GREGORIAN:CE:2000-11-1:CE:2000-11-1"

# default configuration, but starting with the year
result = xmllib.reformat_date(
    date="2000.11.1",
    date_precision_separator=".",
    date_range_separator=None,
    date_format=xmllib.DateFormat.YYYY_MM_DD,
)
# result == "GREGORIAN:CE:2000-11-1:CE:2000-11-1"

# with a date range
result = xmllib.reformat_date(
    date="1.11.2000-2001",
    date_precision_separator=".",
    date_range_separator="-",
    date_format=xmllib.DateFormat.DD_MM_YYYY,
)
# result == "GREGORIAN:CE:2000-11-1:CE:2001"

# islamic calendar, where eras are not allowed
result = xmllib.reformat_date(
    date="1.11.2000",
    date_precision_separator=".",
    date_range_separator=None,
    date_format=xmllib.DateFormat.DD_MM_YYYY,
    calendar=xmllib.Calendar.ISLAMIC,
    era=None
)
# result == "ISLAMIC:2000-11-1:2000-11-1"

# with a different era
result = xmllib.reformat_date(
    date="1.11.2000",
    date_precision_separator=".",
    date_range_separator="-",
    date_format=xmllib.DateFormat.DD_MM_YYYY,
    era=xmllib.Era.AD
)
# result == "GREGORIAN:AD:2000-11-1:AD:2000-11-1"

# reformatted date, no precision in the date string is required
result = xmllib.reformat_date(
    date="2000",
    date_precision_separator=".",
    date_range_separator="-",
    date_format=xmllib.DateFormat.DD_MM_YYYY,
)
# result == "GREGORIAN:CE:2000:CE:2000"

# already correctly formatted date
result = xmllib.reformat_date(
    date="GREGORIAN:CE:2000:CE:2000",
    date_precision_separator=".",
    date_range_separator="-",
    date_format=xmllib.DateFormat.DD_MM_YYYY,
)
# result == "GREGORIAN:CE:2000:CE:2000"

# invalid input: a warning is emitted and the original input is returned
result = xmllib.reformat_date(
    date="not-a-date",
    date_precision_separator=".",
    date_range_separator="-",
    date_format=xmllib.DateFormat.DD_MM_YYYY,
)
# WARNING is emitted
# result == "not-a-date"

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def reformat_date(
    date: str | int,
    date_precision_separator: str | None,
    date_range_separator: str | None,
    date_format: DateFormat,
    calendar: Calendar = Calendar.GREGORIAN,
    era: Era | None = Era.CE,
    resource_id: str | None = None,
) -> str:
    """
    Reformats a date string into the DSP format.

    - If the input cannot be reformatted according to the configuration, or if the result
      is not a valid DSP date, a warning is emitted and the original input is returned.
    - If the input is empty, a warning is emitted and an empty string is returned.
    - If the input is already a correctly formatted DSP-date, the original input is returned.
    - If the range separator is `None` or an empty string, the input is treated as a single date.

    Args:
        date: date string to be reformatted
        date_precision_separator: the separation between the day, month and year
        date_range_separator: the separation between two dates
        date_format: the format of the date, see [`DateFormat` for options](https://docs.dasch.swiss/latest/DSP-TOOLS/xmllib-api-reference/config-options/#xmllib.models.config_options.DateFormat)
        calendar: the calendar of the date, see [`Calendar` for options](https://docs.dasch.swiss/latest/DSP-TOOLS/xmllib-api-reference/config-options/#xmllib.models.config_options.Calendar)
        era: the era of the date, see [`Era` for options](https://docs.dasch.swiss/latest/DSP-TOOLS/xmllib-api-reference/config-options/#xmllib.models.config_options.Era)
        resource_id: the ID of the associated resource, this is to improve the error message

    Raises:
        XmllibInputError: if the precision separator and the range separator are identical

    Returns:
        A reformatted date or the original input if the reformatted result is not a valid DSP date

    Examples:
        ```python
        # default configuration, starting with the day
        result = xmllib.reformat_date(
            date="1.11.2000",
            date_precision_separator=".",
            date_range_separator=None,
            date_format=xmllib.DateFormat.DD_MM_YYYY
        )
        # result == "GREGORIAN:CE:2000-11-1:CE:2000-11-1"
        ```

        ```python
        # default configuration, but starting with the year
        result = xmllib.reformat_date(
            date="2000.11.1",
            date_precision_separator=".",
            date_range_separator=None,
            date_format=xmllib.DateFormat.YYYY_MM_DD,
        )
        # result == "GREGORIAN:CE:2000-11-1:CE:2000-11-1"
        ```

        ```python
        # with a date range
        result = xmllib.reformat_date(
            date="1.11.2000-2001",
            date_precision_separator=".",
            date_range_separator="-",
            date_format=xmllib.DateFormat.DD_MM_YYYY,
        )
        # result == "GREGORIAN:CE:2000-11-1:CE:2001"
        ```

        ```python
        # islamic calendar, where eras are not allowed
        result = xmllib.reformat_date(
            date="1.11.2000",
            date_precision_separator=".",
            date_range_separator=None,
            date_format=xmllib.DateFormat.DD_MM_YYYY,
            calendar=xmllib.Calendar.ISLAMIC,
            era=None
        )
        # result == "ISLAMIC:2000-11-1:2000-11-1"
        ```

        ```python
        # with a different era
        result = xmllib.reformat_date(
            date="1.11.2000",
            date_precision_separator=".",
            date_range_separator="-",
            date_format=xmllib.DateFormat.DD_MM_YYYY,
            era=xmllib.Era.AD
        )
        # result == "GREGORIAN:AD:2000-11-1:AD:2000-11-1"
        ```

        ```python
        # reformatted date, no precision in the date string is required
        result = xmllib.reformat_date(
            date="2000",
            date_precision_separator=".",
            date_range_separator="-",
            date_format=xmllib.DateFormat.DD_MM_YYYY,
        )
        # result == "GREGORIAN:CE:2000:CE:2000"
        ```

        ```python
        # already correctly formatted date
        result = xmllib.reformat_date(
            date="GREGORIAN:CE:2000:CE:2000",
            date_precision_separator=".",
            date_range_separator="-",
            date_format=xmllib.DateFormat.DD_MM_YYYY,
        )
        # result == "GREGORIAN:CE:2000:CE:2000"
        ```

        ```python
        # invalid input: a warning is emitted and the original input is returned
        result = xmllib.reformat_date(
            date="not-a-date",
            date_precision_separator=".",
            date_range_separator="-",
            date_format=xmllib.DateFormat.DD_MM_YYYY,
        )
        # WARNING is emitted
        # result == "not-a-date"
        ```
    """
    if not is_nonempty_value_internal(date):
        msg_info = MessageInfo(
            "The date to be reformatted is empty. An empty string is returned.", resource_id=resource_id
        )
        emit_xmllib_input_warning(msg_info)
        return ""
    date = str(date).strip()
    invalid_date_info = MessageInfo(
        f"The provided date '{date}' does not conform to the expected format, the original value is returned.",
        resource_id=resource_id,
    )
    # Here we want to check if the input is already a reformatted date. In that case, we would expect a calendar.
    # The function that checks if an input is a valid date does not require a calendar,
    # so unformatted input for example, '2000' may be accepted as a valid date.
    if regex.search(r"(GREGORIAN|JULIAN|ISLAMIC)", date):
        # The input is returned unchanged either way; a warning is only emitted if it is not a valid date.
        if not is_date_internal(date):
            emit_xmllib_input_warning(invalid_date_info)
        return date
    if date_precision_separator and date_precision_separator == date_range_separator:
        msg_info = MessageInfo(
            f"The precision separator and range separator provided are identical: '{date_precision_separator}'. "
            f"This is not allowed.",
            resource_id=resource_id,
        )
        raise_xmllib_input_error(msg_info)
    # An empty range separator is handled like None, because str.split("") raises a ValueError.
    if date_range_separator:
        date_split = [found for x in date.split(date_range_separator) if (found := x.strip())]
    else:
        date_split = [date]
    all_dates = [_reformat_single_date(x, date_precision_separator, date_format, resource_id) for x in date_split]
    if era:
        all_dates = [f"{era.value}:{x}" for x in all_dates]
    # A single date is duplicated, so that the result always consists of a start and an end date.
    if len(all_dates) == 1:
        all_dates.append(all_dates[0])
    reformatted_str = ":".join(all_dates)
    if calendar:
        reformatted_str = f"{calendar.value}:{reformatted_str}"
    if is_date_internal(reformatted_str):
        return reformatted_str
    emit_xmllib_input_warning(invalid_date_info)
    return date

`find_dates_in_string`

Checks if a string contains date values (single dates, or date ranges), and return all found dates as set of DSP-formatted strings. Returns an empty set if no date was found. See XML documentation for details.

Notes

If no era or calendar is given, dates are interpreted in the Common Era and the Gregorian calendar.
Standalone numbers from 000-2999, in 3/4-digit form, are interpreted as years CE.
If a number (with any number of digits) is followed by CE, C.E., AD, A.D., it is interpreted as years CE.
If a number (with any number of digits) is followed by BCE, BC, B.C., B.C.E., av. J.-C., it is interpreted as years BCE.
Dates written with slashes are always interpreted in a European manner: 5/11/2021 is the 5th of November.
In the European notation, 2-digit years are expanded to 4 digits, with the current year as watershed:
- 30.4.24 -> 30.04.2024
- 30.4.50 -> 30.04.1950

Currently supported date formats

0476-09-04 -> GREGORIAN:CE:0476-09-04:CE:0476-09-04
0476_09_04 -> GREGORIAN:CE:0476-09-04:CE:0476-09-04
30.4.2021 -> GREGORIAN:CE:2021-04-30:CE:2021-04-30
30.4.21 -> GREGORIAN:CE:2021-04-30:CE:2021-04-30
5/11/2021 -> GREGORIAN:CE:2021-11-05:CE:2021-11-05
Jan 26, 1993 -> GREGORIAN:CE:1993-01-26:CE:1993-01-26
26 Jan 1993 -> GREGORIAN:CE:1993-01-26:CE:1993-01-26
26 January 1993 -> GREGORIAN:CE:1993-01-26:CE:1993-01-26
26. Jan. 1993 -> GREGORIAN:CE:1993-01-26:CE:1993-01-26
26. Januar 1993 -> GREGORIAN:CE:1993-01-26:CE:1993-01-26
28.2.-1.12.1515 -> GREGORIAN:CE:1515-02-28:CE:1515-12-01
25.-26.2.0800 -> GREGORIAN:CE:0800-02-25:CE:0800-02-26
1.9.2022-3.1.2024 -> GREGORIAN:CE:2022-09-01:CE:2024-01-03
1848 -> GREGORIAN:CE:1848:CE:1848
1849/1850 -> GREGORIAN:CE:1849:CE:1850
1849/50 -> GREGORIAN:CE:1849:CE:1850
1845-50 -> GREGORIAN:CE:1845:CE:1850
840-50 -> GREGORIAN:CE:840:CE:850
840-1 -> GREGORIAN:CE:840:CE:841
9 BC / 9 B.C. / 9 B.C.E. / 9 BCE -> GREGORIAN:BC:9:BC:9
20 BCE - 50 CE -> GREGORIAN:BC:20:CE:50
1000-900 av. J.-C. -> GREGORIAN:BC:1000:BC:900
45 av. J.-C. -> GREGORIAN:BC:45:BC:45

Parameters:

Name	Type	Description	Default
`string`	`str`	string to check	required

Returns:

Type	Description
`set[str]`	(possibly empty) set of DSP-formatted date strings

Examples:

result = xmllib.find_dates_in_string("1849/1850")
# result == {"GREGORIAN:CE:1849:CE:1850"}

result = xmllib.find_dates_in_string("not a valid date")
# result == set()

result = xmllib.find_dates_in_string("first date: 2024. Second: 2025.")
# result == {"GREGORIAN:CE:2024:CE:2024", "GREGORIAN:CE:2025:CE:2025"}

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def find_dates_in_string(string: str) -> set[str]:
    """
    Searches a string for date values (single dates, or date ranges),
    and returns every date that was found as a DSP-formatted string.
    If the string contains no date, the returned set is empty.
    [See XML documentation for details](https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#date).

    Notes:
        - If no era or calendar is given, dates are interpreted in the Common Era and the Gregorian calendar.
        - Standalone numbers from 000-2999, in 3/4-digit form, are interpreted as years CE.
        - If a number (with any number of digits) is followed by CE, C.E., AD, A.D., it is interpreted as years CE.
        - If a number (with any number of digits) is followed by BCE, BC, B.C., B.C.E., av. J.-C.,
          it is interpreted as years BCE.
        - Dates written with slashes are always interpreted in a European manner: 5/11/2021 is the 5th of November.
        - In the European notation, 2-digit years are expanded to 4 digits, with the current year as watershed:
            - 30.4.24 -> 30.04.2024
            - 30.4.50 -> 30.04.1950

    Currently supported date formats:
        - 0476-09-04 -> GREGORIAN:CE:0476-09-04:CE:0476-09-04
        - 0476_09_04 -> GREGORIAN:CE:0476-09-04:CE:0476-09-04
        - 30.4.2021 -> GREGORIAN:CE:2021-04-30:CE:2021-04-30
        - 30.4.21 -> GREGORIAN:CE:2021-04-30:CE:2021-04-30
        - 5/11/2021 -> GREGORIAN:CE:2021-11-05:CE:2021-11-05
        - Jan 26, 1993 -> GREGORIAN:CE:1993-01-26:CE:1993-01-26
        - 26 Jan 1993 -> GREGORIAN:CE:1993-01-26:CE:1993-01-26
        - 26 January 1993 -> GREGORIAN:CE:1993-01-26:CE:1993-01-26
        - 26. Jan. 1993 -> GREGORIAN:CE:1993-01-26:CE:1993-01-26
        - 26. Januar 1993 -> GREGORIAN:CE:1993-01-26:CE:1993-01-26
        - 28.2.-1.12.1515 -> GREGORIAN:CE:1515-02-28:CE:1515-12-01
        - 25.-26.2.0800 -> GREGORIAN:CE:0800-02-25:CE:0800-02-26
        - 1.9.2022-3.1.2024 -> GREGORIAN:CE:2022-09-01:CE:2024-01-03
        - 1848 -> GREGORIAN:CE:1848:CE:1848
        - 1849/1850 -> GREGORIAN:CE:1849:CE:1850
        - 1849/50 -> GREGORIAN:CE:1849:CE:1850
        - 1845-50 -> GREGORIAN:CE:1845:CE:1850
        - 840-50 -> GREGORIAN:CE:840:CE:850
        - 840-1 -> GREGORIAN:CE:840:CE:841
        - 9 BC / 9 B.C. / 9 B.C.E. / 9 BCE -> GREGORIAN:BC:9:BC:9
        - 20 BCE - 50 CE -> GREGORIAN:BC:20:CE:50
        - 1000-900 av. J.-C. -> GREGORIAN:BC:1000:BC:900
        - 45 av. J.-C. -> GREGORIAN:BC:45:BC:45

    Args:
        string: string to check

    Returns:
        (possibly empty) set of DSP-formatted date strings

    Examples:
        ```python
        result = xmllib.find_dates_in_string("1849/1850")
        # result == {"GREGORIAN:CE:1849:CE:1850"}
        ```

        ```python
        result = xmllib.find_dates_in_string("not a valid date")
        # result == set()
        ```

        ```python
        result = xmllib.find_dates_in_string("first date: 2024. Second: 2025.")
        # result == {"GREGORIAN:CE:2024:CE:2024", "GREGORIAN:CE:2025:CE:2025"}
        ```
    """

    # Only search inputs that hold a value: the function may be called on an empty or N/A cell.
    if is_nonempty_value_internal(string):
        return _find_dates_in_string(string)
    return set()

`make_xsd_compatible_id`

An xsd:ID may not contain all types of special characters, and it must start with a letter or underscore. Replace illegal characters with _, and prepend a leading _ if necessary.

The string must contain at least one Unicode letter (matching the regex \p{L}), _, !, ?, or number, but must not be None, <NA>, N/A, or -.

Parameters:

Name	Type	Description	Default
`input_value`	`str \| float \| int`	input value	required

Returns:

Type	Description
`str`	An xsd ID compatible string based on the input value

Examples:

result = xmllib.make_xsd_compatible_id("0_Universität_Basel")
# result == "_0_Universit_t_Basel"

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def make_xsd_compatible_id(input_value: str | float | int) -> str:
    """
    Turns the input into a string that can be used as xsd:ID.
    An xsd:ID must start with a letter or underscore, and it may only contain certain characters.
    Illegal characters are replaced with `_`, and a leading `_` is prepended if necessary.

    The string must contain at least one Unicode letter (matching the regex ``\\p{L}``),
    `_`, `!`, `?`, or number, but must not be `None`, `<NA>`, `N/A`, or `-`.

    Args:
        input_value: input value

    Raises:
        XmllibInputError: if the input cannot be transformed to an xsd:ID

    Returns:
        An xsd ID compatible string based on the input value

    Examples:
        ```python
        result = xmllib.make_xsd_compatible_id("0_Universität_Basel")
        # result == "_0_Universit_t_Basel"
        ```
    """
    if not is_nonempty_value_internal(input_value):
        unusable_input = MessageInfo(f"The input '{input_value}' cannot be transformed to an xsd:ID")
        raise_xmllib_input_error(unusable_input)
    # step 1: a first character that is neither an ASCII letter nor an underscore gets an underscore in front of it
    with_valid_start = regex.sub(r"^(?=[^A-Za-z_])", "_", str(input_value))
    # step 2: every character that is not allowed is replaced by an underscore
    return regex.sub(r"[^\w_\-.]", "_", with_valid_start, flags=regex.ASCII)

`make_xsd_compatible_id_with_uuid`

An xsd:ID may not contain all types of special characters, and it must start with a letter or underscore. Replace illegal characters with _, and prepend a leading _ if necessary. Additionally, add a UUID at the end. The UUID will be different each time the function is called.

The string must contain at least one Unicode letter (matching the regex \p{L}), _, !, ?, or number, but must not be None, <NA>, N/A, or -.

Parameters:

Name	Type	Description	Default
`input_value`	`str \| float \| int`	input value	required

Returns:

Type	Description
`str`	an xsd ID based on the input value, with a UUID attached.

Examples:

result = xmllib.make_xsd_compatible_id_with_uuid("Universität_Basel")
# result == "Universit_t_Basel_88f5cd0b-f333-4174-9030-65900b17773d"

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def make_xsd_compatible_id_with_uuid(input_value: str | float | int) -> str:
    """
    Turns the input into a string that can be used as xsd:ID, and appends a UUID to it.
    An xsd:ID must start with a letter or underscore, and it may only contain certain characters.
    Illegal characters are replaced with `_`, and a leading `_` is prepended if necessary.
    The appended UUID is newly generated on every call, so the result differs each time.

    The string must contain at least one Unicode letter (matching the regex ``\\p{L}``),
    `_`, `!`, `?`, or number, but must not be `None`, `<NA>`, `N/A`, or `-`.

    Args:
        input_value: input value

    Raises:
        XmllibInputError: if the input cannot be transformed to an xsd:ID

    Returns:
        an xsd ID based on the input value, with a UUID attached.

    Examples:
        ```python
        result = xmllib.make_xsd_compatible_id_with_uuid("Universität_Basel")
        # result == "Universit_t_Basel_88f5cd0b-f333-4174-9030-65900b17773d"
        ```
    """
    # The ID is sanitised first, so invalid input raises before a UUID is generated.
    compatible_id = make_xsd_compatible_id(input_value)
    return f"{compatible_id}_{uuid.uuid4()}"

`create_list_from_string`

Creates a list from a string. Trailing and leading whitespaces are removed from the list items.

Parameters:

Name	Type	Description	Default
`string`	`str`	input string	required
`separator`	`str`	The character that separates the different values in the string. For example, a comma or newline.	required

Returns:

Type	Description
`list[str]`	The list that results from splitting the input string. If the original string is empty or consists only of whitespace characters, the resulting list will be empty.

Attention

This function will be removed in the future. Use create_list_from_input instead.

Examples:

result = xmllib.create_list_from_string(" One/  Two\n/", "/")
# result == ["One", "Two"]

result = xmllib.create_list_from_string("   \n    ", "\n")
# result == []

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def create_list_from_string(string: str, separator: str) -> list[str]:
    """
    Splits a string at the separator and returns the parts as list.
    Leading and trailing whitespaces are stripped from each part, and parts that are empty afterwards are dropped.

    Args:
        string: input string
        separator: The character that separates the different values in the string.
            For example, a comma or newline.

    Returns:
        The list that results from splitting the input string.
            If the original string is empty or consists only of whitespace characters, the resulting list will be empty.

    Raises:
        XmllibInputError: If the input value is not a string.

    Attention:
        This function will be removed in the future. Use `create_list_from_input` instead.

    Examples:
        ```python
        result = xmllib.create_list_from_string(" One/  Two\\n/", "/")
        # result == ["One", "Two"]
        ```

        ```python
        result = xmllib.create_list_from_string("   \\n    ", "\\n")
        # result == []
        ```
    """
    # The deprecation warning is emitted on every call, before the input is validated.
    deprecation_notice = (
        "This function will be deleted in the future. Use the new function called 'create_list_from_input' instead."
    )
    warnings.warn(DspToolsFutureWarning(deprecation_notice))
    if not isinstance(string, str):
        wrong_type = MessageInfo(
            f"The input for this function must be a string. Your input is a {type(string).__name__}."
        )
        raise_xmllib_input_error(wrong_type)
    return [item for item in map(str.strip, string.split(separator)) if item]

`create_list_from_input`

Create a list of strings from the input value, using the provided separator. If the input is empty it returns an empty list.

Parameters:

Name	Type	Description	Default
`input_value`	`Any`	input value to check and convert	required
`separator`	`str`	The character that separates the different values in the string. For example, a comma or newline.	required

Returns:

Type	Description
`list[str]`	The list that results from splitting the input string.

Examples:

result = xmllib.create_list_from_input("  one, two,  three", ",")
# result == ["one", "two", "three"]

result = xmllib.create_list_from_input(1, "-")
# result == ["1"]

result = xmllib.create_list_from_input("   \n    ", "\n")
# result == []

result = xmllib.create_list_from_input(None, ",")
# result == []

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def create_list_from_input(input_value: Any, separator: str) -> list[str]:
    """
    Converts the input value into a list of strings.
    A string is split at the separator, with whitespaces stripped from the parts and empty parts dropped.
    A non-empty value that is not a string is converted to a string and returned as single list item.
    An empty input results in an empty list.

    Args:
        input_value: input value to check and convert
        separator: The character that separates the different values in the string.
            For example, a comma or newline.

    Returns:
        The list that results from splitting the input string.

    Examples:
        ```python
        result = xmllib.create_list_from_input("  one, two,  three", ",")
        # result == ["one", "two", "three"]
        ```

        ```python
        result = xmllib.create_list_from_input(1, "-")
        # result == ["1"]
        ```

        ```python
        result = xmllib.create_list_from_input("   \\n    ", "\\n")
        # result == []
        ```

        ```python
        result = xmllib.create_list_from_input(None, ",")
        # result == []
        ```
    """
    if not is_nonempty_value_internal(input_value):
        return []
    if not isinstance(input_value, str):
        # values that are not strings cannot be split
        return [str(input_value)]
    return [item for item in map(str.strip, input_value.split(separator)) if item]

`create_non_empty_list_from_string`

Creates a list from a string. Trailing and leading whitespaces are removed from the list items.

If the resulting list is empty it will raise an XmllibInputError.

Parameters:

Name	Type	Description	Default
`string`	`str`	input string	required
`separator`	`str`	The character that separates the different values in the string. For example, a comma or newline.	required
`resource_id`	`str \| None`	If the ID of the resource is provided, a better error message can be composed	`None`
`prop_name`	`str \| None`	If the name of the property is provided, a better error message can be composed	`None`

Returns:

Type	Description
`list[str]`	The list that results from splitting the input string.

Examples:

result = xmllib.create_non_empty_list_from_string("One\nTwo   ", "\n")
# result == ["One", "Two"]

result = xmllib.create_non_empty_list_from_string("   \n/    ", "/")
# raises XmllibInputError

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def create_non_empty_list_from_string(
    string: str, separator: str, resource_id: str | None = None, prop_name: str | None = None
) -> list[str]:
    """
    Splits a string at the separator and returns the parts as list.
    Leading and trailing whitespaces are stripped from the list items.

    An `XmllibInputError` is raised if no list item is left.

    Args:
        string: input string
        separator: The character that separates the different values in the string.
            For example, a comma or newline.
        resource_id: If the ID of the resource is provided, a better error message can be composed
        prop_name: If the name of the property is provided, a better error message can be composed

    Returns:
        The list that results from splitting the input string.

    Raises:
        XmllibInputError: If the resulting list is empty.

    Examples:
        ```python
        result = xmllib.create_non_empty_list_from_string("One\\nTwo   ", "\\n")
        # result == ["One", "Two"]
        ```

        ```python
        result = xmllib.create_non_empty_list_from_string("   \\n/    ", "/")
        # raises XmllibInputError
        ```
    """
    items = create_list_from_input(string, separator)
    if not items:
        # resource ID and property name are handed over, so that the error message can point to the culprit
        empty_list_info = MessageInfo(
            message="The input for this function must result in a non-empty list. Your input results in an empty list.",
            resource_id=resource_id,
            prop_name=prop_name,
        )
        raise_xmllib_input_error(empty_list_info)
    return items

`clean_whitespaces_from_string`

Remove redundant whitespaces (space, \n, \t, etc.) and replace them with a single space.

If the resulting string is empty, a warning will be printed.

Parameters:

Name	Type	Description	Default
`string`	`str`	input string	required

Returns:

Type	Description
`str`	The cleaned string.

Examples:

result = xmllib.clean_whitespaces_from_string("\t Text\nafter newline")
# result == "Text after newline"

result = xmllib.clean_whitespaces_from_string("      \n\t ")
# result == ""
# warns that the string is now empty

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def clean_whitespaces_from_string(string: str) -> str:
    """
    Collapses each sequence of whitespaces (space, `\\n`, `\\t`, etc.) into a single space,
    and removes the whitespaces at the start and at the end of the string.

    A warning is emitted if the string is empty afterwards.

    Args:
        string: input string

    Returns:
        The cleaned string.

    Examples:
        ```python
        result = xmllib.clean_whitespaces_from_string("\\t Text\\nafter newline")
        # result == "Text after newline"
        ```

        ```python
        result = xmllib.clean_whitespaces_from_string("      \\n\\t ")
        # result == ""
        # warns that the string is now empty
        ```
    """
    collapsed = regex.sub(r"\s+", " ", string)
    result = collapsed.strip()
    if not result:
        warning_text = (
            "The entered string is empty after all redundant whitespaces were removed. An empty string is returned."
        )
        emit_xmllib_input_warning(MessageInfo(warning_text))
    return result

`find_license_in_string`

Checks if a string contains a license, and returns it. Returns None if no license was found. The case (upper case/lower case) is ignored.

Look out: Your string should contain no more than 1 license. If it contains more, there is no guarantee which one will be returned.

See recommended licenses for details.

Parameters:

Name	Type	Description	Default
`string`	`str`	string to check	required

Returns:

Type	Description
`License \| None`	`License` object or `None`

Examples:

result = xmllib.find_license_in_string("CC BY")
# result == LicenseRecommended.CC.BY

result = xmllib.find_license_in_string("Creative Commons Developing Nations 2.0 Generic Deed")
# result == None

Currently supported license formats

"AI" -> LicenseRecommended.DSP.AI_GENERATED
"KI" -> LicenseRecommended.DSP.AI_GENERATED
"IA" -> LicenseRecommended.DSP.AI_GENERATED
"public domain" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
"gemeinfrei" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
"frei von Urheberrechten" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
"urheberrechtsbefreit" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
"libre de droits" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
"domaine public" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
"unknown" -> LicenseRecommended.DSP.UNKNOWN
"unbekannt" -> LicenseRecommended.DSP.UNKNOWN
"inconnu" -> LicenseRecommended.DSP.UNKNOWN
"CC BY" -> LicenseRecommended.CC.BY
"Creative Commons BY 4.0" -> LicenseRecommended.CC.BY
"CC 0 1.0" -> LicenseOther.Public.CC_0_1_0
"CC PDM 1.0" -> LicenseOther.Public.CC_PDM_1_0
"BORIS Standard License" -> LicenseOther.Various.BORIS_STANDARD
"LICENCE OUVERTE 2.0" -> LicenseOther.Various.FRANCE_OUVERTE

Source code in dsp/dsp-tools/src/dsp_tools/xmllib/helpers.py

def find_license_in_string(string: str) -> License | None:  # noqa: PLR0911 (too many return statements)
    """
    Searches a string for a license, and returns the license if one was found.
    If the string contains no known license, None is returned.
    Upper case and lower case are not distinguished.

    Look out: Your string should contain no more than 1 license.
    If it contains more, there is no guarantee which one will be returned.

    See [recommended licenses](https://docs.dasch.swiss/latest/DSP-TOOLS/xmllib-api-reference/licenses/recommended/)
    for details.

    Args:
        string: string to check

    Returns:
        `License` object or `None`

    Examples:
        ```python
        result = xmllib.find_license_in_string("CC BY")
        # result == LicenseRecommended.CC.BY
        ```

        ```python
        result = xmllib.find_license_in_string("Creative Commons Developing Nations 2.0 Generic Deed")
        # result == None
        ```

    Currently supported license formats:
        - "AI" -> LicenseRecommended.DSP.AI_GENERATED
        - "KI" -> LicenseRecommended.DSP.AI_GENERATED
        - "IA" -> LicenseRecommended.DSP.AI_GENERATED
        - "public domain" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "gemeinfrei" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "frei von Urheberrechten" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "urheberrechtsbefreit" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "libre de droits" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "domaine public" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "unknown" -> LicenseRecommended.DSP.UNKNOWN
        - "unbekannt" -> LicenseRecommended.DSP.UNKNOWN
        - "inconnu" -> LicenseRecommended.DSP.UNKNOWN
        - "CC BY" -> LicenseRecommended.CC.BY
        - "Creative Commons BY 4.0" -> LicenseRecommended.CC.BY
        - "CC 0 1.0" -> LicenseOther.Public.CC_0_1_0
        - "CC PDM 1.0" -> LicenseOther.Public.CC_PDM_1_0
        - "BORIS Standard License" -> LicenseOther.Various.BORIS_STANDARD
        - "LICENCE OUVERTE 2.0" -> LicenseOther.Various.FRANCE_OUVERTE
    """
    already_parsed = _get_already_parsed_license(string)
    if already_parsed:
        return already_parsed

    sep = r"[-_\p{Zs}]+"  # Zs = unicode category for space separator characters

    def contains(pattern: str) -> bool:
        # case-insensitive search of the pattern anywhere in the input string
        return regex.search(pattern, string, flags=regex.IGNORECASE) is not None

    # The checks run in a fixed order: CC 0 and CC PDM come before the general CC pattern,
    # because the general pattern would also match their "CC" prefix.
    if contains(rf"\b(Creative{sep}Commons|CC){sep}0({sep}1\.0)?\b"):
        return LicenseOther.Public.CC_0_1_0

    if contains(rf"\b(Creative{sep}Commons|CC){sep}PDM({sep}1\.0)?\b"):
        return LicenseOther.Public.CC_PDM_1_0

    cc_match = regex.search(
        rf"\b(CC|Creative{sep}Commons)({sep}(BY|NC|ND|SA))*({sep}[\d\.]+)?\b", string, flags=regex.IGNORECASE
    )
    if cc_match:
        # only the matched part of the string is handed over for the detailed analysis
        return _find_cc_license(cc_match.group(0))

    if contains(r"\b(AI|IA|KI)\b"):
        return LicenseRecommended.DSP.AI_GENERATED

    public_domain_pattern = (
        rf"\b(public{sep}domain|gemeinfrei|frei{sep}von{sep}Urheberrechten|urheberrechtsbefreit|"
        rf"libre{sep}de{sep}droits|domaine{sep}public)\b"
    )
    if contains(public_domain_pattern):
        return LicenseRecommended.DSP.PUBLIC_DOMAIN

    if contains(r"\b(unknown|unbekannt|inconnu)\b"):
        return LicenseRecommended.DSP.UNKNOWN

    boris_pattern = (
        rf"\b(BORIS|Bern{sep}Open{sep}Repository{sep}and{sep}Information{sep}System){sep}Standard{sep}License\b"
    )
    if contains(boris_pattern):
        return LicenseOther.Various.BORIS_STANDARD

    if contains(rf"\b(France{sep})?Licence{sep}ouverte({sep}2\.0)?\b"):
        return LicenseOther.Various.FRANCE_OUVERTE

    return None