Skip to content

Templates module

This module contains utility functions used with and supporting template rendering as part of the processing pipeline and labeling rules.

as_datetime(v)

Utility filter for converting a string to datetime.

Parameters:

Name Type Description Default
v str

The string to convert

required

Returns:

Type Description
datetime

Converted datetime object.

Source code in dataset/templates.py
def as_datetime(v: str) -> datetime:
    """Jinja2 filter turning a string into a `datetime`.

    The conversion is delegated to `parse_obj_as(datetime, v)`.

    Args:
        v: The string to convert

    Returns:
        The `datetime` object produced by the conversion.
    """
    converted = parse_obj_as(datetime, v)
    return converted

create_environment(templates_dirs=None, es=None, dataset_config=None)

Create Jinja2 native environment for rendering dataset templates.

Parameters:

Name Type Description Default
templates_dirs Union[str, pathlib.Path, List[Union[str, pathlib.Path]]]

The template directories

None
es Optional[elasticsearch.client.Elasticsearch]

The elasticsearch client object

None
dataset_config Optional[cr_kyoushi.dataset.config.DatasetConfig]

The dataset configuration

None

Returns:

Type Description
NativeEnvironment

Jinja2 template environment

Source code in dataset/templates.py
def create_environment(
    templates_dirs: Optional[Union[Text, Path, List[Union[Text, Path]]]] = None,
    es: Optional[Elasticsearch] = None,
    dataset_config: Optional[DatasetConfig] = None,
) -> NativeEnvironment:
    """Build the Jinja2 native environment used to render dataset templates.

    The environment is configured with `StrictUndefined`, the `do` and
    `loopcontrols` extensions, and the custom tests, filters and globals
    defined in this module. The Elasticsearch helpers (`Search`, `Q`,
    `Q_ALL`, `Q_MATCH_ALL`, `Q_TERM_ALL`, `EQL`) are only registered when
    an `es` client is supplied.

    Args:
        templates_dirs: The template directories. Defaults to
            `./templates` and `./` when not given.
        es: The elasticsearch client object
        dataset_config: The dataset configuration. When given together
            with `es`, its `name` is pre-bound as `dataset_name` on the
            `Search` and `EQL` helpers.

    Returns:
        Jinja2 template environment
    """
    # fall back to the default search path when the caller gave none
    search_path = (
        [Path("./templates"), Path("./")] if templates_dirs is None else templates_dirs
    )

    env = NativeEnvironment(
        loader=ChoiceLoader(
            [
                FileSystemLoader(search_path),
                PackageLoader("cr_kyoushi.dataset", "templates"),
            ]
        ),
        undefined=StrictUndefined,
        extensions=["jinja2.ext.do", "jinja2.ext.loopcontrols"],
    )

    env.tests.update(
        {
            "match_any": match_any,
            "regex": regex,
            "regex_search": regex_search,
            "regex_match": regex_match,
        }
    )
    env.filters.update({"as_datetime": as_datetime})
    env.globals.update(
        {
            "context": get_context,
            "datetime": datetime,
            "timedelta": timedelta,
        }
    )

    if es is not None:
        # only bind the dataset name when a dataset configuration is available
        bound = {} if dataset_config is None else {"dataset_name": dataset_config.name}
        env.globals.update(
            {
                "Search": functools.partial(elastic_dsl_search, using=es, **bound),
                "Q": Q,
                "Q_ALL": q_all,
                "Q_MATCH_ALL": functools.partial(q_all, "match"),
                "Q_TERM_ALL": functools.partial(q_all, "term"),
                "EQL": functools.partial(elastic_eql_search, es=es, **bound),
            }
        )

    return env

elastic_dsl_search(using, dataset_name=None, prefix_dataset_name=True, index=None, **kwargs)

Create an Elasticsearch DSL search object.

Parameters:

Name Type Description Default
using Elasticsearch

The elasticsearch client object

required
dataset_name Optional[str]

The dataset name

None
prefix_dataset_name bool

If the dataset name should be prefixed to the indices or not

True
index Union[Sequence[str], str]

The indices to create the search object for

None

Returns:

Type Description
Search

Configured elasticsearch DSL search object

Source code in dataset/templates.py
def elastic_dsl_search(
    using: Elasticsearch,
    dataset_name: Optional[str] = None,
    prefix_dataset_name: bool = True,
    index: Optional[Union[Sequence[str], str]] = None,
    **kwargs,
) -> Search:
    """Build an Elasticsearch DSL `Search` object for the resolved indices.

    Args:
        using: The elasticsearch client object
        dataset_name: The dataset name
        prefix_dataset_name: If the dataset name should be prefixed to the indices or not
        index: The indices to create the search object for
        **kwargs: Additional keyword arguments forwarded unchanged to `Search`

    Returns:
        Configured elasticsearch DSL search object
    """
    # index resolution is delegated to `resolve_indices`
    target_indices = resolve_indices(dataset_name, prefix_dataset_name, index)
    search = Search(using=using, index=target_indices, **kwargs)
    return search

elastic_eql_search(es, body, dataset_name=None, prefix_dataset_name=True, index=None)

Perform an Elasticsearch EQL query.

Parameters:

Name Type Description Default
es Elasticsearch

The elasticsearch client object

required
body Dict[str, Any]

The EQL query body

required
dataset_name Optional[str]

The dataset name

None
prefix_dataset_name bool

If the dataset name should be prefixed to the indices or not

True
index Union[Sequence[str], str]

The indices to perform the query on.

None

Returns:

Type Description
Dict[str, Any]

The EQL query result

Source code in dataset/templates.py
def elastic_eql_search(
    es: Elasticsearch,
    body: Dict[str, Any],
    dataset_name: Optional[str] = None,
    prefix_dataset_name: bool = True,
    index: Optional[Union[Sequence[str], str]] = None,
) -> Dict[str, Any]:
    """Run an Elasticsearch EQL query against the resolved indices.

    Args:
        es: The elasticsearch client object
        body: The EQL query body
        dataset_name: The dataset name
        prefix_dataset_name: If the dataset name should be prefixed to the indices or not
        index: The indices to perform the query on.

    Returns:
        The EQL query result
    """
    # index resolution is delegated to `resolve_indices`
    target_indices = resolve_indices(dataset_name, prefix_dataset_name, index)
    return EqlClient(es).search(index=target_indices, body=body)

get_context(c)

Utility function for getting the Jinja2 context.

Parameters:

Name Type Description Default
c Context

The Jinja2 context

required

Returns:

Type Description
Context

The Jinja2 context

Source code in dataset/templates.py
@contextfunction
def get_context(c: Context) -> Context:
    """Utility function for getting the Jinja2 context.

    Registered as the `context` global by `create_environment`, so templates
    can obtain the context object they are being rendered with.

    NOTE(review): `contextfunction` was deprecated in Jinja2 3.0 in favour
    of `pass_context` - confirm against the Jinja2 version this project pins.

    Args:
        c: The Jinja2 context

    Returns:
        The same Jinja2 context object that was passed in, unchanged.
    """
    return c

match_any(value, regex_list)

Perform multiple re.match and return True if at least one match is found.

Parameters:

Name Type Description Default
value str

The string to search in

required
regex_list List[str]

List of patterns to try matching

required

Returns:

Type Description
bool

True if at least one pattern matches False otherwise

Source code in dataset/templates.py
def match_any(value: str, regex_list: List[str]) -> bool:
    """Try `re.match` with every pattern and report whether any of them matched.

    Args:
        value: The string to match against
        regex_list: List of patterns to try matching

    Returns:
        `True` if at least one pattern matches, `False` otherwise
        (including when `regex_list` is empty).
    """
    # `re.match` only matches at the beginning of `value`
    return any(re.match(pattern, value) is not None for pattern in regex_list)

q_all(qry_type, **kwargs)

Create elasticsearch DSL bool term requiring all given terms to be true.

Parameters:

Name Type Description Default
qry_type str

The DSL query term type

required

Returns:

Type Description
Query

The configured DSL query term

Source code in dataset/templates.py
def q_all(qry_type: str, **kwargs) -> Query:
    """Create elasticsearch DSL bool term requiring all given terms to be true.

    Args:
        qry_type: The DSL query term type
        **kwargs: The terms to combine. A value that already is a `Query`
            is used as is (its keyword name is ignored); any other value
            is wrapped as `Q(qry_type, **{key: value})`.

    Returns:
        The configured DSL query term (a `bool` query whose `must` clause
        contains one entry per keyword argument).
    """
    # `Q` is a factory function, not a type, so the return value is
    # annotated with the `Query` base class instead.
    must = [
        val if isinstance(val, Query) else Q(qry_type, **{key: val})
        for key, val in kwargs.items()
    ]
    return Q("bool", must=must)

regex(value='', pattern='', ignorecase=False, multiline=False, match_type='search')

Expose re as a boolean filter using the search method by default. This is likely only useful for search and match which already have their own filters.

Note

Taken from Ansible

Parameters:

Name Type Description Default
value str

The string to search in

''
pattern str

The pattern to search

''
ignorecase bool

If the case should be ignored or not

False
multiline bool

If multiline matching should be used or not

False
match_type str

The re pattern match type to use

'search'

Returns:

Type Description
bool

True if a match was found False otherwise.

Source code in dataset/templates.py
def regex(
    value: str = "",
    pattern: str = "",
    ignorecase: bool = False,
    multiline: bool = False,
    match_type: str = "search",
) -> bool:
    """Expose `re` as a boolean filter using the `search` method by default.
    This is likely only useful for `search` and `match` which already
    have their own filters.

    !!! Note
        Taken from Ansible

    Args:
        value: The string to search in
        pattern: The pattern to search
        ignorecase: If the case should be ignored or not
        multiline: If multiline matching should be used or not
        match_type: The name of the compiled pattern method to use
            (e.g. `search`, `match`, `fullmatch`). If the compiled
            pattern has no attribute with that name, `search` is used.

    Returns:
        `True` if a match was found `False` otherwise.
    """
    flags = 0
    if ignorecase:
        flags |= re.I
    if multiline:
        flags |= re.M
    _re = re.compile(pattern, flags=flags)
    # Fall back to the bound `search` method; the previous default was the
    # *string* "search", which made unknown match types fail with a
    # "'str' object is not callable" TypeError instead of falling back.
    matcher = getattr(_re, match_type, _re.search)
    return bool(matcher(value))

regex_match(value, pattern='', ignorecase=False, multiline=False)

Perform a re.match returning a boolean

Note

Taken from Ansible

Parameters:

Name Type Description Default
value str

The string to search in

required
pattern str

The pattern to search

''
ignorecase bool

If the case should be ignored or not

False
multiline bool

If multiline matching should be used or not

False

Returns:

Type Description
bool

True if a match was found False otherwise.

Source code in dataset/templates.py
def regex_match(
    value: str, pattern: str = "", ignorecase: bool = False, multiline: bool = False
) -> bool:
    """Boolean wrapper around `re.match`.

    Delegates to `regex` with the match type fixed to `"match"`.

    !!! Note
        Taken from Ansible

    Args:
        value: The string to search in
        pattern: The pattern to search
        ignorecase: If the case should be ignored or not
        multiline: If multiline matching should be used or not

    Returns:
        `True` if a match was found `False` otherwise.
    """
    return regex(
        value=value,
        pattern=pattern,
        ignorecase=ignorecase,
        multiline=multiline,
        match_type="match",
    )

regex_search(value, pattern='', ignorecase=False, multiline=False)

Perform a re.search returning a boolean

Note

Taken from Ansible

Parameters:

Name Type Description Default
value str

The string to search in

required
pattern str

The pattern to search

''
ignorecase bool

If the case should be ignored or not

False
multiline bool

If multiline matching should be used or not

False

Returns:

Type Description
bool

True if a match was found False otherwise.

Source code in dataset/templates.py
def regex_search(
    value: str, pattern: str = "", ignorecase: bool = False, multiline: bool = False
) -> bool:
    """Boolean wrapper around `re.search`.

    Delegates to `regex` with the match type fixed to `"search"`.

    !!! Note
        Taken from Ansible

    Args:
        value: The string to search in
        pattern: The pattern to search
        ignorecase: If the case should be ignored or not
        multiline: If multiline matching should be used or not

    Returns:
        `True` if a match was found `False` otherwise.
    """
    return regex(
        value=value,
        pattern=pattern,
        ignorecase=ignorecase,
        multiline=multiline,
        match_type="search",
    )

render_template(template, variables, es=None, dataset_config=None)

Renders a dataset Jinja2 template string or file.

Parameters:

Name Type Description Default
template Union[str, pathlib.Path]

The template string or file

required
variables Dict[str, Any]

The context variables to use for rendering

required
es Optional[elasticsearch.client.Elasticsearch]

The elasticsearch client object

None
dataset_config Optional[cr_kyoushi.dataset.config.DatasetConfig]

The dataset configuration

None

Returns:

Type Description
Any

The rendered Jinja2 template

Source code in dataset/templates.py
def render_template(
    template: Union[Text, Path],
    variables: Dict[str, Any],
    es: Optional[Elasticsearch] = None,
    dataset_config: Optional[DatasetConfig] = None,
) -> Any:
    """Render a dataset Jinja2 template given as a string or as a file path.

    A fresh environment is created through `create_environment` for every
    call, using the default template directories.

    Args:
        template: The template string or file
        variables: The context variables to use for rendering
        es: The elasticsearch client object
        dataset_config: The dataset configuration

    Returns:
        The rendered Jinja2 template
    """
    environment = create_environment(es=es, dataset_config=dataset_config)

    # `Path` instances are looked up through the environment's loaders,
    # anything else is compiled as an inline template string
    if not isinstance(template, Path):
        compiled = environment.from_string(template)
    else:
        compiled = environment.get_template(str(template))

    rendered = compiled.render(**variables)

    # an `Undefined` result is turned into its undefined error instead of
    # being handed back to the caller
    if isinstance(rendered, Undefined):
        rendered._fail_with_undefined_error()
    return rendered

render_template_recursive(data, variables, es=None, dataset_config=None)

Renders a complex object containing Jinja2 templates

The complex object can be either a string, list or dictionary. This function will recurse all sub elements (e.g., dictionary values) and render any Jinja2 template strings it finds.

Parameters:

Name Type Description Default
data Any

The object to render

required
variables Dict[str, Any]

The context variables to use for rendering

required
es Optional[elasticsearch.client.Elasticsearch]

The elasticsearch client object

None
dataset_config Optional[cr_kyoushi.dataset.config.DatasetConfig]

The dataset configuration

None

Returns:

Type Description
Any

The object with all its Jinja2 templates rendered.

Source code in dataset/templates.py
def render_template_recursive(
    data: Any,
    variables: Dict[str, Any],
    es: Optional[Elasticsearch] = None,
    dataset_config: Optional[DatasetConfig] = None,
) -> Any:
    """Render every Jinja2 template string found inside a nested object.

    Strings are rendered with `render_template`; lists and dictionaries
    are walked recursively (for dictionaries both keys and values are
    rendered). New containers are returned, the input is not modified.
    Any other type is returned unchanged.

    Args:
        data: The object to render
        variables: The context variables to use for rendering
        es: The elasticsearch client object
        dataset_config: The dataset configuration

    Returns:
        The object with all its Jinja2 templates rendered.
    """

    def _recurse(item: Any) -> Any:
        # same rendering settings for every nested element
        return render_template_recursive(item, variables, es, dataset_config)

    # plain strings are the leaves that actually get rendered
    if isinstance(data, str):
        return render_template(data, variables, es, dataset_config)

    # lists are rendered element by element
    if isinstance(data, list):
        return [_recurse(element) for element in data]

    # dictionaries: keys may be templates as well, so render key and value
    if isinstance(data, dict):
        rendered_items = {}
        for raw_key, raw_val in data.items():
            new_key = _recurse(raw_key)
            new_val = _recurse(raw_val)
            rendered_items[new_key] = new_val
        return rendered_items

    # every other type is passed through untouched
    return data

write_template(src, dest, variables, es=None, dataset_config=None)

Render and write a dataset Jinja2 template file.

Parameters:

Name Type Description Default
src Path

The template source

required
dest Path

The file to write the rendered string to

required
variables Dict[str, Any]

The variable context to use for rendering

required
es Optional[elasticsearch.client.Elasticsearch]

The elasticsearch client object

None
dataset_config Optional[cr_kyoushi.dataset.config.DatasetConfig]

The dataset configuration

None
Source code in dataset/templates.py
def write_template(
    src: Path,
    dest: Path,
    variables: Dict[str, Any],
    es: Optional[Elasticsearch] = None,
    dataset_config: Optional[DatasetConfig] = None,
):
    """Render a dataset Jinja2 template file and write the result to disk.

    Mappings and non-text sequences are passed to `write_config_file`;
    any other rendered value is converted with `str` and written as text.

    Args:
        src: The template source
        dest: The file to write the rendered string to
        variables: The variable context to use for rendering
        es: The elasticsearch client object
        dataset_config: The dataset configuration
    """
    rendered = render_template(src, variables, es, dataset_config)

    is_mapping = isinstance(rendered, Mapping)
    # text is a sequence too, so it has to be excluded explicitly
    is_plain_sequence = isinstance(rendered, Sequence) and not isinstance(
        rendered, Text
    )

    if is_mapping or is_plain_sequence:
        # structured data goes through the config file writer
        # (JSON or YAML according to the original comments)
        write_config_file(rendered, dest)
        return

    # everything else is coerced to string and written as is
    with open(dest, "w") as dest_file:
        dest_file.write(str(rendered))