Templates module¶

This module contains utility functions used with and supporting template rendering as part of the processing pipeline and labeling rules.

`as_datetime(v)` ¶

Utility filter for converting a string to datetime.

Parameters:

Name	Type	Description	Default
`v`	`str`	The string to convert	required

Returns:

Type	Description
`datetime`	Converted datetime object.

Source code in dataset/templates.py

def as_datetime(v: str) -> datetime:
    """Jinja2 filter that turns a string into a `datetime`.

    The conversion is delegated to `parse_obj_as` with `datetime`
    as the target type.

    Args:
        v: The string to convert

    Returns:
        The resulting datetime object.
    """
    converted = parse_obj_as(datetime, v)
    return converted

`create_environment(templates_dirs=None, es=None, dataset_config=None)` ¶

Create Jinja2 native environment for rendering dataset templates.

Parameters:

Name	Type	Description	Default
`templates_dirs`	`Union[str, pathlib.Path, List[Union[str, pathlib.Path]]]`	The template directories	`None`
`es`	`Optional[elasticsearch.client.Elasticsearch]`	The elasticsearch client object	`None`
`dataset_config`	`Optional[cr_kyoushi.dataset.config.DatasetConfig]`	The dataset configuration	`None`

Returns:

Type	Description
`NativeEnvironment`	Jinja2 template environment

Source code in dataset/templates.py

def create_environment(
    templates_dirs: Optional[Union[Text, Path, List[Union[Text, Path]]]] = None,
    es: Optional[Elasticsearch] = None,
    dataset_config: Optional[DatasetConfig] = None,
) -> NativeEnvironment:
    """Build the Jinja2 native environment used to render dataset templates.

    Templates are looked up in the given directories first and in the
    `templates` directory of the `cr_kyoushi.dataset` package second.
    The environment is strict about undefined variables and registers the
    custom tests, filters and globals of this module. The Elasticsearch
    helpers (`Search`, `Q`, `Q_ALL`, `Q_MATCH_ALL`, `Q_TERM_ALL`, `EQL`)
    are only registered when an Elasticsearch client is supplied.

    Args:
        templates_dirs: The template directories
                        (defaults to `./templates` and `./`)
        es: The elasticsearch client object
        dataset_config: The dataset configuration

    Returns:
        Jinja2 template environment
    """
    # fall back to the default lookup locations relative to the working dir
    if templates_dirs is None:
        templates_dirs = [Path("./templates"), Path("./")]

    env = NativeEnvironment(
        loader=ChoiceLoader(
            [
                FileSystemLoader(templates_dirs),
                PackageLoader("cr_kyoushi.dataset", "templates"),
            ]
        ),
        undefined=StrictUndefined,
        extensions=["jinja2.ext.do", "jinja2.ext.loopcontrols"],
    )

    env.tests.update(
        {
            "match_any": match_any,
            "regex": regex,
            "regex_search": regex_search,
            "regex_match": regex_match,
        }
    )
    env.filters.update({"as_datetime": as_datetime})

    template_globals = {
        "context": get_context,
        "datetime": datetime,
        "timedelta": timedelta,
    }

    if es is not None:
        # the dataset name is only bound when a dataset config is available
        bound_name = (
            {} if dataset_config is None else {"dataset_name": dataset_config.name}
        )
        template_globals.update(
            {
                "Search": functools.partial(
                    elastic_dsl_search, using=es, **bound_name
                ),
                "Q": Q,
                "Q_ALL": q_all,
                "Q_MATCH_ALL": functools.partial(q_all, "match"),
                "Q_TERM_ALL": functools.partial(q_all, "term"),
                "EQL": functools.partial(elastic_eql_search, es=es, **bound_name),
            }
        )

    env.globals.update(template_globals)
    return env

`elastic_dsl_search(using, dataset_name=None, prefix_dataset_name=True, index=None, **kwargs)` ¶

Create an Elasticsearch DSL search object.

Parameters:

Name	Type	Description	Default
`using`	`Elasticsearch`	The elasticsearch client object	required
`dataset_name`	`Optional[str]`	The dataset name	`None`
`prefix_dataset_name`	`bool`	If the dataset name should be prefixed to the indices or not	`True`
`index`	`Optional[Union[Sequence[str], str]]`	The indices to create the search object for	`None`

Returns:

Type	Description
`Search`	Configured elasticsearch DSL search object

Source code in dataset/templates.py

def elastic_dsl_search(
    using: Elasticsearch,
    dataset_name: Optional[str] = None,
    prefix_dataset_name: bool = True,
    index: Optional[Union[Sequence[str], str]] = None,
    **kwargs,
) -> Search:
    """Build an Elasticsearch DSL search object for the requested indices.

    The indices are first passed through `resolve_indices` together with
    the dataset name and the prefix flag; any additional keyword arguments
    are forwarded to the `Search` constructor unchanged.

    Args:
        using: The elasticsearch client object
        dataset_name: The dataset name
        prefix_dataset_name: If the dataset name should be prefixed to the indices or not
        index: The indices to create the search object for

    Returns:
        Configured elasticsearch DSL search object
    """
    return Search(
        using=using,
        index=resolve_indices(dataset_name, prefix_dataset_name, index),
        **kwargs,
    )

`elastic_eql_search(es, body, dataset_name=None, prefix_dataset_name=True, index=None)` ¶

Perform an Elasticsearch EQL query.

Parameters:

Name	Type	Description	Default
`es`	`Elasticsearch`	The elasticsearch client object	required
`body`	`Dict[str, Any]`	The EQL query body	required
`dataset_name`	`Optional[str]`	The dataset name	`None`
`prefix_dataset_name`	`bool`	If the dataset name should be prefixed to the indices or not	`True`
`index`	`Optional[Union[Sequence[str], str]]`	The indices to perform the query on.	`None`

Returns:

Type	Description
`Dict[str, Any]`	The EQL query result

Source code in dataset/templates.py

def elastic_eql_search(
    es: Elasticsearch,
    body: Dict[str, Any],
    dataset_name: Optional[str] = None,
    prefix_dataset_name: bool = True,
    index: Optional[Union[Sequence[str], str]] = None,
) -> Dict[str, Any]:
    """Run an Elasticsearch EQL query and return its result.

    The indices are first passed through `resolve_indices` together with
    the dataset name and the prefix flag; the query is then sent through
    an `EqlClient` wrapping the given Elasticsearch client.

    Args:
        es: The elasticsearch client object
        body: The EQL query body
        dataset_name: The dataset name
        prefix_dataset_name: If the dataset name should be prefixed to the indices or not
        index: The indices to perform the query on.

    Returns:
        The EQL query result
    """
    target_indices = resolve_indices(dataset_name, prefix_dataset_name, index)
    return EqlClient(es).search(body=body, index=target_indices)

`get_context(c)` ¶

Utility function for getting the Jinja2 context.

Parameters:

Name	Type	Description	Default
`c`	`Context`	The Jinja2 context	required

Returns:

Type	Description
`Context`	The Jinja2 context

Source code in dataset/templates.py

# NOTE(review): `contextfunction` was deprecated in Jinja2 3.0 in favour of
# `pass_context` and removed in 3.1 - confirm the pinned Jinja2 version.
@contextfunction
def get_context(c: Context) -> Context:
    """Utility function for getting the Jinja2 context.

    This is an identity function: it returns the context it receives
    unchanged. `create_environment` registers it as the template global
    `context`.

    Args:
        c: The Jinja2 context

    Returns:
        The Jinja2 context
    """
    return c

`match_any(value, regex_list)` ¶

Perform multiple re.match and return True if at least one match is found.

Parameters:

Name	Type	Description	Default
`value`	`str`	The string to search in	required
`regex_list`	`List[str]`	List of patterns to try matching	required

Returns:

Type	Description
`bool`	`True` if at least one pattern matches `False` otherwise

Source code in dataset/templates.py

def match_any(value: str, regex_list: List[str]) -> bool:
    """Try `re.match` with each pattern and report whether any of them matched.

    As `re.match` is used, a pattern only matches at the start of `value`.
    An empty pattern list yields `False`.

    Args:
        value: The string to search in
        regex_list: List of patterns to try matching

    Returns:
        `True` if at least one pattern matches, `False` otherwise
    """
    for pattern in regex_list:
        # stop at the first hit, later patterns cannot change the outcome
        if re.match(pattern, value) is not None:
            return True
    return False

`q_all(qry_type, **kwargs)` ¶

Create elasticsearch DSL bool term requiring all given terms to be true.

Parameters:

Name	Type	Description	Default
`qry_type`	`str`	The DSL query term type	required

Returns:

Type	Description
`Query`	The configured DSL query term

Source code in dataset/templates.py

def q_all(qry_type: str, **kwargs) -> Q:
    """Combine all given terms into one DSL `bool` query with a `must` clause.

    Keyword values that already are `Query` objects are used as they are.
    Every other value is wrapped in a query of type `qry_type`, with the
    keyword name and value passed on as its single keyword argument.

    Args:
        qry_type: The DSL query term type

    Returns:
        The configured DSL query term
    """
    required_terms = [
        term if isinstance(term, Query) else Q(qry_type, **{field: term})
        for field, term in kwargs.items()
    ]
    return Q("bool", must=required_terms)

`regex(value='', pattern='', ignorecase=False, multiline=False, match_type='search')` ¶

Expose re as a boolean filter using the search method by default. This is likely only useful for search and match which already have their own filters.

Note

Taken from Ansible

Parameters:

Name	Type	Description	Default
`value`	`str`	The string to search in	`''`
`pattern`	`str`	The pattern to search	`''`
`ignorecase`	`bool`	If the case should be ignored or not	`False`
`multiline`	`bool`	If multiline matching should be used or not	`False`
`match_type`	`str`	The re pattern match type to use	`'search'`

Returns:

Type	Description
`bool`	`True` if a match was found `False` otherwise.

Source code in dataset/templates.py

def regex(
    value: str = "",
    pattern: str = "",
    ignorecase: bool = False,
    multiline: bool = False,
    match_type: str = "search",
) -> bool:
    """Expose `re` as a boolean filter using the `search` method by default.
    This is likely only useful for `search` and `match` which already
    have their own filters.

    !!! Note
        Taken from Ansible

    Args:
        value: The string to search in
        pattern: The pattern to search
        ignorecase: If the case should be ignored or not
        multiline: If multiline matching should be used or not
        match_type: The re pattern match type to use, i.e. the name of a
                    compiled pattern method such as `search` or `match`.
                    Names that are not an attribute of the compiled
                    pattern fall back to `search`.

    Returns:
        `True` if a match was found `False` otherwise.
    """
    flags = 0
    if ignorecase:
        flags |= re.I
    if multiline:
        flags |= re.M
    _re = re.compile(pattern, flags=flags)
    # The fallback must be the bound `search` method. The string "search"
    # is not callable, so it raised a TypeError for unknown match types.
    matcher = getattr(_re, match_type, _re.search)
    return bool(matcher(value))

`regex_match(value, pattern='', ignorecase=False, multiline=False)` ¶

Perform a re.match returning a boolean

Note

Taken from Ansible

Parameters:

Name	Type	Description	Default
`value`	`str`	The string to search in	required
`pattern`	`str`	The pattern to search	`''`
`ignorecase`	`bool`	If the case should be ignored or not	`False`
`multiline`	`bool`	If multiline matching should be used or not	`False`

Returns:

Type	Description
`bool`	`True` if a match was found `False` otherwise.

Source code in dataset/templates.py

def regex_match(
    value: str, pattern: str = "", ignorecase: bool = False, multiline: bool = False
) -> bool:
    """Boolean wrapper around `re.match`.

    Delegates to `regex` with the match type fixed to `match`.

    !!! Note
        Taken from Ansible

    Args:
        value: The string to search in
        pattern: The pattern to search
        ignorecase: If the case should be ignored or not
        multiline: If multiline matching should be used or not

    Returns:
        `True` if a match was found `False` otherwise.
    """
    return regex(
        value=value,
        pattern=pattern,
        ignorecase=ignorecase,
        multiline=multiline,
        match_type="match",
    )

`regex_search(value, pattern='', ignorecase=False, multiline=False)` ¶

Perform a re.search returning a boolean

Note

Taken from Ansible

Parameters:

Name	Type	Description	Default
`value`	`str`	The string to search in	required
`pattern`	`str`	The pattern to search	`''`
`ignorecase`	`bool`	If the case should be ignored or not	`False`
`multiline`	`bool`	If multiline matching should be used or not	`False`

Returns:

Type	Description
`bool`	`True` if a match was found `False` otherwise.

Source code in dataset/templates.py

def regex_search(
    value: str, pattern: str = "", ignorecase: bool = False, multiline: bool = False
) -> bool:
    """Boolean wrapper around `re.search`.

    Delegates to `regex` with the match type fixed to `search`.

    !!! Note
        Taken from Ansible

    Args:
        value: The string to search in
        pattern: The pattern to search
        ignorecase: If the case should be ignored or not
        multiline: If multiline matching should be used or not

    Returns:
        `True` if a match was found `False` otherwise.
    """
    return regex(
        value=value,
        pattern=pattern,
        ignorecase=ignorecase,
        multiline=multiline,
        match_type="search",
    )

`render_template(template, variables, es=None, dataset_config=None)` ¶

Renders a dataset Jinja2 template string or file.

Parameters:

Name	Type	Description	Default
`template`	`Union[str, pathlib.Path]`	The template string or file	required
`variables`	`Dict[str, Any]`	The context variables to use for rendering	required
`es`	`Optional[elasticsearch.client.Elasticsearch]`	The elasticsearch client object	`None`
`dataset_config`	`Optional[cr_kyoushi.dataset.config.DatasetConfig]`	The dataset configuration	`None`

Returns:

Type	Description
`Any`	The rendered Jinja2 template

Source code in dataset/templates.py

def render_template(
    template: Union[Text, Path],
    variables: Dict[str, Any],
    es: Optional[Elasticsearch] = None,
    dataset_config: Optional[DatasetConfig] = None,
) -> Any:
    """Render a dataset Jinja2 template given as a string or as a file.

    A `Path` is loaded by name through the environment's template loader,
    anything else is compiled as an inline template string. A fresh
    environment is created for every call.

    Args:
        template: The template string or file
        variables: The context variables to use for rendering
        es: The elasticsearch client object
        dataset_config: The dataset configuration

    Returns:
        The rendered Jinja2 template
    """
    environment = create_environment(es=es, dataset_config=dataset_config)

    # inline strings are compiled directly, paths go through the loader
    if not isinstance(template, Path):
        compiled = environment.from_string(template)
    else:
        compiled = environment.get_template(str(template))

    rendered = compiled.render(**variables)

    # the render result can itself be an undefined object, in that case
    # raise its undefined error instead of handing it to the caller
    if isinstance(rendered, Undefined):
        rendered._fail_with_undefined_error()
    return rendered

`render_template_recursive(data, variables, es=None, dataset_config=None)` ¶

Renders a complex object containing Jinja2 templates

The complex object can be either a string, list or dictionary. This function will recurse all sub elements (e.g., dictionary values) and render any Jinja2 template strings it finds.

Parameters:

Name	Type	Description	Default
`data`	`Any`	The object to render	required
`variables`	`Dict[str, Any]`	The context variables to use for rendering	required
`es`	`Optional[elasticsearch.client.Elasticsearch]`	The elasticsearch client object	`None`
`dataset_config`	`Optional[cr_kyoushi.dataset.config.DatasetConfig]`	The dataset configuration	`None`

Returns:

Type	Description
`Any`	The object with all its Jinja2 templates rendered.

Source code in dataset/templates.py

def render_template_recursive(
    data: Any,
    variables: Dict[str, Any],
    es: Optional[Elasticsearch] = None,
    dataset_config: Optional[DatasetConfig] = None,
) -> Any:
    """Render every Jinja2 template found inside a nested object.

    Strings are rendered as templates. Lists and dictionaries are walked
    recursively and rebuilt as new objects. For dictionaries both the keys
    and the values are rendered. Objects of any other type are returned
    unchanged.

    Args:
        data: The object to render
        variables: The context variables to use for rendering
        es: The elasticsearch client object
        dataset_config: The dataset configuration

    Returns:
        The object with all its Jinja2 templates rendered.
    """

    def _recurse(element: Any) -> Any:
        # recursion helper that carries the rendering arguments along
        return render_template_recursive(element, variables, es, dataset_config)

    # plain strings are the actual templates
    if isinstance(data, str):
        return render_template(data, variables, es, dataset_config)

    # lists are rendered element by element
    if isinstance(data, list):
        rendered_items = []
        for element in data:
            rendered_items.append(_recurse(element))
        return rendered_items

    # dictionaries may use templates for their keys as well as their values
    if isinstance(data, dict):
        rendered_map = {}
        for raw_key, raw_val in data.items():
            rendered_key = _recurse(raw_key)
            rendered_map[rendered_key] = _recurse(raw_val)
        return rendered_map

    # every other type is passed through untouched
    return data

`write_template(src, dest, variables, es=None, dataset_config=None)` ¶

Render and write a dataset Jinja2 template file.

Parameters:

Name	Type	Description	Default
`src`	`Path`	The template source	required
`dest`	`Path`	The file to write the rendered string to	required
`variables`	`Dict[str, Any]`	The variable context to use for rendering	required
`es`	`Optional[elasticsearch.client.Elasticsearch]`	The elasticsearch client object	`None`
`dataset_config`	`Optional[cr_kyoushi.dataset.config.DatasetConfig]`	The dataset configuration	`None`

Source code in dataset/templates.py

def write_template(
    src: Path,
    dest: Path,
    variables: Dict[str, Any],
    es: Optional[Elasticsearch] = None,
    dataset_config: Optional[DatasetConfig] = None,
):
    """Render a dataset Jinja2 template file and write the result to disk.

    Mappings and non-string sequences are handed to `write_config_file`.
    Any other render result is converted with `str` and written as is.

    Args:
        src: The template source
        dest: The file to write the rendered string to
        variables: The variable context to use for rendering
        es: The elasticsearch client object
        dataset_config: The dataset configuration
    """
    rendered = render_template(src, variables, es, dataset_config)

    # strings are sequences too, so they have to be excluded explicitly
    is_plain_sequence = isinstance(rendered, Sequence) and not isinstance(
        rendered, Text
    )
    is_structured = isinstance(rendered, Mapping) or is_plain_sequence

    if not is_structured:
        # scalar results are coerced to a string and written verbatim
        with open(dest, "w") as out_file:
            out_file.write(str(rendered))
        return

    # structured results are written through the config file writer
    write_config_file(rendered, dest)

Templates module¶

as_datetime(v) ¶

create_environment(templates_dirs=None, es=None, dataset_config=None) ¶

elastic_dsl_search(using, dataset_name=None, prefix_dataset_name=True, index=None, **kwargs) ¶

elastic_eql_search(es, body, dataset_name=None, prefix_dataset_name=True, index=None) ¶

get_context(c) ¶

match_any(value, regex_list) ¶

q_all(qry_type, **kwargs) ¶

regex(value='', pattern='', ignorecase=False, multiline=False, match_type='search') ¶

regex_match(value, pattern='', ignorecase=False, multiline=False) ¶

regex_search(value, pattern='', ignorecase=False, multiline=False) ¶

render_template(template, variables, es=None, dataset_config=None) ¶

render_template_recursive(data, variables, es=None, dataset_config=None) ¶

write_template(src, dest, variables, es=None, dataset_config=None) ¶

`as_datetime(v)` ¶

`create_environment(templates_dirs=None, es=None, dataset_config=None)` ¶

`elastic_dsl_search(using, dataset_name=None, prefix_dataset_name=True, index=None, **kwargs)` ¶

`elastic_eql_search(es, body, dataset_name=None, prefix_dataset_name=True, index=None)` ¶

`get_context(c)` ¶

`match_any(value, regex_list)` ¶

`q_all(qry_type, **kwargs)` ¶

`regex(value='', pattern='', ignorecase=False, multiline=False, match_type='search')` ¶

`regex_match(value, pattern='', ignorecase=False, multiline=False)` ¶

`regex_search(value, pattern='', ignorecase=False, multiline=False)` ¶

`render_template(template, variables, es=None, dataset_config=None)` ¶

`render_template_recursive(data, variables, es=None, dataset_config=None)` ¶

`write_template(src, dest, variables, es=None, dataset_config=None)` ¶