Templates module¶
This module contains utility functions used with and supporting template rendering as part of the processing pipeline and labeling rules.
as_datetime(v)
¶
Utility filter for converting a string to datetime.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
v |
str |
The string to convert |
required |
Returns:
Type | Description |
---|---|
datetime |
Converted datetime object. |
Source code in dataset/templates.py
def as_datetime(v: str) -> datetime:
    """Jinja2 filter that parses a string into a datetime object.

    Args:
        v: The string representation to convert.

    Returns:
        The parsed datetime object.
    """
    # delegate parsing and validation to pydantic
    parsed: datetime = parse_obj_as(datetime, v)
    return parsed
create_environment(templates_dirs=None, es=None, dataset_config=None)
¶
Create Jinja2 native environment for rendering dataset templates.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
templates_dirs |
Union[str, pathlib.Path, List[Union[str, pathlib.Path]]] |
The template directories |
None |
es |
Optional[elasticsearch.client.Elasticsearch] |
The elasticsearch client object |
None |
dataset_config |
Optional[cr_kyoushi.dataset.config.DatasetConfig] |
The dataset configuration |
None |
Returns:
Type | Description |
---|---|
NativeEnvironment |
Jinja2 template environment |
Source code in dataset/templates.py
def create_environment(
    templates_dirs: Optional[Union[Text, Path, List[Union[Text, Path]]]] = None,
    es: Optional[Elasticsearch] = None,
    dataset_config: Optional[DatasetConfig] = None,
) -> NativeEnvironment:
    """Create Jinja2 native environment for rendering dataset templates.

    Args:
        templates_dirs: The template directories
        es: The elasticsearch client object
        dataset_config: The dataset configuration

    Returns:
        Jinja2 template environment
    """
    # default search path: ./templates first, then the working directory
    if templates_dirs is None:
        templates_dirs = [Path("./templates"), Path("./")]

    env = NativeEnvironment(
        loader=ChoiceLoader(
            [
                FileSystemLoader(templates_dirs),
                # fall back to the templates bundled with the package
                PackageLoader("cr_kyoushi.dataset", "templates"),
            ]
        ),
        undefined=StrictUndefined,
        extensions=["jinja2.ext.do", "jinja2.ext.loopcontrols"],
    )

    # register the custom boolean tests and filters defined in this module
    env.tests.update(
        {
            "match_any": match_any,
            "regex": regex,
            "regex_search": regex_search,
            "regex_match": regex_match,
        }
    )
    env.filters.update({"as_datetime": as_datetime})

    template_globals: Dict[str, Any] = {
        "context": get_context,
        "datetime": datetime,
        "timedelta": timedelta,
    }
    if es is not None:
        # bind the client (and, when configured, the dataset name) to the
        # search helpers exposed inside templates
        name_kwargs = (
            {"dataset_name": dataset_config.name}
            if dataset_config is not None
            else {}
        )
        template_globals.update(
            {
                "Search": functools.partial(
                    elastic_dsl_search, using=es, **name_kwargs
                ),
                "Q": Q,
                "Q_ALL": q_all,
                "Q_MATCH_ALL": functools.partial(q_all, "match"),
                "Q_TERM_ALL": functools.partial(q_all, "term"),
                "EQL": functools.partial(elastic_eql_search, es=es, **name_kwargs),
            }
        )
    env.globals.update(template_globals)
    return env
elastic_dsl_search(using, dataset_name=None, prefix_dataset_name=True, index=None, **kwargs)
¶
Create an Elasticsearch DSL search object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
using |
Elasticsearch |
The elasticsearch client object |
required |
dataset_name |
Optional[str] |
The dataset name |
None |
prefix_dataset_name |
bool |
If the dataset name should be prefixed to the indices or not |
True |
index |
Union[Sequence[str], str] |
The indices to create the search object for |
None |
Returns:
Type | Description |
---|---|
Search |
Configured elasticsearch DSL search object |
Source code in dataset/templates.py
def elastic_dsl_search(
    using: Elasticsearch,
    dataset_name: Optional[str] = None,
    prefix_dataset_name: bool = True,
    index: Optional[Union[Sequence[str], str]] = None,
    **kwargs,
) -> Search:
    """Create an Elasticsearch DSL search object bound to the given client.

    Args:
        using: The elasticsearch client object
        dataset_name: The dataset name
        prefix_dataset_name: If the dataset name should be prefixed to the indices or not
        index: The indices to create the search object for
        **kwargs: Additional keyword arguments passed through to `Search`

    Returns:
        Configured elasticsearch DSL search object
    """
    resolved_index = resolve_indices(dataset_name, prefix_dataset_name, index)
    return Search(using=using, index=resolved_index, **kwargs)
elastic_eql_search(es, body, dataset_name=None, prefix_dataset_name=True, index=None)
¶
Perform an Elasticsearch EQL query.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
es |
Elasticsearch |
The elasticsearch client object |
required |
body |
Dict[str, Any] |
The EQL query body |
required |
dataset_name |
Optional[str] |
The dataset name |
None |
prefix_dataset_name |
bool |
If the dataset name should be prefixed to the indices or not |
True |
index |
Union[Sequence[str], str] |
The indices to perform the query on. |
None |
Returns:
Type | Description |
---|---|
Dict[str, Any] |
The EQL query result |
Source code in dataset/templates.py
def elastic_eql_search(
    es: Elasticsearch,
    body: Dict[str, Any],
    dataset_name: Optional[str] = None,
    prefix_dataset_name: bool = True,
    index: Optional[Union[Sequence[str], str]] = None,
) -> Dict[str, Any]:
    """Perform an Elasticsearch EQL query.

    Args:
        es: The elasticsearch client object
        body: The EQL query body
        dataset_name: The dataset name
        prefix_dataset_name: If the dataset name should be prefixed to the indices or not
        index: The indices to perform the query on.

    Returns:
        The EQL query result
    """
    resolved_index = resolve_indices(dataset_name, prefix_dataset_name, index)
    # EQL queries go through the dedicated EQL client API
    return EqlClient(es).search(index=resolved_index, body=body)
get_context(c)
¶
Utility function for getting the Jinja2 context.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
c |
Context |
The Jinja2 context |
required |
Returns:
Type | Description |
---|---|
Context |
The Jinja2 context |
Source code in dataset/templates.py
@contextfunction
def get_context(c: Context) -> Context:
    """Identity helper exposed to templates as a Jinja2 context function.

    Jinja2 passes the active render context to context functions, so
    simply returning the argument makes the context object itself
    accessible from within templates.

    Args:
        c: The Jinja2 context

    Returns:
        The Jinja2 context
    """
    return c
match_any(value, regex_list)
¶
Perform multiple re.match
and return True
if at least one match is found.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
value |
str |
The string to search in |
required |
regex_list |
List[str] |
List of patterns to try matching |
required |
Returns:
Type | Description |
---|---|
bool |
`True` if at least one pattern matches, `False` otherwise. |
Source code in dataset/templates.py
def match_any(value: str, regex_list: List[str]) -> bool:
    """Perform multiple `re.match` and return `True` if at least one match is found.

    Args:
        value: The string to search in
        regex_list: List of patterns to try matching

    Returns:
        `True` if at least one pattern matches `False` otherwise
    """
    return any(re.match(regex, value) for regex in regex_list)
q_all(qry_type, **kwargs)
¶
Create elasticsearch DSL bool term requiring all given terms to be true.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
qry_type |
str |
The DSL query term type |
required |
Returns:
Type | Description |
---|---|
Query |
The configured DSL query term |
Source code in dataset/templates.py
def q_all(qry_type: str, **kwargs) -> Query:
    """Create elasticsearch DSL bool term requiring all given terms to be true.

    Args:
        qry_type: The DSL query term type
        **kwargs: The query terms; `Query` values are used as given, any
            other value is wrapped into a query of `qry_type`.

    Returns:
        The configured DSL query term
    """
    # NOTE: the return annotation used to be `Q`, which is the DSL query
    # factory *function*, not a type; `Query` is the actual base class of
    # the object returned here.
    must = [
        val if isinstance(val, Query) else Q(qry_type, **{key: val})
        for key, val in kwargs.items()
    ]
    return Q("bool", must=must)
regex(value='', pattern='', ignorecase=False, multiline=False, match_type='search')
¶
Expose re
as a boolean filter using the search
method by default.
This is likely only useful for search
and match
which already
have their own filters.
Note
Taken from Ansible
Parameters:
Name | Type | Description | Default |
---|---|---|---|
value |
str |
The string to search in |
'' |
pattern |
str |
The pattern to search |
'' |
ignorecase |
bool |
If the case should be ignored or not |
False |
multiline |
bool |
If multiline matching should be used or not |
False |
match_type |
str |
The re pattern match type to use |
'search' |
Returns:
Type | Description |
---|---|
bool |
`True` if a match was found, `False` otherwise. |
Source code in dataset/templates.py
def regex(
    value: str = "",
    pattern: str = "",
    ignorecase: bool = False,
    multiline: bool = False,
    match_type: str = "search",
) -> bool:
    """Expose `re` as a boolean filter using the `search` method by default.

    This is likely only useful for `search` and `match` which already
    have their own filters.

    !!! Note
        Taken from Ansible

    Args:
        value: The string to search in
        pattern: The pattern to search
        ignorecase: If the case should be ignored or not
        multiline: If multiline matching should be used or not
        match_type: The re pattern match type to use

    Returns:
        `True` if a match was found `False` otherwise.
    """
    flags = 0
    if ignorecase:
        flags |= re.I
    if multiline:
        flags |= re.M
    _re = re.compile(pattern, flags=flags)
    # BUG FIX: the fallback for an unknown match_type used to be the string
    # "search", which is not callable and raised TypeError; fall back to the
    # compiled pattern's search method instead.
    return bool(getattr(_re, match_type, _re.search)(value))
regex_match(value, pattern='', ignorecase=False, multiline=False)
¶
Perform a re.match
returning a boolean
Note
Taken from Ansible
Parameters:
Name | Type | Description | Default |
---|---|---|---|
value |
str |
The string to search in |
required |
pattern |
str |
The pattern to search |
'' |
ignorecase |
bool |
If the case should be ignored or not |
False |
multiline |
bool |
If multiline matching should be used or not |
False |
Returns:
Type | Description |
---|---|
bool |
`True` if a match was found, `False` otherwise. |
Source code in dataset/templates.py
def regex_match(
    value: str, pattern: str = "", ignorecase: bool = False, multiline: bool = False
) -> bool:
    """Perform a `re.match` returning a boolean

    !!! Note
        Taken from Ansible

    Args:
        value: The string to search in
        pattern: The pattern to search
        ignorecase: If the case should be ignored or not
        multiline: If multiline matching should be used or not

    Returns:
        `True` if a match was found `False` otherwise.
    """
    # thin wrapper forcing the generic regex test into "match" mode
    return regex(
        value=value,
        pattern=pattern,
        ignorecase=ignorecase,
        multiline=multiline,
        match_type="match",
    )
regex_search(value, pattern='', ignorecase=False, multiline=False)
¶
Perform a re.search
returning a boolean
Note
Taken from Ansible
Parameters:
Name | Type | Description | Default |
---|---|---|---|
value |
str |
The string to search in |
required |
pattern |
str |
The pattern to search |
'' |
ignorecase |
bool |
If the case should be ignored or not |
False |
multiline |
bool |
If multiline matching should be used or not |
False |
Returns:
Type | Description |
---|---|
bool |
`True` if a match was found, `False` otherwise. |
Source code in dataset/templates.py
def regex_search(
    value: str, pattern: str = "", ignorecase: bool = False, multiline: bool = False
) -> bool:
    """Perform a `re.search` returning a boolean

    !!! Note
        Taken from Ansible

    Args:
        value: The string to search in
        pattern: The pattern to search
        ignorecase: If the case should be ignored or not
        multiline: If multiline matching should be used or not

    Returns:
        `True` if a match was found `False` otherwise.
    """
    # thin wrapper forcing the generic regex test into "search" mode
    return regex(
        value=value,
        pattern=pattern,
        ignorecase=ignorecase,
        multiline=multiline,
        match_type="search",
    )
render_template(template, variables, es=None, dataset_config=None)
¶
Renders a dataset Jinja2 template string or file.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
template |
Union[str, pathlib.Path] |
The template string or file |
required |
variables |
Dict[str, Any] |
The context variables to use for rendering |
required |
es |
Optional[elasticsearch.client.Elasticsearch] |
The elasticsearch client object |
None |
dataset_config |
Optional[cr_kyoushi.dataset.config.DatasetConfig] |
The dataset configuration |
None |
Returns:
Type | Description |
---|---|
Any |
The rendered Jinja2 template |
Source code in dataset/templates.py
def render_template(
    template: Union[Text, Path],
    variables: Dict[str, Any],
    es: Optional[Elasticsearch] = None,
    dataset_config: Optional[DatasetConfig] = None,
) -> Any:
    """Renders a dataset Jinja2 template string or file.

    Args:
        template: The template string or file
        variables: The context variables to use for rendering
        es: The elasticsearch client object
        dataset_config: The dataset configuration

    Returns:
        The rendered Jinja2 template
    """
    env = create_environment(es=es, dataset_config=dataset_config)
    # Path inputs are loaded through the environment's loaders,
    # plain strings are compiled directly
    jinja_template = (
        env.get_template(str(template))
        if isinstance(template, Path)
        else env.from_string(template)
    )
    rendered = jinja_template.render(**variables)
    # surface undefined template variables as an error instead of
    # silently returning an Undefined object
    if isinstance(rendered, Undefined):
        rendered._fail_with_undefined_error()
    return rendered
render_template_recursive(data, variables, es=None, dataset_config=None)
¶
Renders a complex object containing Jinja2 templates
The complex object can be either a string, list or dictionary. This function will recurse all sub elements (e.g., dictionary values) and render any Jinja2 template strings it finds.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
Any |
The object to render |
required |
variables |
Dict[str, Any] |
The context variables to use for rendering |
required |
es |
Optional[elasticsearch.client.Elasticsearch] |
The elasticsearch client object |
None |
dataset_config |
Optional[cr_kyoushi.dataset.config.DatasetConfig] |
The dataset configuration |
None |
Returns:
Type | Description |
---|---|
Any |
The object with all its Jinja2 templates rendered. |
Source code in dataset/templates.py
def render_template_recursive(
    data: Any,
    variables: Dict[str, Any],
    es: Optional[Elasticsearch] = None,
    dataset_config: Optional[DatasetConfig] = None,
) -> Any:
    """Renders a complex object containing Jinja2 templates

    The complex object can be either a string, list or dictionary.
    This function will recurse all sub elements (e.g., dictionary values)
    and render any Jinja2 template strings it finds.

    Args:
        data: The object to render
        variables: The context variables to use for rendering
        es: The elasticsearch client object
        dataset_config: The dataset configuration

    Returns:
        The object with all its Jinja2 templates rendered.
    """
    # dictionaries: both keys and values may contain templates
    if isinstance(data, dict):
        return {
            render_template_recursive(
                key, variables, es, dataset_config
            ): render_template_recursive(val, variables, es, dataset_config)
            for key, val in data.items()
        }
    # lists: render each element
    if isinstance(data, list):
        return [
            render_template_recursive(element, variables, es, dataset_config)
            for element in data
        ]
    # strings may be template strings
    if isinstance(data, str):
        return render_template(data, variables, es, dataset_config)
    # all other basic types are returned as is
    return data
write_template(src, dest, variables, es=None, dataset_config=None)
¶
Render and write a dataset Jinja2 template file.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
src |
Path |
The template source |
required |
dest |
Path |
The file to write the rendered string to |
required |
variables |
Dict[str, Any] |
The variable context to use for rendering |
required |
es |
Optional[elasticsearch.client.Elasticsearch] |
The elasticsearch client object |
None |
dataset_config |
Optional[cr_kyoushi.dataset.config.DatasetConfig] |
The dataset configuration |
None |
Source code in dataset/templates.py
def write_template(
    src: Path,
    dest: Path,
    variables: Dict[str, Any],
    es: Optional[Elasticsearch] = None,
    dataset_config: Optional[DatasetConfig] = None,
):
    """Render and write a dataset Jinja2 template file.

    Args:
        src: The template source
        dest: The file to write the rendered string to
        variables: The variable context to use for rendering
        es: The elasticsearch client object
        dataset_config: The dataset configuration
    """
    rendered = render_template(src, variables, es, dataset_config)
    # mappings and non-string sequences are serialized as structured
    # config data (str is excluded since it is also a Sequence)
    is_structured = isinstance(rendered, Mapping) or (
        isinstance(rendered, Sequence) and not isinstance(rendered, Text)
    )
    if is_structured:
        write_config_file(rendered, dest)
    else:
        # everything else is coerced to string and written as is
        with open(dest, "w") as dest_file:
            dest_file.write(str(rendered))