"""Controls the inputting of configuration files.
This also serves to bring all of the configuration parameters into a more
accessible space which other parts of Lezargus can use.
Note these configuration constant parameters are all accessed using capital
letters regardless of the configuration file's labels. Because of this, the
names must also obey a stricter set of Python variable naming conventions.
Namely, capital letter names and only alphanumeric characters.
There are constant parameters which are stored here which are not otherwise
changeable by the configuration file.
"""
# isort: split
# Import required to remove circular dependencies from type checking.
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from lezargus.library import hint
# isort: split
import contextlib
import os
import re
import uuid
import yaml
import lezargus
from lezargus.library import logging
# isort: split
[docs]
def sanitize_configuration(configuration: dict) -> dict:
"""Sanitize the configuration, conforming it to the Lezargus standards.
Sometimes configurations input by users do not exactly follow the
expectations of Lezargus, so, here, we sanitize it as much as we can.
Should some level of sanitation fail, then we inform the user.
Parameters
----------
configuration : dict
The configuration we are going to sanitize.
Returns
-------
sanitized_configuration : dict
The configuration, after sanitization.
"""
# We need to entry by entry in sanitization.
sanitized_configuration = {}
for keydex, valuedex in configuration.items():
# We first need to sanitize the key.
sanitized_key = sanitize_configuration_key(key=keydex)
# And the value...
sanitized_value = sanitize_configuration_value(value=valuedex)
# Reconstruction of the dictionary.
sanitized_configuration[sanitized_key] = sanitized_value
# All done.
return sanitized_configuration
[docs]
def sanitize_configuration_key(key: str) -> str:
"""Sanitize only the configuration key name.
The key sanitization makes sure that the key follows the below criteria:
- The key contains only letters and single underscores as word
demarcations.
- The key is all uppercase and is unique across all variations of cases.
Parameters
----------
key : str
The configuration key to sanitize.
Returns
-------
sanitized_key : str
The key, sanitized.
"""
# We replace common text demarcations with underscores. Also,
# only single underscores so we need to remove subsequent underscores.
common_demarcations = [" ", "-", "."]
underscore_key = key
for demarkdex in common_demarcations:
underscore_key = underscore_key.replace(demarkdex, "_")
has_successive_underscores = "__" in underscore_key
while has_successive_underscores:
# Underscore check.
has_successive_underscores = "__" in underscore_key
underscore_key = underscore_key.replace("__", "_")
# We check that it only has letters.
letter_test_key = underscore_key.replace("_", "")
if not letter_test_key.isalnum():
logging.critical(
critical_type=logging.ConfigurationError,
message=(
f"Key {key} contains non-alphanumeric non-underscore"
" characters."
),
)
if not (underscore_key[0].isascii() and underscore_key[0].isalpha()):
logging.critical(
critical_type=logging.ConfigurationError,
message=(
f"Key {key} begins with non-ascii letter; thus making it"
" invalid for a configuration key."
),
)
# We ensure that the case of the key is upper case, and more importantly,
# unique in case.
upper_key = underscore_key.casefold().upper()
# The current stage of the key is sanitized.
sanitized_key = upper_key
return sanitized_key
[docs]
def sanitize_configuration_value(value: hint.Any) -> int | float | str:
"""Sanitize only the configuration value to a string.
Value sanitization ensures just three properties:
- The value in question can be serialized to and from a numeric or string.
- The value is not a dictionary.
- The value string can fit on one line.
We need to require strings because that is the format yaml ensures.
Parameters
----------
value : str
The configuration value to sanitize.
Returns
-------
sanitized_value : int, float, or str
The value, sanitized.
"""
# We need to make sure it is not a dictionary, else, that is likely nested
# configurations.
if isinstance(value, dict):
logging.critical(
critical_type=logging.ConfigurationError,
message=(
"Input value is a dictionary, this would lead to non-flat"
" configurations and files."
),
)
# We need to make sure it can be turned into a string.
try:
value_str = str(value)
except ValueError:
logging.critical(
critical_type=logging.ConfigurationError,
message=f"Input value {value} cannot be turned to a string.",
)
# We have no real metric for it all fitting onto a line. But, we do just
# give a warning if it is long.
too_long_value = 80
if len(value_str) > too_long_value:
logging.warning(
warning_type=logging.ConfigurationWarning,
message=f"Configuration value {value_str} is very long.",
)
# Lastly, we figure out what is the best representation.
if isinstance(value, int | float | str):
sanitized_value = value
else:
# Maybe it is still a number?
try:
sanitized_value = float(value_str)
except ValueError:
# Nope, it is better to just use the string value.
sanitized_value = value_str
return sanitized_value
[docs]
def assign_configuration(key: str, value: float | str) -> None:
"""Assign the configuration in lezargus.config.
Parameters
----------
key : str
The configuration key value. If the key value does not exist in the
main configuration, then an error is raised as it generally
indicates a consistency error.
value : int | float | str
The value of the configuration to be set. Must be a simple type as
configuration files should have pretty primitive types.
Returns
-------
None
"""
# We need to sanitize the key first.
sanitize_key = sanitize_configuration_key(key=key)
# And the value.
sanitize_value = sanitize_configuration_value(value=value)
# We then check the main configuration module for consistency before
# applying it.
if not hasattr(lezargus.config, sanitize_key):
logging.critical(
critical_type=logging.ConfigurationError,
message=(
"Lezargus configuration does not support the"
f" {sanitize_key} key."
),
)
# Otherwise, we apply it.
setattr(lezargus.config, sanitize_key, sanitize_value)
[docs]
def apply_configuration(configuration: dict) -> None:
"""Apply the configuration, input structured as a dictionary.
Note configuration files should be flat, there should be no nested
configuration parameters.
Parameters
----------
configuration : dict
The configuration dictionary we are going to apply.
Returns
-------
None
"""
# Constants typically are all capitalized in their variable naming.
capital_configuration = {
keydex.upper(): valuedex for keydex, valuedex in configuration.items()
}
# Check that the configuration names were capitalized.
for keydex, capital_keydex in zip(
configuration.keys(),
capital_configuration.keys(),
strict=True,
):
if keydex.casefold() != capital_keydex.casefold():
logging.error(
error_type=logging.ConfigurationError,
message=(
"The following configuration keys differ on the case"
f" transformation: {keydex} -> {capital_keydex}"
),
)
if keydex != capital_keydex:
logging.error(
error_type=logging.ConfigurationError,
message=(
"The keys of configuration parameters should be in all"
" capital letters. The following key is inappropriate:"
f" {keydex}"
),
)
# We just sanitize the configuration.
sanitize_config = sanitize_configuration(
configuration=capital_configuration,
)
# Apply it to the configuration.
for keydex, valuedex in sanitize_config.items():
assign_configuration(key=keydex, value=valuedex)
[docs]
def read_configuration_file(filename: str) -> dict:
"""Read the configuration file and output a dictionary of parameters.
Note configuration files should be flat, there should be no nested
configuration parameters.
Parameters
----------
filename : str
The filename of the configuration file, with the extension. Will raise
if the filename is not the correct extension, just as a quick check.
Returns
-------
configuration : dict
The dictionary which contains all of the configuration parameters
within it.
"""
# Checking the extension is valid, just as a quick sanity check that the
# configuration file is proper.
config_extension = ("yaml", "yml")
filename_ext = lezargus.library.path.get_file_extension(pathname=filename)
if filename_ext not in config_extension:
logging.error(
error_type=logging.FileError,
message=(
"Configuration file does not have the proper extension, it"
" should be a yaml file."
),
)
# Loading the configuration file.
try:
with open(filename, encoding="utf-8") as config_file:
raw_configuration = dict(
yaml.load(config_file, Loader=yaml.SafeLoader),
)
except FileNotFoundError:
# The file is not found, it cannot be opened.
logging.critical(
critical_type=logging.FileError,
message=(
"The following configuration filename does not exist:"
f" {filename}"
),
)
# Double check that the configuration is flat as per the documentation
# and expectation.
for valuedex in raw_configuration.values():
if isinstance(valuedex, dict):
# A dictionary implies a nested configuration which is not allowed.
logging.error(
error_type=logging.ConfigurationError,
message=(
"The configuration file should not have any embedded"
" configurations, it should be a flat file. Please use the"
" configuration file templates."
),
)
# A final clean up of the configuration.
configuration = sanitize_configuration(configuration=raw_configuration)
# The configuration dictionary should be good.
return configuration
[docs]
def _convert_default_configuration_yaml(section: str = "ALL") -> list[str]:
"""Create a temporary configuration YAML, returning the file lines.
The configuration file by default is a Python file to satisfy the type
checker. However, most people will be using YAML files as it is safer.
We convert the default configuration file to a YAML file, temporarily
within the context manager, to manipulate, and provide to the user.
The configuration file is split into sections based on the section tags.
If a section label is provided, we only provide the configuration within
the tags.
Parameters
----------
section : str, default
The section label. We limit the YAML file to the section label subset.
By default, we use ALL, the full file.
Returns
-------
yaml_lines : str
The file lines of the configuration YAML file.
"""
# We get the default configuration Python file.
config_py_filename = lezargus.library.path.merge_pathname(
directory=lezargus.config.INTERNAL_MODULE_INSTALLATION_PATH,
filename="config",
extension="py",
)
# We read in the Python file, care is needed to not load anything.
with open(config_py_filename, encoding="utf-8") as config_py:
all_config_py_lines = config_py.readlines()
# We do not need the new line characters nor padding.
all_config_py_lines = [
linedex.removesuffix("\n").strip()
for linedex in all_config_py_lines
]
# By convention, we have tagged the beginning and end of the configuration
# part of the Python file. So we only need those parts.
clean_section = section.upper().strip()
start_tag = f"# <BEGIN {clean_section}>"
end_tag = f"# </END {clean_section}>"
# We find the index parts.
start_index = None
end_index = None
for index, linedex in enumerate(all_config_py_lines):
if start_tag in linedex:
start_index = index
if end_tag in linedex:
end_index = index
# If we don't have either a start or end index, the tag provided is likely
# incorrect.
if start_index is None or end_index is None:
start_index = 0
end_index = -1
logging.error(
error_type=logging.InputError,
message=(
f"Configuration section tag {section} does not match any"
" sections."
),
)
# We want the end tag as well.
config_py_lines = all_config_py_lines[start_index : end_index + 1]
# We will be testing the configuration lines to make sure they are
# properly formatted before we attempt the conversion. The test using
# regular expressions, it is better to compile it early.
regex_test_pattern = r"[A-Z_]+ = [^=\n]*"
regex_test = re.compile(regex_test_pattern)
# The only real difference between the Python file and the YAML file is
# the = and : as seperators. We leverage this fact and just change the
# character.
yaml_lines = []
for linedex in config_py_lines:
# Clearing out some cases where we should not change anything.
if linedex.startswith("#"):
# Line is a comment, no action taken. We just add it and move on.
yaml_lines.append(linedex)
continue
if len(linedex) == 0:
# Line is blank, no action can be taken. We just add it and move
# on.
yaml_lines.append(linedex)
continue
# We need to test that the configuration line is correct.
if regex_test.match(linedex) is None:
logging.critical(
critical_type=logging.ConfigurationError,
message=(
f"Configuration line {linedex} does not match the expected"
" format of a configuration setting."
),
)
else:
# We just replace the = with a :.
new_linedex = linedex.replace(R" = ", R" : ", 1)
yaml_lines.append(new_linedex)
# All done
return yaml_lines
[docs]
@contextlib.contextmanager
def _convert_default_configuration_file(
section: str = "ALL",
) -> hint.Iterator[str]:
"""Create a temporary configuration YAML file, returning the path.
See py:func:`_convert_default_configuration_yaml` for more information.
Parameters
----------
section : str, default
The section label. We limit the YAML file to the section label subset.
By default, we use ALL, the full file.
Yeilds
------
filename : str
The filename of the default YAML file, we create it internally then
delete it afterwards.
"""
yaml_lines = _convert_default_configuration_yaml(section=section)
# Now that we have the YAML format file, we can write it. We will not use
# the temporary directory because it might not be configured yet.
# We want to make sure there is no file conflict.
yaml_random_filename = lezargus.library.path.merge_pathname(
directory=lezargus.config.INTERNAL_MODULE_INSTALLATION_PATH,
filename=f"temp_config_{uuid.uuid4()}",
extension="yaml",
)
# Finally, saving the file. We need to make our own new line characters.
raw_yaml_lines = [linedex + "\n" for linedex in yaml_lines]
with open(yaml_random_filename, "xt", encoding="utf-8") as yaml_file:
yaml_file.writelines(raw_yaml_lines)
# We yeild to the user to execute the file as they may want to.
yield yaml_random_filename
# Finally, we clean up the file, it is not needed anymore and we don't
# want it sticking around.
os.remove(yaml_random_filename)
[docs]
def write_configuration_file(
filename: str,
configuration: dict[str, hint.Any] | None = None,
section: str = "ALL",
overwrite: bool = False,
) -> None:
"""Write a configuration file based on provided configuration.
Note configuration files should be flat, there should be no nested
configuration parameters. Moreover, we only write configurations present
as default or as overwritten by the provided configuration, within the
section tag as provided. This function does not account for current live
configurations.
Parameters
----------
filename : str
The filename of the configuration file to be saved, with the extension.
An error will be provided if the extension is not a correct extension.
configuration : dict, default = None
The configuration which we will save, along with any defaults present
in the main configuration file. If None, only defaults are saved.
section : str
The section label for us to reduce the scope of the configuration
file we will be writing.
overwrite : bool
If True, we overwrite the configuration file if already present.
Returns
-------
None
"""
# We need to sanitize the input configuration first.
configuration = {} if configuration is None else configuration
configuration = sanitize_configuration(configuration=configuration)
# We also need the default configuration.
default_configuration = get_default_configuration(section=section)
# Applying any overwrites.
writing_configuration = {**default_configuration, **configuration}
# We want to preserve the comment information explaining the
# configurations, so, instead of just dumping the YAML, we attempt to just
# create a file inplace and manually change the required lines.
default_lines = _convert_default_configuration_yaml(section=section)
# Now we search through all lines, finding the needed fields we need to
# replace.
writing_lines = []
for linedex in default_lines:
# If the line is blank, we want to keep it blank in the write out.
if len(linedex) == 0:
writing_lines.append("")
continue
# If the line is a comment, it is likely documentation so we keep it
# as is.
if linedex.startswith("#"):
writing_lines.append(linedex)
continue
# If it is a `key : value` pair, we need to determine the key and value
# and replace it with the new one if needed.
if R" : " in linedex:
# We do not want to split more than the key value pair itself. The
# key should never have : in it.
default_key, default_value = linedex.split(R" : ", maxsplit=1)
writing_key = default_key.strip()
# Now we need to determine if the value is contained within the
# writing configuration.
writing_value = writing_configuration.get(
writing_key,
default_value,
)
temp_writing_line = f"{writing_key} : {writing_value}"
writing_lines.append(temp_writing_line)
# If a configuration line does not meet any of the above, it is not
# parsable for writing.
logging.error(
logging.ConfigurationError,
message=(
f"Configuration line cannot be parsed for writing: {linedex}"
),
)
# We need to do a few checks for the configuration file path.
config_extension = ("yaml", "yml")
filename_ext = lezargus.library.path.get_file_extension(pathname=filename)
if filename_ext not in config_extension:
logging.error(
error_type=logging.FileError,
message=(
f"Configuration filename has extension {filename_ext}, not a"
" YAML extension (yml or yaml)."
),
)
# We also check the directory and path.
directory = lezargus.library.path.get_directory(pathname=filename)
if not os.path.isdir(directory):
# The directory of the file does not exist.
logging.warning(
warning_type=logging.FileWarning,
message=(
f"Saving filename directory {directory} does not exist,"
" creating it."
),
)
os.makedirs(directory, exist_ok=True)
# And we check if the file exists.
if os.path.isfile(filename) and not overwrite:
logging.critical(
critical_type=logging.FileError,
message=f"Configuration file {filename} already exists.",
)
# Finally, saving the file. We need to make our own new line characters.
yaml_lines = [linedex + "\n" for linedex in writing_lines]
with open(filename, mode="w", encoding="utf-8") as new_file:
new_file.writelines(yaml_lines)
[docs]
def get_default_configuration(section: str = "ALL") -> dict:
"""Get the default configuration dictionary.
Parameters
----------
section : str
The section label for us to reduce the scope of the default
configuration provided
Returns
-------
default_configuration : dict
The total default configuration dictionary.
"""
# Loading the default configuration
with _convert_default_configuration_file(section=section) as default:
default_configuration = read_configuration_file(filename=default)
return default_configuration
[docs]
def create_configuration_file(
filename: str,
section: str = "ALL",
overwrite: bool = False,
) -> None:
"""Create a copy of the default configuration file to the given location.
Parameters
----------
filename : str
The filename of the new configuration file to be saved, with the
extension. An error will be provided if the extension is not a
correct extension.
section : str
The section label for us to reduce the scope of the configuration
file we will be writing.
overwrite : bool, default = False
If the file already exists, overwrite it. If False, it would raise
an error instead.
Returns
-------
None
"""
# This really is just the same as writing a configuration file, just with
# no configuration changes from the default.
write_configuration_file(
filename=filename,
configuration=None,
section=section,
overwrite=overwrite,
)
[docs]
def load_configuration_file(filename: str) -> None:
"""Load a configuration file, then apply it.
Reads a configuration file, the applies it to the current configuration.
Note configuration files should be flat, there should be no nested
configuration parameters.
Parameters
----------
filename : str
The filename of the configuration file, with the extension. Will raise
if the filename is not the correct extension, just as a quick check.
Returns
-------
None
"""
# Loading a configuration is simply just reading the file, then applying
# the configuration.
configuration = read_configuration_file(filename=filename)
apply_configuration(configuration=configuration)
# Notifying that it was applied.
logging.info(
message=f"Configuration file {filename} was loaded and applied.",
)
[docs]
def save_configuration_file(
filename: str,
section: str = "ALL",
overwrite: bool = False,
) -> None:
"""Save the current live configuration to a configuration file.
This function saves the current configuration to a configuration file.
The entire configuration file is provided by default to replicate
the current settings, but a section tag may be provided.
Parameters
----------
filename : str
The filename of the configuration file to be saved, with the
extension. An error will be provided if the extension is not a
correct extension.
section : str
The section label for us to reduce the scope of the configuration
file we will be saving.
overwrite : bool, default = False
If the file already exists, overwrite it. If False, it would raise
an error instead.
Returns
-------
None
"""
# We need to export the current configuration. Extracting all of the
# attributes ought to be fine but we need to sort though it after.
raw_state = vars(lezargus.config)
# We get the parts of the configuration that we actually need.
default_configuration = get_default_configuration(section=section)
# We cycle through the raw state, extracting the required configurations
# from it.
live_configuration = {
keydex: raw_state.get(keydex, valuedex)
for keydex, valuedex in default_configuration.items()
}
# Finally, we save the live configuration.
write_configuration_file(
filename=filename,
configuration=live_configuration,
section=section,
overwrite=overwrite,
)