You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I'm using Cerberus to validate configs for a framework that can run a multitude of tasks with many dependent configurations. The configs are such that the schema changes heavily depending on what is specified. In an attempt to make this manageable, I wrote schemas for the individual parts and stored them in the schema_registry. When the config is validated, the name of the required validation is built from field values specified in the config; the corresponding schema is then retrieved from the registry and applied.
My code looks like this:
`import os
from cerberus.validator import Validator, schema_registry, rules_set_registry
import importlib
import pkgutil
import re
class ConfigValidator(Validator):
    """Cerberus validator that picks registry schemas based on document values.

    Adds the ``custom_validator_from_field_value`` rule, which resolves a
    schema name from its constraint (optionally by looking up values in the
    document being validated), fetches that schema from the Cerberus
    ``schema_registry`` and validates the field's value against it.

    The ``populate_registries`` class method fills the schema and rules-set
    registries from Python packages.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``cerberus.Validator``."""
        super().__init__(*args, **kwargs)

    def _validate_custom_validator_from_field_value(
        self, constraint: list[str | dict] | dict | str, field: str, value: dict
    ) -> None:
        """
        Validate a field against a schema from the schema registry whose name
        is derived from 'constraint'. The name can be compiled from values of
        fields within the config being validated.

        Example 1 (literal schema name):
            config = {
                "parameters": {
                    "env": "dev",
                    "class": "S3ToGlue",
                    "size": "large"
                }
            }
            schema = {
                "parameters": {
                    "custom_validator_from_field_value": "SomeCustomSchemaName"
                }
            }

        Example 2 (name looked up from the value of 'class' in the document):
            schema = {
                "parameters": {
                    "custom_validator_from_field_value": {"lookup": "parameters.class"}
                }
            }
            Here the dictionary 'parameters' is evaluated according to the
            schema named 'S3ToGlue' in the schema_registry.

        Example 3 (list of parts, joined and converted to CamelCase,
        giving 'S3ToGlueCustomValueDev'):
            schema = {
                "parameters": {
                    "custom_validator_from_field_value": [
                        {"lookup": "parameters.class"},
                        "CustomValue",
                        {"lookup": "parameters.env"}
                    ]
                }
            }

        Args:
            constraint (list | dict | str): Specification of the schema name.
            field (str): The name of the field.
            value (dict): The value of the field.

        Errors are reported through ``self._error``; nothing is returned. The
        nested validation runs in a separate ConfigValidator instance and its
        normalized result is discarded: only success/failure and the nested
        error messages are propagated.

        NOTE: the final two lines of this docstring are parsed by Cerberus as
        the schema for this rule's arguments and must stay last. Without them
        Cerberus emits a 'UserWarning: No validation schema is defined for the
        arguments of rule' warning.

        The rule's arguments are validated against this schema:
        {'type': ['list','dict','string']}
        """
        custom_validation_name = self._find_validation_name(
            constraint=constraint, field=field
        )
        if not custom_validation_name:
            self._error(field, f"Field {constraint} cannot be empty.")
            # Without a name there is nothing to look up in the registry.
            return

        # Public registry accessor; yields None when the name is unknown.
        custom_validation_schema = schema_registry.get(custom_validation_name)
        if not custom_validation_schema:
            self._error(
                field,
                f"No custom validation schema named {custom_validation_name}",
            )  # ? Potential TODO: Print the values that are valid.
            return

        custom_validator = ConfigValidator(schema=custom_validation_schema)
        if custom_validator.validated(document=value) is None:
            self._error(field, f"{field} not valid: {custom_validator.errors}")

    def _find_validation_name(self, constraint, field) -> str:
        """
        Recursively build the CamelCase name of the custom validation to apply.

        Args:
            constraint (list | dict | str): A literal name, a
                ``{"lookup": "path.to.field"}`` mapping, or a list of these.
                A lookup path starting with '^' is rewritten to be relative
                to 'field' (``"^env"`` becomes ``"<field>.env"``).
            field (str): Field to which the constraint is applied.

        Returns:
            custom_validation_name (str): The resolved name, or an empty
            string when the constraint is invalid, any part of a list is
            empty, or a lookup does not resolve to a non-empty string. An
            error is recorded on 'field' only for an invalid constraint.
        """
        if isinstance(constraint, list):
            names = [self._find_validation_name(item, field) for item in constraint]
            # A missing part must not silently produce a shorter, wrong name.
            if not names or not all(names):
                return ""
            raw_name = "_".join(names)
        elif isinstance(constraint, dict) and isinstance(
            constraint.get("lookup"), str
        ):
            path = constraint["lookup"]
            if path.startswith("^"):
                # Only the leading marker is special; keep later carets as-is.
                path = f"{field}.{path[1:]}"
            # _lookup_field returns a (field_name, value) pair.
            raw_name = self._lookup_field(path)[1]
            if not isinstance(raw_name, str) or not raw_name:
                return ""
        elif isinstance(constraint, str):
            raw_name = constraint
        else:
            self._error(field, f"{constraint} in {field} is not valid.")
            return ""

        # Split before every capital letter (except at the start), then
        # rebuild via snake_case so 'S3ToGlue_dev' becomes 'S3ToGlueDev'.
        parts = re.split(r"(?<!^)(?=[A-Z])", raw_name)
        return "_".join(part.lower() for part in parts).title().replace("_", "")

    @classmethod
    def _populate_schema_registry(cls, schema_package) -> None:
        """
        Recursively import every schema module in 'schema_package' and add its
        module-level 'schema' to the schema registry under the CamelCase form
        of the module name.

        e.g. schema module 's3_to_glue.py' will be added to the schema registry
        as 'S3ToGlue'.

        Args:
            schema_package (module): The imported package that contains the
                schema modules (it must expose ``__path__`` and ``__name__``).
        """
        package_name = schema_package.__name__
        # iter_modules lists direct children only; sub-packages are handled by
        # the explicit recursion below, under their fully qualified names.
        for _finder, name, is_pkg in pkgutil.iter_modules(schema_package.__path__):
            full_name = f"{package_name}.{name}"
            if is_pkg:
                cls._populate_schema_registry(importlib.import_module(full_name))
                continue
            schema_module = importlib.import_module(full_name)
            schema_name = name.title().replace("_", "")  # s3_to_glue -> S3ToGlue
            schema_registry.add(schema_name, schema_module.schema)

    @classmethod
    def _populate_rules_registry(cls, rules_package) -> None:
        r"""
        Recursively import every rules module in 'rules_package' and add its
        rules to the rules set registry as rule_name: rule.

        Each module stores its rules in a module-level dictionary named
        'rules'. One dictionary can hold several rules, one per key, i.e.

            rules = {
                "s3_path_rule": {
                    "type": "string",
                    "coerce": lambda v: v if v.startswith("s3://") else f"s3://{v}",
                },
                "file_name_rule": {
                    "type": "string",
                    "regex": r"^[^\s!@#$%^&*()_+={}\[\]:;\"'<>,.?/|\\]+$",
                },
            }

        Args:
            rules_package (module): The imported package that contains the
                rules modules (it must expose ``__path__`` and ``__name__``).
        """
        package_name = rules_package.__name__
        # iter_modules lists direct children only; sub-packages are handled by
        # the explicit recursion below, under their fully qualified names.
        for _finder, name, is_pkg in pkgutil.iter_modules(rules_package.__path__):
            full_name = f"{package_name}.{name}"
            if is_pkg:
                cls._populate_rules_registry(importlib.import_module(full_name))
                continue
            rules_module = importlib.import_module(full_name)
            rules_set_registry.extend(rules_module.rules.items())

    @classmethod
    def populate_registries(cls, schema_package, rules_package) -> None:
        """
        Wrapper method to populate both the schema and rules registries.

        Args:
            schema_package (module): The imported package holding schema modules.
            rules_package (module): The imported package holding rules modules.
        """
        cls._populate_schema_registry(schema_package)
        cls._populate_rules_registry(rules_package)
`
I do the Validation by creating another instance of ConfigValidator and then validating the subdocuments individually. Reading through the documentation, I see I should have used _get_child_validator instead, however the problem is that normalization in the subdocuments does not work and I don't think that would solve that problem.
What I've realized is that the core functionality that I am looking for is the ability to set the value of the field 'schema' in the schema dict based on values from the document being validated. I have tried using coerce to set schema value, but that does not work.
In short then, it would be great if values in the schema could be defined dynamically from values from the document being validated, for example using something like {'lookup': 'path.to.field.in.document'}, and with the same lookup capabilities as used when setting dependencies. e.g.
Here the lookup retrieves 'foo.bar' from the document, and sets the value of schema accordingly. 'qux' is stored in the schema registry. Resolved, this would equate to:
The text was updated successfully, but these errors were encountered:
charlverster
changed the title
Dynamically set value for schema based on document values
Dynamically set value for schema based on document values [Proposed label: feature_request
Feb 27, 2024
I'm using Cerberus to validate configs for a framework that can run a multitude of tasks with many dependent configurations. The configs are such that the schema changes heavily depending on what is specified. In an attempt to make this manageable, I wrote schemas for the individual parts and stored them in the schema_registry. When the config is validated, the name of the required validation is built from field values specified in the config; the corresponding schema is then retrieved from the registry and applied.
My code looks like this:
`import os
from cerberus.validator import Validator, schema_registry, rules_set_registry
import importlib
import pkgutil
import re
class ConfigValidator(Validator):
    """Cerberus validator subclass used to validate framework configs."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``cerberus.Validator``."""
        super().__init__(*args, **kwargs)
`
I do the Validation by creating another instance of ConfigValidator and then validating the subdocuments individually. Reading through the documentation, I see I should have used _get_child_validator instead, however the problem is that normalization in the subdocuments does not work and I don't think that would solve that problem.
What I've realized is that the core functionality that I am looking for is the ability to set the value of the field 'schema' in the schema dict based on values from the document being validated. I have tried using coerce to set schema value, but that does not work.
I expected the above to be equivalent to this:
In short then, it would be great if values in the schema could be defined dynamically from values from the document being validated, for example using something like {'lookup': 'path.to.field.in.document'}, and with the same lookup capabilities as used when setting dependencies. e.g.
Here the lookup retrieves 'foo.bar' from the document, and sets the value of schema accordingly. 'qux' is stored in the schema registry. Resolved, this would equate to:
The text was updated successfully, but these errors were encountered: