I have the following JSON schema that I want to use to validate data. It works well when using jsonschema
to validate the basic information and conditional scenarios.
However, I have a specific problem: I cannot easily detect data that is present but shouldn't be, based on the allOf conditions. It's possible to write something custom for simple conditions in Python, but where conditions are more complex it may be tricky to scale. Does anyone have any hints, or know of any tooling that could help? Many thanks.
example schema
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"type": "object",
"properties": {
"colours": {
"type": "integer",
"oneOf": [
{"const": 1},
{"const": 2},
{"const": 3},
{"const": 19}
]
},
"years": {
"type": "integer",
"minimum": 1,
"maximum": 10
},
"sleep": {
"type": "integer",
"oneOf": [
{"const": 1},
{"const": 2},
{"const": 5}
]
},
"shapes": {
"type": "array",
"items": {
"type": "integer",
"anyOf": [
{"const": 2},
{"const": 3}
]
}
},
"gender": {
"type": "integer",
"oneOf": [
{"const": 1},
{"const": 2}
]
}
},
"required": ["colours", "years"],
"allOf": [
{
"if": {
"allOf": [
{
"properties": {
"colours": {"const": 1}
}
},
{
"properties": {
"years": {
"enum": [7,8,9]
}
}
}
]
},
"then": {
"required": ["shapes"]
}
},
{
"if": {
"allOf": [
{
"properties": {
"shapes": {
"anyOf": [
{"const": 1},
{"const": 3}
]
}
}
},
{
"properties": {
"colours": {"const": 19}
}
}
]
},
"then": {
"required": ["gender"]
}
},
{
"if": {
"properties": {
"years": {
"not": {"const": 1}
}
}
},
"then": {
"required": ["sleep"]
}
}
]
}
example data
# Sample records used to exercise the schema. "id" is not declared in the
# schema; it is only used to label the printed output.
data_list = [
    # Conditional requirements are satisfied (shapes and sleep are present),
    # but the values are invalid: sleep 4 is not one of 1/2/5, shapes item 1
    # is not one of 2/3, and gender 3 is not one of 1/2.
    {"id": "a", "colours": 1, "years": 9, "sleep": 4, "shapes": [1], "gender": 3},
    # Invalid: shapes is not an array and gender 3 is not an allowed value.
    # sleep is absent here although years != 1 makes it required.
    # Intended expectation: gender should not be present, because the rule
    # that requires it (colours == 19 and shapes equal to 1 or 3) does not
    # apply -- the schema itself has no constraint that forbids it.
    {"id": "b", "colours": 1, "years": 6, "shapes": 3, "gender": 3},
    # Invalid: shapes is required (colours == 1 and years in 7/8/9) but is
    # missing; sleep 3 is also not one of 1/2/5.
    {"id": "c", "colours": 1, "years": 9, "sleep": 3},
    # Passes schema validation. Intended to be invalid: sleep is present
    # although years == 1, i.e. the rule that requires sleep does not apply.
    {"id": "d", "colours": 1, "years": 1, "sleep": 5},
]
example code
from jsonschema import validate, Draft7Validator, ValidationError
from jsonschema.validators import validator_for

# The schema declares its draft through "$schema" (2019-09), so let jsonschema
# pick the matching validator class instead of hard-coding Draft 7.
# Draft7Validator stays as the fallback for a missing/unrecognised "$schema".
validator_cls = validator_for(schema, default=Draft7Validator)

# The validator depends only on the schema, so build it once, outside the loop.
validator = validator_cls(schema)

for data in data_list:
    # Order the errors by the path of the offending value in the record.
    errors = sorted(validator.iter_errors(data), key=lambda e: e.path)
    if errors:
        print(data.get("id"))
        for error in errors:
            print(f"Schema Error in {list(error.path)}: {error.message}")

# NOTE: nothing is reported for id "d" (sleep present while years == 1).
# The schema's if/then rules only add a "required" constraint when their
# condition holds; they have no "else" branch, so when the condition fails the
# property is simply optional and its presence is not an error. To have the
# validator reject it, the rule itself must forbid the property, e.g. by adding
#   "else": {"not": {"required": ["sleep"]}}
# (or "else": {"properties": {"sleep": false}}) next to the matching "then".