From 6cc8cd9499b41c4e88218fe49d850f3dcc70ec06 Mon Sep 17 00:00:00 2001 From: James Date: Wed, 9 Nov 2022 15:47:45 +0000 Subject: [PATCH 1/4] jsonschema_validation: Group errors by type https://github.com/Open-Telecoms-Data/cove-ofds/issues/4 https://github.com/Open-Telecoms-Data/cove-ofds/issues/22 --- cove_ofds/jsonschema_validation_errors.py | 72 +++++++++ cove_ofds/process.py | 28 +++- cove_ofds/templates/cove_ofds/explore.html | 2 +- .../jsonschema_validation_panel.html | 151 ++++++++++++++++++ .../jsonschema_validation_table.html | 39 +++++ .../templates/cove_ofds/validation_table.html | 62 ------- 6 files changed, 285 insertions(+), 69 deletions(-) create mode 100644 cove_ofds/jsonschema_validation_errors.py create mode 100644 cove_ofds/templates/cove_ofds/jsonschema_validation_panel.html create mode 100644 cove_ofds/templates/cove_ofds/jsonschema_validation_table.html delete mode 100644 cove_ofds/templates/cove_ofds/validation_table.html diff --git a/cove_ofds/jsonschema_validation_errors.py b/cove_ofds/jsonschema_validation_errors.py new file mode 100644 index 0000000..5a57286 --- /dev/null +++ b/cove_ofds/jsonschema_validation_errors.py @@ -0,0 +1,72 @@ +def add_type_to_json_schema_validation_error(data: dict) -> dict: + + if data["validator"] == "prefixItems": + data["cove_type"] = "PrefixItems" + + elif data["validator"] == "const": + data["cove_type"] = "Valuedoesnotmatchconstant" + + elif data["validator"] == "minItems": + data["cove_type"] = "Emptyarray" + + elif data["validator"] == "uniqueItems": + data["cove_type"] = "Nonuniqueitems" + + elif data["validator"] == "pattern" and data["instance"] in [ + "properties", + "features", + ]: + data["cove_type"] = "Fieldnamedoesnotmatchpattern" + + elif data["validator"] == "pattern" and data["instance"] == "describedby": + data["cove_type"] = "Valuedoesnotmatchpattern" + + elif data["validator"] == "minLength": + data["cove_type"] = "Emptystring" + + elif data["validator"] == "enum": + data["cove_type"] = 
"Valuedoesnotmatchanycodes." + + elif data["validator"] == "type" and data["validator_value"] == "boolean": + data["cove_type"] = "Valueisnotaboolean" + + elif data["validator"] == "type" and data["validator_value"] == "integer": + data["cove_type"] = "Valueisnotaninteger" + + elif data["validator"] == "type" and data["validator_value"] == "number": + data["cove_type"] = "Valueisnotanumber" + + elif data["validator"] == "type" and data["validator_value"] == "string": + data["cove_type"] = "Valueisnotastring" + + elif data["validator"] == "type" and data["validator_value"] == "object": + data["cove_type"] = "Valueisnotanobject" + + elif data["validator"] == "type" and data["validator_value"] == "array": + data["cove_type"] = "Valueisnotanarray" + + elif data["validator"] == "required": + data["cove_type"] = "Missingrequiredfields" + + elif data["validator"] == "minProperties": + data["cove_type"] = "Emptyobject" + + elif data["validator"] == "format" and data["validator_value"] == "date": + data["cove_type"] = "Incorrectlyformatteddate" + + elif data["validator"] == "format" and data["validator_value"] == "iri": + data["cove_type"] = "Incorrectlyformattediri" + + elif data["validator"] == "format" and data["validator_value"] == "uri": + data["cove_type"] = "Incorrectlyformatteduri" + + elif data["validator"] == "format" and data["validator_value"] == "uuid": + data["cove_type"] = "Incorrectlyformatteduuid" + + else: + data["cove_type"] = "unknown" + + # TODO this should be in lib + data["path_no_num"] = tuple(key for key in data["path"] if isinstance(key, str)) + + return data diff --git a/cove_ofds/process.py b/cove_ofds/process.py index ffe4184..e7c9674 100644 --- a/cove_ofds/process.py +++ b/cove_ofds/process.py @@ -9,6 +9,7 @@ from libcoveofds.python_validate import PythonValidate from libcoveofds.schema import OFDSSchema +import cove_ofds.jsonschema_validation_errors from libcoveweb2.models import SuppliedDataFile from libcoveweb2.process import ProcessDataTask 
@@ -541,14 +542,29 @@ def process(self, process_data: dict) -> dict: schema = OFDSSchema() worker = JSONSchemaValidator(schema) - context = {"validation_errors": worker.validate(data)} - context["validation_errors"] = [i.json() for i in context["validation_errors"]] - context["validation_errors_count"] = len(context["validation_errors"]) - context["validation_errors"] = group_data_list_by( - context["validation_errors"], - lambda i: str(i["path"]) + i["validator"] + i["message"], + # Get list of validation errors + validation_errors = worker.validate(data) + validation_errors = [i.json() for i in validation_errors] + + # Add type to each + validation_errors = [ + cove_ofds.jsonschema_validation_errors.add_type_to_json_schema_validation_error( + i + ) + for i in validation_errors + ] + + # Add count + context = {"validation_errors_count": len(validation_errors)} + + # group by type + validation_errors = group_data_list_by( + validation_errors, lambda i: str(i["cove_type"]) ) + # and we are done + context["validation_errors"] = validation_errors + with open(self.data_filename, "w") as fp: json.dump(context, fp, indent=4) diff --git a/cove_ofds/templates/cove_ofds/explore.html b/cove_ofds/templates/cove_ofds/explore.html index 105fa7c..d97b424 100644 --- a/cove_ofds/templates/cove_ofds/explore.html +++ b/cove_ofds/templates/cove_ofds/explore.html @@ -188,7 +188,7 @@

{% trans 'The structure and format of your data does not conform to the OFDS schema. You should check your mapping and data pipeline for errors. For more information, see the ' %}{% trans 'reference documentation' %}.

- {% include "cove_ofds/validation_table.html" %} + {% include "cove_ofds/jsonschema_validation_panel.html" %}
{% else %} diff --git a/cove_ofds/templates/cove_ofds/jsonschema_validation_panel.html b/cove_ofds/templates/cove_ofds/jsonschema_validation_panel.html new file mode 100644 index 0000000..dbedf81 --- /dev/null +++ b/cove_ofds/templates/cove_ofds/jsonschema_validation_panel.html @@ -0,0 +1,151 @@ +{% load i18n %} + +{% if 'prefixItems' in validation_errors %} +

{% trans 'prefixItems' %}

+

DESCRIPTION

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.prefixItems %} +{% endif %} + + +{% if 'Valuedoesnotmatchconstant' in validation_errors %} +

{% trans 'Value does not match constant' %}

+

You must update each value to match the constant specified in the schema.

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valuedoesnotmatchconstant %} +{% endif %} + + +{% if 'Emptyarray' in validation_errors %} +

{% trans 'Empty array' %}

+

You must omit empty arrays from your data in their entirety (key and value).

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Emptyarray %} +{% endif %} + + +{% if 'Nonuniqueitems' in validation_errors %} +

{% trans 'Non-unique items' %}

+

You must ensure that the items in each array are unique.

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Nonuniqueitems %} +{% endif %} + + +{% if 'Fieldnamedoesnotmatchpattern' in validation_errors %} +

{% trans 'Field name does not match pattern' %}

+

You must ensure that fields in `Node.location` and `Span.route` are not named 'properties' or 'nodes'.

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Fieldnamedoesnotmatchpattern %} +{% endif %} + +{% if 'Valuedoesnotmatchpattern' in validation_errors %} +

{% trans 'Value does not match pattern' %}

+

You must ensure that only the first item in the `links` array has `.rel` set to 'describedBy'.

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valuedoesnotmatchpattern %} +{% endif %} + + +{% if 'Emptystring' in validation_errors %} +

{% trans 'Empty string' %}

+

You must omit empty strings from your data in their entirety (key and value).

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Emptystring %} +{% endif %} + + +{% if 'Valuedoesnotmatchanycodes' in validation_errors %} +

{% trans 'Value does not match any codes.' %}

+

You must update each value to match a code from the codelist specified in the schema.

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valuedoesnotmatchanycodes %} +{% endif %} + + +{% if 'Valueisnotaboolean' in validation_errors %} +

{% trans 'Value is not a boolean' %}

+

You must ensure that each value is either `true` or `false`. You should check that values are not enclosed in quote characters (`"`).

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valueisnotaboolean %} +{% endif %} + + +{% if 'Valueisnotaninteger' in validation_errors %} +

{% trans 'Value is not an integer' %}

+

+ You must ensure that each value contains only digits (`0-9`) and, optionally, the dot character (`.`). Integer values must have either no fractional part (e.g. `1`) or a zero fractional part (e.g. `1.0`). + You should check that values are not enclosed in quote characters, e.g. `1` is an integer, but `"1"` is a string. +

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valueisnotaninteger %} +{% endif %} + + +{% if 'Valueisnotanumber' in validation_errors %} +

{% trans 'Value is not a number' %}

+

You must ensure that each value contains only digits (`0-9`) and, optionally, the dot character (`.`). You should check that values are not enclosed in quote characters, e.g. `1` is a number, but `"1"` is a string.

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valueisnotanumber %} +{% endif %} + + +{% if 'Valueisnotastring' in validation_errors %} +

{% trans 'Value is not a string' %}

+

You must ensure that each value begins and ends with the quote character (`"`) and that any quotes within the value are escaped with a backslash (`\`).

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valueisnotastring %} +{% endif %} + + +{% if 'Valueisnotanobject' in validation_errors %} +

{% trans 'Value is not an object' %}

+

You must ensure that each value is enclosed in curly braces (`{` and `}`) and contains only key/value pairs.

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valueisnotanobject %} +{% endif %} + + +{% if 'Valueisnotanarray' in validation_errors %} +

{% trans 'Value is not an array' %}

+

You must ensure that each value is enclosed in square brackets (`[` and `]`).

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Valueisnotanarray %} +{% endif %} + + +{% if 'Missingrequiredfields' in validation_errors %} +

{% trans 'Missing required fields' %}

+

You must ensure that your data contains the required fields specified in the schema.

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Missingrequiredfields %} +{% endif %} + + +{% if 'Emptyobject' in validation_errors %} +

{% trans 'Empty object' %}

+

You must omit empty objects from your data in their entirety (key and value).

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Emptyobject %} +{% endif %} + + +{% if 'Incorrectlyformatteddate' in validation_errors %} +

{% trans 'Incorrectly formatted date' %}

+

You must ensure that each date is in `"YYYY-MM-DD"` format.

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Incorrectlyformatteddate %} +{% endif %} + + +{% if 'Incorrectlyformattediri' in validation_errors %} +

{% trans 'Incorrectly formatted iri' %}

+

You must ensure that each iri is formatted according to RFC3987.

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Incorrectlyformattediri %} +{% endif %} + + +{% if 'Incorrectlyformatteduri' in validation_errors %} +

{% trans 'Incorrectly formatted uri' %}

+

You must ensure that each uri is formatted according to [RFC3896](https://www.rfc-editor.org/rfc/rfc3986).

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Incorrectlyformatteduri %} +{% endif %} + + +{% if 'Incorrectlyformatteduuid' in validation_errors %} +

{% trans 'Incorrectly formatted uuid' %}

+

+ You must ensure that each uuid is formatted according to [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122). + For more information, see how to generate universally unique identifiers. +

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Incorrectlyformatteduuid %} +{% endif %} + + +{% if 'unknown' in validation_errors %} +

{% trans 'unknown' %}

+ {% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.unknown %} +{% endif %} diff --git a/cove_ofds/templates/cove_ofds/jsonschema_validation_table.html b/cove_ofds/templates/cove_ofds/jsonschema_validation_table.html new file mode 100644 index 0000000..7d219b6 --- /dev/null +++ b/cove_ofds/templates/cove_ofds/jsonschema_validation_table.html @@ -0,0 +1,39 @@ +{% load i18n %} +{% load cove_tags %} + + + + + + + + + + + {% for error in validation_errors_for_table %} + + + + + + {% endfor %} + +
{% trans 'Identifiers' %}{% trans 'Path' %}{% trans 'Value' %}
+ {% if error.data_ids.network_id or error.data_ids.span_id or error.data_ids.node_id %} + {% if error.data_ids.network_id %} +
{% trans 'Network' %}: {{ error.data_ids.network_id }}
+ {% endif %} + {% if error.data_ids.span_id %} +
{% trans 'Span' %}: {{ error.data_ids.span_id }}
+ {% endif %} + {% if error.data_ids.node_id %} +
{% trans 'Node' %}: {{ error.data_ids.node_id }}
+ {% endif %} + {% else %} + N/A + {% endif %} +
+ /{{ error.path|join:"/" }} + + {{ error.instance }} +
diff --git a/cove_ofds/templates/cove_ofds/validation_table.html b/cove_ofds/templates/cove_ofds/validation_table.html deleted file mode 100644 index c6a4784..0000000 --- a/cove_ofds/templates/cove_ofds/validation_table.html +++ /dev/null @@ -1,62 +0,0 @@ -{% load i18n %} -{% load cove_tags %} - - - - - - - - - - - - {% for key, errors in validation_errors.items %} - - - - - - - - {% endfor %} - -
{% trans 'Error Description' %}{% trans 'Error Count' %}{% trans 'Location of first 3 errors' %}{% trans 'Ids of first 3 errors' %}
-

- {{ errors.0.message }} -

-
-
    - {% for error in errors|slice:":3" %} -
  • {{value.value}}
  • - {% endfor %} -
-
-
    - {% for error in errors|slice:":3" %} -
  • - {{ error.path }} -
  • - {% endfor %} -
-
-
    - {% for error in errors|slice:":3" %} -
  • - {% if error.data_ids.network_id or error.data_ids.span_id or error.data_ids.node_id %} - {% if error.data_ids.network_id %} - {% trans 'Network' %}: {{ error.data_ids.network_id }} - {% endif %} - {% if error.data_ids.span_id %} - {% trans 'Span' %}: {{ error.data_ids.span_id }} - {% endif %} - {% if error.data_ids.node_id %} - {% trans 'Node' %}: {{ error.data_ids.node_id }} - {% endif %} - {% else %} - N/A - {% endif %} -
  • - {% endfor %} -
-
From b4d46a859a7fa0fa97e0904433e7460088ed5e0f Mon Sep 17 00:00:00 2001 From: James Date: Thu, 10 Nov 2022 07:56:15 +0000 Subject: [PATCH 2/4] jsonschema_validation: Markdown links to html links https://github.com/Open-Telecoms-Data/cove-ofds/pull/32#issuecomment-1309483332 https://github.com/Open-Telecoms-Data/cove-ofds/issues/4 https://github.com/Open-Telecoms-Data/cove-ofds/issues/22 --- .../templates/cove_ofds/jsonschema_validation_panel.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cove_ofds/templates/cove_ofds/jsonschema_validation_panel.html b/cove_ofds/templates/cove_ofds/jsonschema_validation_panel.html index dbedf81..f07d98f 100644 --- a/cove_ofds/templates/cove_ofds/jsonschema_validation_panel.html +++ b/cove_ofds/templates/cove_ofds/jsonschema_validation_panel.html @@ -130,7 +130,7 @@

{% trans 'Incorrectly formatted iri' %}

{% if 'Incorrectlyformatteduri' in validation_errors %}

{% trans 'Incorrectly formatted uri' %}

-

You must ensure that each uri is formatted according to [RFC3896](https://www.rfc-editor.org/rfc/rfc3986).

+

You must ensure that each uri is formatted according to RFC3986.

{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Incorrectlyformatteduri %} {% endif %} @@ -138,7 +138,7 @@

{% trans 'Incorrectly formatted uri' %}

{% if 'Incorrectlyformatteduuid' in validation_errors %}

{% trans 'Incorrectly formatted uuid' %}

- You must ensure that each uuid is formatted according to [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122). + You must ensure that each uuid is formatted according to RFC4122. For more information, see how to generate universally unique identifiers.

{% include "cove_ofds/jsonschema_validation_table.html" with validation_errors_for_table=validation_errors.Incorrectlyformatteduuid %} From 93ade9f9a92c0ad84c42f9673e818fc612c10783 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 10 Nov 2022 08:04:33 +0000 Subject: [PATCH 3/4] jsonschema_validation: Add comments https://github.com/Open-Telecoms-Data/cove-ofds/issues/4 https://github.com/Open-Telecoms-Data/cove-ofds/issues/22 --- cove_ofds/jsonschema_validation_errors.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cove_ofds/jsonschema_validation_errors.py b/cove_ofds/jsonschema_validation_errors.py index 5a57286..2a30d6d 100644 --- a/cove_ofds/jsonschema_validation_errors.py +++ b/cove_ofds/jsonschema_validation_errors.py @@ -1,3 +1,5 @@ +# This function may be a candidate to move to libcoveofds? +# It could do with some testing, wherever it ends up def add_type_to_json_schema_validation_error(data: dict) -> dict: if data["validator"] == "prefixItems": @@ -12,6 +14,8 @@ def add_type_to_json_schema_validation_error(data: dict) -> dict: elif data["validator"] == "uniqueItems": data["cove_type"] = "Nonuniqueitems" + # these 2 pattern checks are brittle + # using instance is not a great choice as that may easily change if the schema changes. 
TODO elif data["validator"] == "pattern" and data["instance"] in [ "properties", "features", @@ -66,7 +70,7 @@ def add_type_to_json_schema_validation_error(data: dict) -> dict: else: data["cove_type"] = "unknown" - # TODO this should be in lib + # TODO this should be in libcoveofds data["path_no_num"] = tuple(key for key in data["path"] if isinstance(key, str)) return data From 5ef73af23fc06027a7e7ac173a4c4feefaf9c695 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 10 Nov 2022 08:30:57 +0000 Subject: [PATCH 4/4] process: Add download_geojson_meta_url var from ConvertJSONIntoGeoJSON --- cove_ofds/process.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cove_ofds/process.py b/cove_ofds/process.py index e7c9674..147f32e 100644 --- a/cove_ofds/process.py +++ b/cove_ofds/process.py @@ -279,6 +279,9 @@ def get_context(self): context["download_geojson_spans_url"] = os.path.join( self.supplied_data.data_url(), "spans.geo.json" ) + context["download_geojson_meta_url"] = os.path.join( + self.supplied_data.data_url(), "geojson.meta.json" + ) context["download_geojson_nodes_size"] = os.stat( self.nodes_file_name ).st_size