diff --git a/pyQuARC/code/checker.py b/pyQuARC/code/checker.py index 2e603a75..4bb401c7 100644 --- a/pyQuARC/code/checker.py +++ b/pyQuARC/code/checker.py @@ -1,6 +1,7 @@ import json from xmltodict import parse +from concurrent.futures import ThreadPoolExecutor, as_completed from .custom_checker import CustomChecker from .schema_validator import SchemaValidator @@ -154,6 +155,48 @@ def _check_dependencies_validity(self, dependencies, field_dict): return False return True + def _process_field( + self, + func, + check, + rule_id, + metadata_content, + field_dict, + result_dict, + rule_mapping, + ): + """ + Process a single field according to the given rule and update result_dict + """ + external_data = rule_mapping.get("data", []) + relation = rule_mapping.get("relation") + dependencies = self.scheduler.get_all_dependencies( + rule_mapping, check, field_dict + ) + main_field = field_dict["fields"][0] + external_data = field_dict.get("data", external_data) + result_dict.setdefault(main_field, {}) + + if not self._check_dependencies_validity(dependencies, field_dict): + return + + result = self.custom_checker.run( + func, metadata_content, field_dict, external_data, relation + ) + + self.tracker.update_data(rule_id, main_field, result["valid"]) + + # Avoid adding null valid results for rules that are not applied + if result["valid"] is None: + return + + result_dict[main_field][rule_id] = result + + message = self.build_message(result, rule_id) + if message: + result["message"] = message + result["remediation"] = self.message(rule_id, "remediation") + def _run_func(self, func, check, rule_id, metadata_content, result_dict): """ Run the check function for `rule_id` and update `result_dict` @@ -161,36 +204,32 @@ def _run_func(self, func, check, rule_id, metadata_content, result_dict): rule_mapping = self.rules_override.get(rule_id) or self.rule_mapping.get( rule_id ) - external_data = rule_mapping.get("data", []) - relation = rule_mapping.get("relation") 
list_of_fields_to_apply = rule_mapping.get("fields_to_apply").get( self.metadata_format, {} ) - - for field_dict in list_of_fields_to_apply: - dependencies = self.scheduler.get_all_dependencies( - rule_mapping, check, field_dict - ) - main_field = field_dict["fields"][0] - external_data = field_dict.get("data", external_data) - result_dict.setdefault(main_field, {}) - if not self._check_dependencies_validity(dependencies, field_dict): - continue - result = self.custom_checker.run( - func, metadata_content, field_dict, external_data, relation - ) - - self.tracker.update_data(rule_id, main_field, result["valid"]) - - # this is to avoid "valid" = null in the result, for rules that are not applied - if result["valid"] is None: - continue - result_dict[main_field][rule_id] = result - - message = self.build_message(result, rule_id) - if message: - result["message"] = message - result["remediation"] = self.message(rule_id, "remediation") + with ThreadPoolExecutor(max_workers=5) as executor: + futures = [] + for field_dict in list_of_fields_to_apply: + future = executor.submit( + self._process_field, + func, + check, + rule_id, + metadata_content, + field_dict, + result_dict, + rule_mapping, + ) + futures.append(future) + + # Wait for all futures to complete + for future in as_completed(futures): + # Retrieve the result or raise an exception if an error occurred + try: + future.result() + except Exception as e: + # Handle the exception from the thread + raise e def perform_custom_checks(self, metadata_content): """ diff --git a/pyQuARC/code/constants.py b/pyQuARC/code/constants.py index 30a63f5c..e77c74fc 100644 --- a/pyQuARC/code/constants.py +++ b/pyQuARC/code/constants.py @@ -77,3 +77,13 @@ } CMR_URL = "https://cmr.earthdata.nasa.gov" + +DATE_FORMATS = [ + "%Y-%m-%dT%H:%M:%S.%f", # Year to microsecond + "%Y-%m-%dT%H:%M:%S", # Year to second + "%Y-%m-%dT%H:%M", # Year to minute + "%Y-%m-%dT%H", # Year to hour + "%Y-%m-%d", # Year to day + "%Y-%m", # Year to month + "%Y", 
# Year +] diff --git a/pyQuARC/code/custom_checker.py b/pyQuARC/code/custom_checker.py index 16006445..f38cedda 100644 --- a/pyQuARC/code/custom_checker.py +++ b/pyQuARC/code/custom_checker.py @@ -1,4 +1,5 @@ from urllib.parse import urlparse +from concurrent.futures import ThreadPoolExecutor, as_completed class CustomChecker: @@ -103,6 +104,33 @@ def _get_path_value(content_to_validate, path_string): ) return container + @staticmethod + def _process_argument(arg, func, relation, external_data, external_relation): + """ + Process the argument by calling the provided function with the given arguments. + + Args: + arg: The argument to be processed. + func: The function to be called. + relation: The relation argument. + external_data: The external data argument. + external_relation: The external relation argument. + + Returns: + A dict containing the updated invalid_values list and the updated validity flag. + """ + + function_args = [*arg] + function_args.extend( + [ + extra_arg + for extra_arg in [relation, *external_data, external_relation] + if extra_arg + ] + ) + func_return = func(*function_args) + return func_return + def run( self, func, content_to_validate, field_dict, external_data, external_relation ): @@ -137,24 +165,35 @@ def run( invalid_values = [] validity = None - for arg in args: - function_args = [*arg] - function_args.extend( - [ - extra_arg - for extra_arg in [relation, *external_data, external_relation] - if extra_arg - ] - ) - func_return = func(*function_args) - valid = func_return["valid"] # can be True, False or None - if valid is not None: - if valid: - validity = validity or (validity is None) - else: - if "value" in func_return: - invalid_values.append(func_return["value"]) - validity = False + + # Process arguments using multithreading + with ThreadPoolExecutor() as executor: + future_results = [] + for arg in args: + future = executor.submit( + self._process_argument, + arg, + func, + relation, + external_data, + external_relation, + ) + 
future_results.append(future) + + # Retrieve results from futures + for future in as_completed(future_results): + try: + func_return = future.result() + valid = func_return["valid"] # can be True, False or None + if valid is not None: + if valid: + validity = validity or (validity is None) + else: + if "value" in func_return: + invalid_values.append(func_return["value"]) + validity = False + except Exception as e: + raise e result["valid"] = validity result["value"] = invalid_values return result diff --git a/pyQuARC/code/custom_validator.py b/pyQuARC/code/custom_validator.py index 27cbebad..bf3620d1 100644 --- a/pyQuARC/code/custom_validator.py +++ b/pyQuARC/code/custom_validator.py @@ -92,13 +92,13 @@ def one_item_presence_check(*field_values): value = None for field_value in field_values: - if field_value: + if field_value is not None: value = field_value validity = True break return {"valid": validity, "value": value} - + @staticmethod def dif_standard_product_check(*field_values): """ @@ -130,7 +130,7 @@ def license_url_description_check(description_field, url_field, license_text): description_field (string): string describing the URL """ validity = True - value = description_field + value = description_field if not license_text and not url_field: validity = False diff --git a/pyQuARC/code/datetime_validator.py b/pyQuARC/code/datetime_validator.py index 34f67186..fd67e4ef 100644 --- a/pyQuARC/code/datetime_validator.py +++ b/pyQuARC/code/datetime_validator.py @@ -4,7 +4,7 @@ from datetime import datetime from .base_validator import BaseValidator -from .utils import cmr_request, if_arg, set_cmr_prms +from .utils import cmr_request, if_arg, set_cmr_prms, get_date_time class DatetimeValidator(BaseValidator): @@ -117,13 +117,13 @@ def compare(first, second, relation): @staticmethod def validate_datetime_against_granules( - datetime, collection_shortname, version, sort_key, time_key + datetime_string, collection_shortname, version, sort_key, time_key ): """ 
Validates the collection datetime against the datetime of the last granule in the collection Args: - datetime (str): datetime string + datetime_string (str): datetime string collection_shortname (str): ShortName of the parent collection sort_key (str): choice of start_date and end_date time_key (str): choice of time_end and time_start @@ -143,13 +143,17 @@ def validate_datetime_against_granules( validity = True last_granule_datetime = None + date_time = None + # Compare the precision of the two datetime strings if len(granules["feed"]["entry"]) > 0: last_granule = granules["feed"]["entry"][0] last_granule_datetime = last_granule.get(time_key) - validity = datetime == last_granule_datetime + date_time = get_date_time(datetime_string) + last_granule_datetime = get_date_time(last_granule_datetime) + validity = date_time == last_granule_datetime - return {"valid": validity, "value": (datetime, last_granule_datetime)} + return {"valid": validity, "value": (date_time, last_granule_datetime)} @staticmethod @if_arg diff --git a/pyQuARC/code/url_validator.py b/pyQuARC/code/url_validator.py index 8e23b869..55a74e61 100644 --- a/pyQuARC/code/url_validator.py +++ b/pyQuARC/code/url_validator.py @@ -1,3 +1,4 @@ +import os import requests from urlextract import URLExtract @@ -54,7 +55,7 @@ def status_code_from_request(url): validity = True # extract URLs from text - extractor = URLExtract() + extractor = URLExtract(cache_dir=os.environ.get("CACHE_DIR")) urls = extractor.find_urls(text_with_urls) urls.extend(UrlValidator._extract_http_texts(text_with_urls)) @@ -115,4 +116,4 @@ def doi_link_update(value, bad_urls): if value in bad_urls: validity = False - return {"valid": validity, "Value": value} + return {"valid": validity, "value": value} diff --git a/pyQuARC/code/utils.py b/pyQuARC/code/utils.py index 80f5ab1a..1fe82270 100644 --- a/pyQuARC/code/utils.py +++ b/pyQuARC/code/utils.py @@ -1,10 +1,11 @@ import os import requests import urllib +from datetime import datetime from 
functools import wraps -from .constants import CMR_URL +from .constants import CMR_URL, DATE_FORMATS def if_arg(func): @@ -64,3 +65,20 @@ def cmr_request(cmr_prms): def collection_in_cmr(cmr_prms): return cmr_request(cmr_prms).get("hits", 0) > 0 + + +def get_date_time(dt_str): + """ + Convert a date and time string to a datetime object using predefined formats. + This function attempts to parse a date and time string (`dt_str`) into a `datetime` object. + It iterates over a list of possible date and time formats (`DATE_FORMATS`). The first successful + parse using one of these formats will result in returning the corresponding `datetime` object. + If none of the formats match, the function returns `None`. + """ + for fmt in DATE_FORMATS: + try: + date_time = datetime.strptime(dt_str, fmt) + return date_time + except ValueError: + continue + return None diff --git a/pyQuARC/schemas/umm-c-json-schema.json b/pyQuARC/schemas/umm-c-json-schema.json index 6bdc49d6..0f33edb2 100644 --- a/pyQuARC/schemas/umm-c-json-schema.json +++ b/pyQuARC/schemas/umm-c-json-schema.json @@ -32,6 +32,18 @@ "description": "This element stores the DOI (Digital Object Identifier) that identifies the collection. Note: The values should start with the directory indicator which in ESDIS' case is 10. If the DOI was registered through ESDIS, the beginning of the string should be 10.5067. The DOI URL is not stored here; it should be stored as a RelatedURL. The DOI organization that is responsible for creating the DOI is described in the Authority element. For ESDIS records the value of https://doi.org/ should be used. 
For those that want to specify that a DOI is not applicable or unknown use the second option.", "$ref": "umm-cmn-json-schema.json#/definitions/DoiType" }, + "OtherIdentifiers": { + "description": "Provides additional or provider defined identifiers of the collection.", + "type": "array", + "items": { + "$ref": "#/definitions/OtherIdentifierType" + }, + "minItems": 1 + }, + "FileNamingConvention": { + "description": "The File Naming Convention refers to the naming convention of the data set's (Collection's) data files along with a description of the granule file construction.", + "$ref": "#/definitions/FileNamingConventionType" + }, "AssociatedDOIs": { "description": "This element stores DOIs that are associated with the collection such as from campaigns and other related sources. Note: The values should start with the directory indicator which in ESDIS' case is 10. If the DOI was registered through ESDIS, the beginning of the string should be 10.5067. The DOI URL is not stored here; it should be stored as a RelatedURL. The DOI organization that is responsible for creating the DOI is described in the Authority element. For ESDIS records the value of https://doi.org/ should be used.", "type": "array", @@ -106,6 +118,11 @@ "description": "This element describes the production status of the data set. There are five choices for Data Providers: PLANNED refers to data sets to be collected in the future and are thus unavailable at the present time. For Example: The Hydro spacecraft has not been launched, but information on planned data sets may be available. ACTIVE refers to data sets currently in production or data that is continuously being collected or updated. For Example: data from the AIRS instrument on Aqua is being collected continuously. COMPLETE refers to data sets in which no updates or further data collection will be made. For Example: Nimbus-7 SMMR data collection has been completed. 
DEPRECATED refers to data sets that have been retired, but still can be retrieved. Usually newer products exist that replace the retired data set. NOT APPLICABLE refers to data sets in which a collection progress is not applicable such as a calibration collection. There is a sixth value of NOT PROVIDED that should not be used by a data provider. It is currently being used as a value when a correct translation cannot be done with the current valid values, or when the value is not provided by the data provider.", "$ref": "#/definitions/CollectionProgressEnum" }, + "DataMaturity": { + "description": "The Data Maturity element is used to inform users on where the collection is in a collection's life cycle.", + "type": "string", + "enum": ["Beta", "Provisional", "Validated", "Stage 1 Validation", "Stage 2 Validation", "Stage 3 Validation", "Stage 4 Validation"] + }, "Quality": { "description": "Free text description of the quality of the collection data. Description may include: 1) succinct description of the quality of data in the collection; 2) Any quality assurance procedures followed in producing the data in the collection; 3) indicators of collection quality or quality flags - both validated or invalidated; 4) recognized or potential problems with quality; 5) established quality control mechanisms; and 6) established quantitative quality measurements.", "$ref": "umm-cmn-json-schema.json#/definitions/QualityType" @@ -310,6 +327,7 @@ "description": "This element defines how the data may or may not be used after access is granted to assure the protection of privacy or intellectual property. This includes license text, license URL, or any special restrictions, legal prerequisites, terms and conditions, and/or limitations on using the data set. 
Data providers may request acknowledgement of the data from users and claim no responsibility for quality and completeness of data.", "oneOf": [{ "type": "object", + "title": "Description without License URL or Text.", "additionalProperties": false, "description": "This element defines how the data may or may not be used after access is granted to assure the protection of privacy or intellectual property. This includes license text, license URL, or any special restrictions, legal prerequisites, terms and conditions, and/or limitations on using the data set. Data providers may request acknowledgement of the data from users and claim no responsibility for quality and completeness of data.", "properties": { @@ -331,12 +349,13 @@ "required": ["Description"] }, { "type": "object", + "title": "License URL", "additionalProperties": false, "description": "This element defines how the data may or may not be used after access is granted to assure the protection of privacy or intellectual property. This includes license text, license URL, or any special restrictions, legal prerequisites, terms and conditions, and/or limitations on using the data set. Data providers may request acknowledgement of the data from users and claim no responsibility for quality and completeness of data.", "properties": { "Description": { "$ref": "#/definitions/UseConstraintsDescType" - }, + }, "LicenseURL": { "description": "This element holds the URL and associated information to access the License on the web. If this element is used the LicenseText element cannot be used.", "$ref": "umm-cmn-json-schema.json#/definitions/OnlineResourceType" @@ -356,6 +375,7 @@ "required": ["LicenseURL"] }, { "type": "object", + "title": "License Text", "additionalProperties": false, "description": "This element defines how the data may or may not be used after access is granted to assure the protection of privacy or intellectual property. 
This includes license text, license URL, or any special restrictions, legal prerequisites, terms and conditions, and/or limitations on using the data set. Data providers may request acknowledgement of the data from users and claim no responsibility for quality and completeness of data.", "properties": { @@ -494,11 +514,12 @@ "$ref": "#/definitions/GranuleSpatialRepresentationEnum" } }, - "required": ["GranuleSpatialRepresentation"] + "required": ["GranuleSpatialRepresentation"], + "allOf": [{"$ref": "#/definitions/OrbitParameterExistsIfGranuleSpatialRepresentationIsOrbit"}] }, "SpatialCoverageTypeEnum": { "type": "string", - "enum": ["HORIZONTAL", "VERTICAL", "ORBITAL", "HORIZONTAL_VERTICAL", "ORBITAL_VERTICAL", "HORIZONTAL_ORBITAL", "HORIZONTAL_VERTICAL_ORBITAL"] + "enum": ["EARTH/GLOBAL", "HORIZONTAL", "VERTICAL", "ORBITAL", "HORIZONTAL_VERTICAL", "ORBITAL_VERTICAL", "HORIZONTAL_ORBITAL", "HORIZONTAL_VERTICAL_ORBITAL", "LUNAR"] }, "HorizontalSpatialDomainType": { "type": "object", @@ -717,68 +738,165 @@ "required": ["Footprint", "FootprintUnit"] }, "OrbitParametersType": { - "type": "object", - "additionalProperties": false, "description": "Orbit parameters for the collection used by the Orbital Backtrack Algorithm.", - "properties": { - "SwathWidth": { - "description": "Total observable width of the satellite sensor nominally measured at the equator.", - "type": "number" - }, - "SwathWidthUnit": { - "description": "The SwathWidth value's unit.", - "type": "string", - "enum": ["Kilometer", "Meter"] - }, - "Footprints" : { - "description": "A list of instrument footprints or field of views. A footprint holds the largest width of the described footprint as measured on the earths surface along with the width's unit. An optional description element exists to be able to distinguish between the footprints, if that is desired. This element is optional. 
If this element is used at least 1 footprint must exist in the list.", - "type": "array", - "items": { - "$ref": "#/definitions/FootprintType" + "oneOf":[{ + "type": "object", + "title": "Orbit parameters with just swath", + "additionalProperties": false, + "properties": { + "SwathWidth": { + "description": "Total observable width of the satellite sensor nominally measured at the equator.", + "type": "number" }, - "minItems": 1 - }, - "OrbitPeriod": { - "description": "The time in decimal minutes the satellite takes to make one full orbit.", - "type": "number" - }, - "OrbitPeriodUnit": { - "description": "The Orbit Period value's unit.", - "type": "string", - "enum": ["Decimal Minute"] - }, - "InclinationAngle": { - "description": "The heading of the satellite as it crosses the equator on the ascending pass. This is the same as (180-declination) and also the same as the highest latitude achieved by the satellite.", - "type": "number" - }, - "InclinationAngleUnit": { - "description": "The InclinationAngle value's unit.", - "type": "string", - "enum": ["Degree"] - }, - "NumberOfOrbits": { - "description": "The number of full orbits composing each granule. This may be a fraction of an orbit.", - "type": "number" + "SwathWidthUnit": { + "description": "The SwathWidth value's unit.", + "type": "string", + "enum": ["Kilometer", "Meter"] + }, + "OrbitPeriod": { + "description": "The time in decimal minutes the satellite takes to make one full orbit.", + "type": "number" + }, + "OrbitPeriodUnit": { + "description": "The Orbit Period value's unit.", + "type": "string", + "enum": ["Decimal Minute"] + }, + "InclinationAngle": { + "description": "The heading of the satellite as it crosses the equator on the ascending pass. 
This is the same as (180-declination) and also the same as the highest latitude achieved by the satellite.", + "type": "number" + }, + "InclinationAngleUnit": { + "description": "The InclinationAngle value's unit.", + "type": "string", + "enum": ["Degree"] + }, + "NumberOfOrbits": { + "description": "The number of full orbits composing each granule. This may be a fraction of an orbit.", + "type": "number" + }, + "StartCircularLatitude": { + "description": "The latitude start of the orbit relative to the equator. This is used by the backtrack search algorithm to treat the orbit as if it starts from the specified latitude. This is optional and will default to 0 if not specified.", + "type": "number" + }, + "StartCircularLatitudeUnit": { + "description": "The StartCircularLatitude value's unit.", + "type": "string", + "enum": ["Degree"] + } }, - "StartCircularLatitude": { - "description": "The latitude start of the orbit relative to the equator. This is used by the backtrack search algorithm to treat the orbit as if it starts from the specified latitude. This is optional and will default to 0 if not specified.", - "type": "number" + "required": ["SwathWidth", "SwathWidthUnit", "OrbitPeriod", "OrbitPeriodUnit", "InclinationAngle", "InclinationAngleUnit", "NumberOfOrbits"], + "dependencies": { + "StartCircularLatitude": ["StartCircularLatitudeUnit"] + } + }, { + "type": "object", + "title": "Orbit parameters with just footprints", + "additionalProperties": false, + "properties": { + "Footprints" : { + "description": "A list of instrument footprints or field of views. A footprint holds the largest width of the described footprint as measured on the earths surface along with the width's unit. An optional description element exists to be able to distinguish between the footprints, if that is desired. This element is optional. 
If this element is used at least 1 footprint must exist in the list.", + "type": "array", + "items": { + "$ref": "#/definitions/FootprintType" + }, + "minItems": 1 + }, + "OrbitPeriod": { + "description": "The time in decimal minutes the satellite takes to make one full orbit.", + "type": "number" + }, + "OrbitPeriodUnit": { + "description": "The Orbit Period value's unit.", + "type": "string", + "enum": ["Decimal Minute"] + }, + "InclinationAngle": { + "description": "The heading of the satellite as it crosses the equator on the ascending pass. This is the same as (180-declination) and also the same as the highest latitude achieved by the satellite.", + "type": "number" + }, + "InclinationAngleUnit": { + "description": "The InclinationAngle value's unit.", + "type": "string", + "enum": ["Degree"] + }, + "NumberOfOrbits": { + "description": "The number of full orbits composing each granule. This may be a fraction of an orbit.", + "type": "number" + }, + "StartCircularLatitude": { + "description": "The latitude start of the orbit relative to the equator. This is used by the backtrack search algorithm to treat the orbit as if it starts from the specified latitude. 
This is optional and will default to 0 if not specified.", + "type": "number" + }, + "StartCircularLatitudeUnit": { + "description": "The StartCircularLatitude value's unit.", + "type": "string", + "enum": ["Degree"] + } }, - "StartCircularLatitudeUnit": { - "description": "The StartCircularLatitude value's unit.", - "type": "string", - "enum": ["Degree"] + "required": ["Footprints", "OrbitPeriod", "OrbitPeriodUnit", "InclinationAngle", "InclinationAngleUnit", "NumberOfOrbits"], + "dependencies": { + "StartCircularLatitude": ["StartCircularLatitudeUnit"] } - }, - "anyOf": [{ - "required": ["SwathWidth", "SwathWidthUnit"] }, { - "required": ["Footprints"] - }], - "required": ["OrbitPeriod", "OrbitPeriodUnit", "InclinationAngle", "InclinationAngleUnit", "NumberOfOrbits"], - "dependencies": { - "StartCircularLatitude": ["StartCircularLatitudeUnit"] - } + "type": "object", + "title": "Orbit parameters with both swathwidth and footprints", + "additionalProperties": false, + "properties": { + "SwathWidth": { + "description": "Total observable width of the satellite sensor nominally measured at the equator.", + "type": "number" + }, + "SwathWidthUnit": { + "description": "The SwathWidth value's unit.", + "type": "string", + "enum": ["Kilometer", "Meter"] + }, + "Footprints" : { + "description": "A list of instrument footprints or field of views. A footprint holds the largest width of the described footprint as measured on the earths surface along with the width's unit. An optional description element exists to be able to distinguish between the footprints, if that is desired. This element is optional. 
If this element is used at least 1 footprint must exist in the list.", + "type": "array", + "items": { + "$ref": "#/definitions/FootprintType" + }, + "minItems": 1 + }, + "OrbitPeriod": { + "description": "The time in decimal minutes the satellite takes to make one full orbit.", + "type": "number" + }, + "OrbitPeriodUnit": { + "description": "The Orbit Period value's unit.", + "type": "string", + "enum": ["Decimal Minute"] + }, + "InclinationAngle": { + "description": "The heading of the satellite as it crosses the equator on the ascending pass. This is the same as (180-declination) and also the same as the highest latitude achieved by the satellite.", + "type": "number" + }, + "InclinationAngleUnit": { + "description": "The InclinationAngle value's unit.", + "type": "string", + "enum": ["Degree"] + }, + "NumberOfOrbits": { + "description": "The number of full orbits composing each granule. This may be a fraction of an orbit.", + "type": "number" + }, + "StartCircularLatitude": { + "description": "The latitude start of the orbit relative to the equator. This is used by the backtrack search algorithm to treat the orbit as if it starts from the specified latitude. This is optional and will default to 0 if not specified.", + "type": "number" + }, + "StartCircularLatitudeUnit": { + "description": "The StartCircularLatitude value's unit.", + "type": "string", + "enum": ["Degree"] + } + }, + "required": ["SwathWidth", "SwathWidthUnit", "Footprints", "OrbitPeriod", "OrbitPeriodUnit", "InclinationAngle", "InclinationAngleUnit", "NumberOfOrbits"], + "dependencies": { + "StartCircularLatitude": ["StartCircularLatitudeUnit"] + } + }] }, "GranuleSpatialRepresentationEnum": { "type": "string", @@ -786,8 +904,8 @@ }, "TilingIdentificationSystemType": { "description": "A two-dimensional tiling system for a collection. There are two types of tiling systems. 
Those that use alaph-numeric coordinates and those that use numeric coordinates.", - "type": "object", "oneOf": [{ + "type": "object", "title": "Tiling Systems that use alpha-numberic coordinates.", "additionalProperties": false, "description": "Information about a two-dimensional tiling system that uses alpha-numeric coordinates related to this collection.", @@ -807,6 +925,7 @@ }, "required": ["TilingIdentificationSystemName", "Coordinate1", "Coordinate2"] }, { + "type": "object", "title": "Tiling Systems that use numeric coordinates.", "additionalProperties": false, "description": "Information about a two-dimensional tiling system that uses numeric coordinates related to this collection.", @@ -948,8 +1067,9 @@ }, "ResolutionAndCoordinateSystemType": { "description": "This class defines the horizontal spatial extents coordinate system and the data product's horizontal data resolution. The horizontal data resolution is defined as the smallest horizontal distance between successive elements of data in a dataset. This is synonymous with terms such as ground sample distance, sample spacing and pixel size. It is to be noted that the horizontal data resolution could be different in the two horizontal dimensions. 
Also, it is different from the spatial resolution of an instrument, which is the minimum distance between points that an instrument can see as distinct.", - "type": "object", "oneOf": [{ + "type": "object", + "title": "Description of the Resolution", "additionalProperties": false, "properties": { "Description": { @@ -963,6 +1083,8 @@ }, "required": ["GeodeticModel"] }, { + "type": "object", + "title": "Horizontal Data Resolution Information", "additionalProperties": false, "properties": { "Description": { @@ -980,6 +1102,8 @@ }, "required": ["HorizontalDataResolution"] }, { + "type": "object", + "title": "Local Coordinate System Information", "additionalProperties": false, "properties": { "Description": { @@ -1353,6 +1477,7 @@ "description": "This element defines a single archive artifact which a data provider would like to inform an end user that it exists.", "anyOf": [{ "type": "object", + "title": "Total collection file size for archive", "additionalProperties": false, "properties": { "Format": { @@ -1399,6 +1524,7 @@ } }, { "type": "object", + "title": "Calculate collection file size by start date for archive", "additionalProperties": false, "properties": { "Format": { @@ -1446,6 +1572,7 @@ "description": "This element defines a single artifact that is distributed by the data provider. This element only includes the distributable artifacts that can be obtained by the user without the user having to invoke a service. 
These should be documented in the UMM-S specification.", "anyOf": [{ "type": "object", + "title": "Total collection file size for distribution", "additionalProperties": false, "properties": { "Format": { @@ -1506,6 +1633,7 @@ } }, { "type": "object", + "title": "Calculate collection file size by start date for distribution", "additionalProperties": false, "properties": { "Format": { @@ -1633,27 +1761,135 @@ "type": "string", "enum": ["us-east-1", "us-east-2", "us-west-1", "us-west-2"] }, - "AssociatedDoiType": { + "FileNamingConventionType": { "type": "object", "additionalProperties": false, - "description": "This element stores the DOI (Digital Object Identifier) that identifies the collection. Note: The values should start with the directory indicator which in ESDIS' case is 10. If the DOI was registered through ESDIS, the beginning of the string should be 10.5067. The DOI URL is not stored here; it should be stored as a RelatedURL. The DOI organization that is responsible for creating the DOI is described in the Authority element. For ESDIS records the value of https://doi.org/ should be used. NASA metadata providers are strongly encouraged to include DOI and DOI Authority for their collections using CollectionDOI property.", + "description": "The File Naming Convention refers to the naming convention of the data set's (Collection's) data files along with a description of the granule file construction.", "properties": { - "DOI": { - "description": "This element stores the DOI (Digital Object Identifier) that identifies the collection. Note: The values should start with the directory indicator which in ESDIS' case is 10. If the DOI was registered through ESDIS, the beginning of the string should be 10.5067. 
The DOI URL is not stored here; it should be stored as a RelatedURL.", + "Convention": { + "description": "This element represents the convention of the filename.", "type": "string", "minLength": 1, - "maxLength": 1024 - }, - "Title": { - "description": "The title of the DOI landing page. The title describes the DOI object to a user, so they don't have to look it up themselves to understand the association.", - "$ref": "umm-cmn-json-schema.json#/definitions/TitleType" + "maxLength": 4000 }, - "Authority": { - "description": "The DOI organization that is responsible for creating the DOI is described in the Authority element. For ESDIS records the value of https://doi.org/ should be used.", - "$ref": "umm-cmn-json-schema.json#/definitions/AuthorityType" + "Description": { + "description": "This element describes the convention of the filename.", + "type": "string", + "minLength": 1, + "maxLength": 4000 } }, - "required": ["DOI"] + "required": ["Convention"] + }, + "AssociatedDoiType": { + "oneOf": [{ + "type": "object", + "title": "All Documented DOI Types", + "additionalProperties": false, + "description": "This element stores the DOI (Digital Object Identifier) that identifies the collection. Note: The values should start with the directory indicator which in ESDIS' case is 10. If the DOI was registered through ESDIS, the beginning of the string should be 10.5067. The DOI URL is not stored here; it should be stored as a RelatedURL. The DOI organization that is responsible for creating the DOI is described in the Authority element. For ESDIS records the value of https://doi.org/ should be used. NASA metadata providers are strongly encouraged to include DOI and DOI Authority for their collections using CollectionDOI property.", + "properties": { + "DOI": { + "description": "This element stores the DOI (Digital Object Identifier) that identifies the collection. Note: The values should start with the directory indicator which in ESDIS' case is 10. 
If the DOI was registered through ESDIS, the beginning of the string should be 10.5067. The DOI URL is not stored here; it should be stored as a RelatedURL.", + "type": "string", + "minLength": 1, + "maxLength": 1024 + }, + "Title": { + "description": "The title of the DOI landing page. The title describes the DOI object to a user, so they don't have to look it up themselves to understand the association.", + "$ref": "umm-cmn-json-schema.json#/definitions/TitleType" + }, + "Authority": { + "description": "The DOI organization that is responsible for creating the DOI is described in the Authority element. For ESDIS records the value of https://doi.org/ should be used.", + "$ref": "umm-cmn-json-schema.json#/definitions/AuthorityType" + }, + "Type": { + "description": "This element describes to what DOI is associated.", + "type": "string", + "enum": ["Child Dataset", "Collaborative/Other Agency", "Field Campaign", "Parent Dataset", "Related Dataset"] + } + }, + "required": ["DOI"] + }, { + "type": "object", + "title": "Other DOI Types", + "additionalProperties": false, + "description": "This element stores the DOI (Digital Object Identifier) that identifies the collection. Note: The values should start with the directory indicator which in ESDIS' case is 10. If the DOI was registered through ESDIS, the beginning of the string should be 10.5067. The DOI URL is not stored here; it should be stored as a RelatedURL. The DOI organization that is responsible for creating the DOI is described in the Authority element. For ESDIS records the value of https://doi.org/ should be used. NASA metadata providers are strongly encouraged to include DOI and DOI Authority for their collections using CollectionDOI property.", + "properties": { + "DOI": { + "description": "This element stores the DOI (Digital Object Identifier) that identifies the collection. Note: The values should start with the directory indicator which in ESDIS' case is 10. 
If the DOI was registered through ESDIS, the beginning of the string should be 10.5067. The DOI URL is not stored here; it should be stored as a RelatedURL.", + "type": "string", + "minLength": 1, + "maxLength": 1024 + }, + "Title": { + "description": "The title of the DOI landing page. The title describes the DOI object to a user, so they don't have to look it up themselves to understand the association.", + "$ref": "umm-cmn-json-schema.json#/definitions/TitleType" + }, + "Authority": { + "description": "The DOI organization that is responsible for creating the DOI is described in the Authority element. For ESDIS records the value of https://doi.org/ should be used.", + "$ref": "umm-cmn-json-schema.json#/definitions/AuthorityType" + }, + "Type": { + "description": "This element describes to what DOI is associated.", + "type": "string", + "enum": ["Other"] + }, + "DescriptionOfOtherType": { + "description": "This element allows the curator to describe what kind of DOI is present when the value of Other is chosen as the type. 
This element is not allowed if a value other than Other is chosen.", + "type": "string", + "minLength": 1, + "maxLength": 1024 + } + }, + "required": ["DOI", "Type", "DescriptionOfOtherType"] + }] + }, + "OtherIdentifierType": { + "oneOf": [{ + "type": "object", + "title": "ArchiveSetsNumber", + "additionalProperties": false, + "description": "This object stores an additional identifier of the collection.", + "properties": { + "Identifier": { + "description": "This element stores the identifier", + "type": "string", + "minLength": 1, + "maxLength": 1024 + }, + "Type": { + "description": "This element represents the type of the identifier.", + "type": "string", + "enum": ["ArchiveSetsNumber"] + } + }, + "required": ["Identifier", "Type"] + }, { + "type": "object", + "title": "Other", + "additionalProperties": false, + "description": "This object stores an additional identifier of the collection.", + "properties": { + "Identifier": { + "description": "This element stores the identifier", + "type": "string", + "minLength": 1, + "maxLength": 1024 + }, + "Type": { + "description": "This element represents the type of the identifier.", + "type": "string", + "enum": ["Other"] + }, + "DescriptionOfOtherType": { + "description": "This element allows the curator to describe what kind of Identifier is present when the value of Other is chosen as the type. This element is not allowed if a value other than Other is chosen.", + "type": "string", + "minLength": 1, + "maxLength": 1024 + } + }, + "required": ["Identifier", "Type", "DescriptionOfOtherType"] + }] }, "MetadataSpecificationType": { "type": "object", @@ -1663,7 +1899,7 @@ "URL": { "description": "This element represents the URL where the schema lives. 
The schema can be downloaded.", "type": "string", - "enum": ["https://cdn.earthdata.nasa.gov/umm/collection/v1.17.3"] + "enum": ["https://cdn.earthdata.nasa.gov/umm/collection/v1.18.0"] }, "Name": { "description": "This element represents the name of the schema.", @@ -1673,10 +1909,15 @@ "Version": { "description": "This element represents the version of the schema.", "type": "string", - "enum": ["1.17.3"] + "enum": ["1.18.0"] } }, "required": ["URL", "Name", "Version"] + }, + "OrbitParameterExistsIfGranuleSpatialRepresentationIsOrbit": { + "$comment": "Checks if the Granule Spatial Representation value is Orbit, then the orbit parameter must exist.", + "if": {"properties": {"GranuleSpatialRepresentation": {"const": "ORBIT"}}}, + "then": {"required": ["OrbitParameters"]} } } } diff --git a/pyQuARC/version.txt b/pyQuARC/version.txt index 3c43790f..c04c650a 100644 --- a/pyQuARC/version.txt +++ b/pyQuARC/version.txt @@ -1 +1 @@ -1.2.6 +1.2.7 diff --git a/requirements.txt b/requirements.txt index 6d1440ed..30aec17c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,5 +8,5 @@ requests==2.24.0 setuptools==60.8.2 strict-rfc3339==0.7 tqdm==4.48.2 -urlextract==1.0.0 +urlextract==1.3.0 xmltodict==0.12.0