Source code for openghg.standardise.meta._metadata

import logging
import math
from copy import deepcopy
from typing import Dict, List, Optional
from openghg.types import AttrMismatchError
from openghg.util import is_number

# Module-level logger used for the warnings emitted in this module.
# The logger name is given as an explicit string rather than derived from __name__.
logger = logging.getLogger("openghg.standardise.metadata")
logger.setLevel(logging.DEBUG)  # Have to set level for logger as well as handler


def metadata_default_keys() -> List:
    """List the keys expected by default within ObsSurface metadata.

    Returns:
        list: The default metadata key names. A new list is built on every
            call, so callers may modify the result freely.
    """
    site_and_setup = ("site", "species", "inlet", "inlet_height_magl", "network", "instrument")
    measurement = ("sampling_period", "calibration_scale")
    ownership = ("data_owner", "data_owner_email")
    station = (
        "station_longitude",
        "station_latitude",
        "station_long_name",
        "station_height_masl",
    )

    # Order matters to anyone iterating the result, so keep the original sequence.
    return [*site_and_setup, *measurement, *ownership, *station]
def sync_surface_metadata(
    metadata: Dict,
    attributes: Dict,
    keys_to_add: Optional[List] = None,
    update_mismatch: bool = False,
) -> Dict:
    """Make sure keys shared by the metadata and attributes dictionaries agree,
    and that certain keys are present in the metadata.

    Each key of ``metadata`` that also exists in ``attributes`` is compared:

    - if ``is_number`` accepts both values, they are compared as floats with a
      relative tolerance of 1e-3 (this should mainly be used for lat/long);
    - otherwise their string forms are compared case-insensitively.

    Args:
        metadata: Dictionary of metadata. This is not modified.
        attributes: Attributes
        keys_to_add: Add these keys to the metadata, if not present, based on
            the attribute values. Defaults to ``metadata_default_keys()``.
            Note: this skips any keys which can't be copied from the attribute
            values (a warning is logged for each).
        update_mismatch: If a mismatch is found between an attribute and a
            metadata value, update the metadata to contain the attribute value
            and log a warning. A numeric replacement is stored as a string;
            any other replacement is stored as-is.
            By default this will raise an AttrMismatchError.
    Returns:
        dict: Copy of metadata updated with attributes
    Raises:
        AttrMismatchError: If a shared key has mismatching values and
            ``update_mismatch`` is False.
    """
    # Loop-invariant tolerance for numeric comparison, mainly for lat/long.
    relative_tolerance = 1e-3

    meta_copy = deepcopy(metadata)

    # Check if we have differences
    for key, value in metadata.items():
        # Keep the try body to the lookup only, so that a KeyError raised by
        # anything else is not silently swallowed.
        try:
            attr_value = attributes[key]
        except KeyError:
            # Key wasn't in attributes for comparison
            continue

        if is_number(attr_value) and is_number(value):
            if math.isclose(float(attr_value), float(value), rel_tol=relative_tolerance):
                continue
            mismatch_msg = (
                f"Value of {key} not within tolerance, metadata: {value} - attributes: {attr_value}"
            )
            replacement = str(attr_value)
        else:
            # Here we don't care about case. Within the Datasource we'll store the
            # metadata as all lowercase, within the attributes we'll keep the case.
            if str(value).lower() == str(attr_value).lower():
                continue
            mismatch_msg = f"Metadata mismatch for '{key}', metadata: {value} - attributes: {attr_value}"
            replacement = attr_value

        if not update_mismatch:
            raise AttrMismatchError(mismatch_msg)

        logger.warning(
            f"{mismatch_msg}\nUpdating metadata to use attribute value of {key} = {attr_value}"
        )
        meta_copy[key] = replacement

    # Only build the default key list when the caller did not supply one.
    if keys_to_add is None:
        keys_to_add = metadata_default_keys()

    # Check set of keys which should be in metadata and add if not present
    for key in keys_to_add:
        if key in meta_copy:
            continue
        try:
            meta_copy[key] = attributes[key]
        except KeyError:
            logger.warning(f"{key} key not in attributes or metadata")

    return meta_copy