# Source code for openghg.store._metadata

import json
from typing import Dict, Optional, Sequence
from openghg.objectstore import exists, get_bucket, get_object, set_object_from_json

# from openghg.dataobjects import DataHandler
# DHType = TypeVar('U', bound=DataHandler)

from tinydb import Storage, TinyDB
from tinydb.middlewares import CachingMiddleware


def load_metastore(key: str) -> TinyDB:
    """Open the metadata store held under the given key.

    The database uses ObjectStorage as its storage backend, wrapped in
    TinyDB's CachingMiddleware. Either use the returned database as a
    context manager or call its close method once finished, otherwise
    records are not written to file.

    Args:
        key: Key to metadata store
    Returns:
        TinyDB: instance of metadata database
    """
    # The storage class itself (not an instance) is handed to the middleware
    cached_storage = CachingMiddleware(ObjectStorage)
    return TinyDB(key, storage=cached_storage)


class ObjectStorage(Storage):
    """TinyDB storage class that reads and writes the database as JSON
    held under a single key in the object store bucket.
    """

    def __init__(self, key: str) -> None:
        """Store the key the database is read from and written to.

        Args:
            key: Key to metadata store
        """
        self._key = key

    def read(self) -> Optional[Dict]:
        """Read the database from the object store.

        Returns:
            dict or None: The decoded JSON data, or None if nothing exists
            at the key or the stored data cannot be decoded as JSON
        """
        store_bucket = get_bucket()

        if not exists(bucket=store_bucket, key=self._key):
            return None

        raw = get_object(bucket=store_bucket, key=self._key)

        try:
            parsed: Dict = json.loads(raw)
        except json.JSONDecodeError:
            # Undecodable content is reported in the same way as a missing object
            return None

        return parsed

    def write(self, data: Dict) -> None:
        """Write the database to the object store under this storage's key.

        Args:
            data: Database contents to store
        """
        set_object_from_json(bucket=get_bucket(), key=self._key, data=data)

    def close(self) -> None:
        """Nothing to do here, no resources are held open by this class."""


def datasource_lookup(
    metastore: TinyDB, data: Dict, required_keys: Sequence[str], min_keys: Optional[int] = None
) -> Dict:
    """Look up each entry of data in the metadata store using a subset of its metadata.

    Some metadata values may change over time (data owners for example), so an exact
    match on *all* of the metadata is not wanted. Instead only the metadata items whose
    key is in required_keys are used for the search, and at least min_keys of them
    must be present.

    Args:
        metastore: Metadata database
        data: Combined data dictionary of form {key: {data: Dataset, metadata: Dict}}
        required_keys: Keys to extract from the metadata
        min_keys: The minimum number of required keys that must be found in the
        metadata, if not given it is set to the length of required_keys
    Returns:
        dict: Result of metadata_lookup for each key of data
    Raises:
        ValueError: If fewer than min_keys of the required keys are found in
        the metadata of an entry
    """
    from openghg.retrieve import metadata_lookup

    threshold = len(required_keys) if min_keys is None else min_keys

    lookups = {}
    for name, entry in data.items():
        # Membership is tested on the key as given, the search terms themselves
        # (both key and stringified value) are lowercased
        search_terms = {}
        for meta_key, meta_value in entry["metadata"].items():
            if meta_key in required_keys:
                search_terms[meta_key.lower()] = str(meta_value).lower()

        if len(search_terms) < threshold:
            raise ValueError(
                f"The given metadata doesn't contain enough information, we need: {required_keys}"
            )

        lookups[name] = metadata_lookup(metadata=search_terms, database=metastore)

    return lookups


def data_handler_lookup(data_type: str, **kwargs: Dict):  # type: ignore
    """Lookup the data / metadata you'd like to modify.

    Args:
        data_type: Type of data, for example surface, flux, footprint
        kwargs: Any pair of keyword arguments for searching
    Returns:
        DataHandler: A handler object to help modify the metadata
    """
    # Imported inside the function rather than at module level
    # NOTE(review): presumably to avoid a circular import - confirm
    from openghg.retrieve import search
    from openghg.dataobjects import DataHandler

    res = search(data_type=data_type, **kwargs)
    metadata = res.metadata
    return DataHandler(metadata=metadata)