Source code for openghg.cloud._packaging

from collections import defaultdict
from typing import DefaultDict, Dict, Optional

from openghg.util import (
    compress,
    compress_json,
    compress_str,
    decompress,
    decompress_json,
    hash_bytes,
)


def unpackage(data: Dict) -> Dict:
    """Unpack and verify a dictionary created by package_from_function.

    The payload is decompressed and its hash is compared against the
    SHA1 sum recorded in the package's file metadata. If the package
    carries compressed metadata, that is decompressed as well.

    Args:
        data: Dictionary with "data" and "file_metadata" keys and,
            optionally, a "metadata" key
    Returns:
        dict: Dictionary containing data and metadata if given
    Raises:
        ValueError: If the recorded hash does not match the hash of the
            decompressed data
    """
    expected_hash = data["file_metadata"]["data"]["sha1_hash"]

    payload = decompress(data=data["data"])
    actual_hash = hash_bytes(data=payload)

    # Refuse to hand back data whose hash differs from the recorded one.
    if expected_hash != actual_hash:
        raise ValueError(f"Hash mismatch, remote {expected_hash} - local {actual_hash}.")

    result = {"data": payload}

    # Metadata is optional; without it only the data is returned.
    try:
        packed_metadata = data["metadata"]
    except KeyError:
        return result

    result["metadata"] = decompress_json(data=packed_metadata)
    return result
def package_from_function(data: bytes, metadata: Optional[str] = None) -> Dict:
    """Create a package of data ready to be sent back to the caller.

    The SHA1 sum of the passed data is calculated and the data is compressed.
    If metadata is passed it is compressed and added to the returned
    dictionary. No SHA1 is calculated for the metadata.

    NOTE: This function should only be used internally by a serverless function.

    Args:
        data: Binary data
        metadata: Result of json.dumps. If compressing it as a string raises
            AttributeError, it is passed to compress_json instead.
    Returns:
        dict: Dictionary of compressed data and file metadata.
    Raises:
        TypeError: If the metadata could be compressed neither as a string
            nor as JSON. The underlying error is attached as the cause.
    """
    # Label recorded in the file metadata for both data and metadata.
    compression_type = "bz2"

    sha1_hash = hash_bytes(data=data)
    compressed_data = compress(data=data)

    packaged: Dict = {
        "found": True,
        "data": compressed_data,
        "file_metadata": {
            "data": {"sha1_hash": sha1_hash, "compression_type": compression_type},
        },
    }

    if metadata is not None:
        try:
            compressed_metadata = compress_str(s=metadata)
        except AttributeError:
            # compress_str could not handle the object (presumably it is not
            # a str), so fall back to serialising it as JSON.
            try:
                compressed_metadata = compress_json(data=metadata)
            except Exception as e:
                # Chain the original error so the root cause stays visible.
                raise TypeError(f"Unable to process this object: {e}") from e

        packaged["metadata"] = compressed_metadata
        # The metadata is not hashed, so its sha1_hash entry is False.
        packaged["file_metadata"]["metadata"] = {"sha1_hash": False, "compression_type": compression_type}

    return packaged