Source code for openghg.store._segment
""" Segment the data into Datasources
"""
from typing import Dict
__all__ = ["assign_data"]
def assign_data(
    data_dict: Dict,
    lookup_results: Dict,
    overwrite: bool,
    data_type: str,
) -> Dict[str, Dict]:
    """Add each entry of data_dict to a Datasource and save it.

    For every key, a new Datasource is created when the matching lookup
    result is False; otherwise the existing Datasource with that UUID is
    loaded. The entry's data is then added and the Datasource is saved.

    Args:
        data_dict: Dictionary keyed by species, each value holding
            "metadata" and "data" items
        lookup_results: Dictionary with the same keys as data_dict, giving
            the UUID of an existing Datasource or False if there is none
        overwrite: If True overwrite current data stored
        data_type: Passed through unchanged to Datasource.add_data
    Returns:
        dict: Keyed by species name, each value is a dictionary holding the
            Datasource's "uuid" and a "new" flag that is True when the
            Datasource was created by this call
    """
    from openghg.store.base import Datasource

    assigned = {}

    for species, entry in data_dict.items():
        meta = entry["metadata"]
        dataset = entry["data"]
        # The lookup results share their keys with data_dict
        existing_uuid = lookup_results[species]

        # Copy the read metadata onto the data's attributes, leaving any
        # attribute that is already set untouched
        attrs = dataset.attrs
        for name, value in meta.items():
            if name not in attrs:
                attrs[name] = value

        # Only an explicit False means there is no stored Datasource yet;
        # any other value is treated as the UUID of one to load
        is_new = existing_uuid is False
        datasource = Datasource() if is_new else Datasource.load(uuid=existing_uuid)

        datasource.add_data(metadata=meta, data=dataset, overwrite=overwrite, data_type=data_type)
        # Save Datasource to object store
        datasource.save()

        assigned[species] = {"uuid": datasource.uuid(), "new": is_new}

    return assigned
# def assign_footprint_data(footprint_data: Dict, lookup_results: Dict, overwrite: bool) -> Dict:
# """ Create Datasources for the passed footprints data
# Args:
# data: xarray Dataset of footprints data
# metadata: Associated metadata
# datasource_uid: The UUID of the datasource if we've processed footprints data from this
# source before, otherwise False
# Returns:
# dict: Dictionary containing Datasource UUIDs
# """
# from openghg.modules import Datasource
# uuids = {}
# # Add in copying of attributes, or add attributes to the metadata at an earlier stage.
# for key in footprint_data:
# metadata = footprint_data[key]["metadata"]
# data = footprint_data[key]["data"]
# # Our lookup results and gas data have the same keys
# uuid = lookup_results[key]
# # TODO - Could this be done somewhere else? It doesn't feel quite right it
# # being here
# # Add the read metadata to the Dataset attributes being careful
# # not to overwrite any attributes that are already there
# to_add = {k: v for k, v in metadata.items() if k not in data.attrs}
# data.attrs.update(to_add)
# # If we have a UUID for this Datasource load the existing object
# # from the object store
# if uuid:
# datasource = Datasource.load(uuid=uuid)
# else:
# datasource = Datasource()
# # TODO - can we just ad
# # Add the dataframe to the datasource
# datasource.add_footprint_data(data=data, metadata=metadata, overwrite=overwrite)
# # Save Datasource to object store
# datasource.save()
# uuids[key] = datasource.uuid()
# return uuids
# def assign_emissions_data(data: Dataset, metadata: Dict, datasource_uid: Union[str, bool]) -> str:
# """ Create Datasources for the passed flux data
# Args:
# data: xarray Dataset of flux data
# metadata: Associated metadata
# datasource_uid: The UUID of the datasource if we've processed flux data from this
# source before, otherwise False
# Returns:
# str: UUID of Datasource
# """
# from openghg.modules import Datasource
# if datasource_uid is not False:
# datasource = Datasource.load(uuid=datasource_uid)
# else:
# datasource = Datasource()
# # Add the read metadata to the Dataset attributes being careful
# # not to overwrite any attributes that are already there
# to_add = {k: v for k, v in metadata.items() if k not in data.attrs}
# data.attrs.update(to_add)
# datasource.add_emissions_data(data=data, metadata=metadata)
# datasource.save()
# return datasource.uuid()