Source code for openghg.standardise.surface._btt

from pathlib import Path
from typing import Dict, Optional, Union
import warnings


def parse_btt(
    data_filepath: Union[str, Path],
    site: Optional[str] = "BTT",
    network: Optional[str] = "LGHG",
    inlet: Optional[str] = None,
    instrument: Optional[str] = None,
    update_mismatch: str = "never",
) -> Dict:
    """Reads BTT (BT Tower) data files and returns a dictionary of gas data
    keyed by species.

    Args:
        data_filepath: Path of file to load
        site: Site name, defaults to BTT
        network: Network name, defaults to LGHG
        inlet: Inlet height. Not currently used.
        instrument: Instrument name. Not currently used.
        update_mismatch: This determines how mismatches between the internal data
            "attributes" and the supplied / derived "metadata" are handled.
            This includes the options:
                - "never" - don't update mismatches and raise an AttrMismatchError
                - "from_source" / "attributes" - update mismatches based on input data (e.g. data attributes)
                - "from_definition" / "metadata" - update mismatches based on associated data (e.g. site_info.json)
    Returns:
        dict: Dictionary of gas data
    """
    from numpy import nan as np_nan
    from openghg.standardise.meta import assign_attributes
    from openghg.util import clean_string, format_inlet, get_site_info, load_internal_json
    from pandas import Timestamp, isnull, read_csv, to_timedelta

    # TODO: Decide what to do about inputs which aren't used anywhere
    # at present - inlet, instrument, sampling_period, measurement_type
    data_filepath = Path(data_filepath)

    warnings.warn("This function will be removed in a future release", DeprecationWarning)

    site = "BTT"

    # Rename these columns
    rename_dict = {"co2.cal": "CO2", "ch4.cal.ppb": "CH4"}
    # We only want these species
    species_extract = ["CO2", "CH4"]
    # Take std-dev measurements from these columns for these species
    species_sd = {"CO2": "co2.sd.ppm", "CH4": "ch4.sd.ppb"}

    site_data = get_site_info()
    site_info = site_data[site][network]

    param_data = load_internal_json(filename="attributes.json")
    network_params = param_data["BTT"]

    sampling_period = int(network_params["sampling_period"])
    sampling_period_seconds = str(sampling_period) + "s"

    metadata = {}
    metadata["site"] = site
    metadata["inlet"] = format_inlet(network_params["inlet"], key_name="inlet")
    metadata["instrument"] = network_params["instrument"]
    metadata["sampling_period"] = str(sampling_period)
    metadata["station_longitude"] = site_info["longitude"]
    metadata["station_latitude"] = site_info["latitude"]
    metadata["station_long_name"] = site_info["long_name"]
    metadata["data_type"] = "surface"
    metadata["source_format"] = "btt"

    # Combine the network's global attributes with the metadata above
    attributes = network_params["global_attributes"]
    attributes["inlet_height_magl"] = format_inlet(network_params["inlet"], key_name="inlet_height_magl")
    attributes.update(metadata)

    data = read_csv(data_filepath)
    # Timestamps are recorded as fractional day-of-year (DOY) relative to the start of 2019
    data["time"] = Timestamp("2019-01-01 00:00") + to_timedelta(data["DOY"] - 1, unit="D")
    data["time"] = data["time"].dt.round(sampling_period_seconds)
    data = data[~isnull(data.time)]
    data = data.rename(columns=rename_dict)
    data = data.set_index("time")

    gas_data = {}
    for species in species_extract:
        processed_data = data.loc[:, [species]].sort_index()

        # Create a variability column from the species' std-dev measurements
        species_stddev_label = species_sd[species]
        processed_data[f"{species} variability"] = data[species_stddev_label]

        # Replace any values below zero with NaNs
        processed_data[processed_data < 0] = np_nan
        # Drop NaNs
        processed_data = processed_data.dropna()
        # Convert to a Dataset
        processed_data = processed_data.to_xarray()

        species_attributes = attributes.copy()
        species_attributes["species"] = clean_string(species)

        species_metadata = metadata.copy()
        species_metadata["species"] = clean_string(species)

        gas_data[species] = {
            "metadata": species_metadata,
            "data": processed_data,
            "attributes": species_attributes,
        }

    gas_data = assign_attributes(data=gas_data, site=site, network=network, update_mismatch=update_mismatch)

    return gas_data