Source code for openghg.standardise.surface._cranfield
from pathlib import Path
from typing import Dict, List, Optional, Union
import warnings
[docs]
def parse_cranfield(
    filepath: Union[str, Path],
    site: Optional[str] = None,
    network: Optional[str] = None,
    inlet: Optional[str] = None,
    instrument: Optional[str] = None,
    sampling_period: Optional[str] = None,
    measurement_type: Optional[str] = None,
    **kwargs: Dict,
) -> Dict:
    """Read a Cranfield CSV file and split it into one dataset per species.

    The file is read with pandas, using its "Date" column as a parsed
    datetime index which is then renamed to "time". The methane, CO2 and CO
    columns and their stdev columns are renamed to "ch4", "co2", "co" and
    "<species> variability". The CH4 and CO columns are multiplied by 1e3
    to convert them from ppm to ppb; the CO2 columns are left as read.

    Note:
        This function is deprecated and emits a DeprecationWarning.
        The site ("THB"), network ("CRANFIELD"), instrument ("CRDS") and
        inlet ("10m") written to the metadata are fixed values; the
        arguments of the same names are accepted but their values are
        not used.

    Args:
        filepath: Path of the CSV file to read
        site: Name of site (value not used, "THB" is always recorded)
        network: Name of network (value not used, "CRANFIELD" is always
            recorded)
        inlet: Inlet height (value not used, "10m" is always recorded)
        instrument: Instrument name (value not used, "CRDS" is always
            recorded)
        sampling_period: Sampling period stored in the metadata as a
            string, "NOT_SET" if not given
        measurement_type: Not used by this function
        **kwargs: Not used by this function
    Returns:
        dict: Dictionary keyed by species column name. Each value is a
        dictionary with a "metadata" entry (dict) and a "data" entry (the
        species' columns converted with DataFrame.to_xarray)
    """
    from openghg.util import clean_string, format_inlet
    from pandas import read_csv

    # stacklevel=2 attributes the warning to the caller of this function,
    # which is where the deprecated usage actually needs changing
    warnings.warn(
        "This function will be removed in a future release",
        DeprecationWarning,
        stacklevel=2,
    )

    if sampling_period is None:
        sampling_period = "NOT_SET"

    filepath = Path(filepath)

    data = read_csv(filepath, parse_dates=["Date"], index_col="Date")
    data = data.rename(
        columns={
            "Methane/ppm": "ch4",
            "Methane stdev/ppm": "ch4 variability",
            "CO2/ppm": "co2",
            "CO2 stdev/ppm": "co2 variability",
            "CO/ppm": "co",
            "CO stdev/ppm": "co variability",
        }
    )
    data.index.name = "time"

    # Convert CH4 and CO to ppb
    for column in ("ch4", "ch4 variability", "co", "co variability"):
        data[column] = data[column] * 1e3

    # The inlet height is fixed for this data, the inlet argument is not used
    fixed_inlet = "10m"

    metadata = {
        "site": "THB",
        "instrument": "CRDS",
        "sampling_period": str(sampling_period),
        "height": format_inlet(fixed_inlet, key_name="height"),
        "inlet": format_inlet(fixed_inlet, key_name="inlet"),
        "inlet_height_magl": format_inlet(fixed_inlet, key_name="inlet_height_magl"),
        "network": "CRANFIELD",
        "data_type": "surface",
    }

    # Species columns are the ones without a space in their name, the
    # "<species> variability" columns are attached to their species below
    species: List[str] = [col for col in data.columns if " " not in col]

    combined_data = {}

    for sp in species:
        # Create a copy of the metadata dict
        species_metadata = metadata.copy()
        species_metadata["species"] = str(clean_string(sp))

        # Select this species' columns by exact name rather than by position
        # or substring, so that "co" never picks up the "co2" columns and the
        # result does not depend on the column order in the file
        wanted = (sp, f"{sp} variability")
        species_columns = [col for col in wanted if col in data.columns]

        # Convert from a pandas DataFrame to an xarray Dataset
        gas_data = data[species_columns].to_xarray()

        combined_data[sp] = {"metadata": species_metadata, "data": gas_data}

    return combined_data