Source code for openghg.util._inlet

from typing import cast, overload
import logging
from openghg.types import optionalPathType

__all__ = ["format_inlet", "extract_height_name"]

logger = logging.getLogger("openghg.util")
logger.setLevel(logging.INFO)  # Have to set level for logger as well as handler


@overload
def format_inlet(
    inlet: str,
    units: str = "m",
    key_name: str | None = None,
    special_keywords: list | None = None,
) -> str: ...


@overload
def format_inlet(
    inlet: None,
    units: str = "m",
    key_name: str | None = None,
    special_keywords: list | None = None,
) -> None: ...


@overload
def format_inlet(
    inlet: slice,
    units: str = "m",
    key_name: str | None = None,
    special_keywords: list | None = None,
) -> slice: ...


@overload
def format_inlet(
    inlet: list[str | slice | None],
    units: str = "m",
    key_name: str | None = None,
    special_keywords: list | None = None,
) -> list[str | slice | None]: ...


[docs] def format_inlet( inlet: str | slice | None | list[str | slice | None], units: str = "m", key_name: str | None = None, special_keywords: list | None = None, ) -> str | slice | None | list[str | slice | None]: """ Make sure inlet / height name conforms to standard. The standard imposed can depend on the associated key_name itself (can be supplied as an option to check). This standard is as follows: - number followed by unit - number alone if unit / derviative is specified at the end of key_name (e.g. station_height_masl) - unchanged if this is one of the special keywords (by default "multiple" or "various") Other considerations: - For units of "m", we will also look for "magl" and "masl" (metres above ground and sea level) - If the input string just contains numbers, it is assumed this is already within the correct unit. Args: inlet: Inlet / Height value in the specified units units: Units for the inlet value ("m" by default) key_name: Name of the associated key. This is optional but will be used to determine whether the unit value should be added to the output string. special_keywords: Specify special keywords inlet could be set to If so do not apply any formatting. If this is not set a special keyword of "multiple" and "column" will still be allowed. Returns: same type as input, with all strings formatted Usage: >>> format_inlet("10") "10m" >>> format_inlet("10m") "10m" >>> format_inlet("10magl") "10m" >>> format_inlet("10.111") "10.1m" >>> format_inlet(["10", 100]) ["10m", "100m"] >>> format_inlet("multiple") "multiple" >>> format_inlet("10m", key_name="inlet") "10m" >>> format_inlet("10m", key_name="inlet_magl") "10" >>> format_inlet("10m", key_name="station_height_masl") "10" """ # process list recursively if isinstance(inlet, list): return [format_inlet(x) for x in inlet] # pass through None and slice if inlet is None or isinstance(inlet, slice): return inlet # By default the special keyword is "multiple" for data containing multiple inlets. # This will be included if data is a combined object from the object store. if special_keywords is None: special_keywords = ["multiple", "column"] # Check if inlet is set to a special keyword if inlet in special_keywords: return inlet # Define set of options associated with units. For "m" this include # "magl" and "masl" (metres above ground and sea level). if units == "m": unit_options = ["m", "magl", "masl"] else: unit_options = [cast(str, units)] # Check whether unit is needed in string output. # This is dependent on whether the key name itself contains the unit value # (or derivative). If so, the unit itself is not needed in the value. unit_needed = True if key_name is not None: for value in unit_options: if key_name.split("_")[-1] == value: unit_needed = False # Check if input inlet just contains numbers and no unit # If so assume the units are metres and add this to the end of the string try: inlet_float = float(inlet) except ValueError: pass else: if inlet_float.is_integer(): if unit_needed: inlet = f"{inlet_float:.0f}{units}" else: inlet = f"{inlet_float:.0f}" else: if unit_needed: inlet = f"{inlet_float:.1f}{units}" else: inlet = f"{inlet_float:.1f}" return str(inlet) # If we were unable to cast inlet as a float # check if inlet ends with unit or unit derivative # e.g. "magl" and "masl" would need to replaced with "m" or be removed for value in unit_options: if inlet.endswith(value): if unit_needed: inlet = inlet.replace(value, units) else: inlet = inlet.rstrip(value) break # else: # raise ValueError(f"Did not recognise input for inlet: {inlet}") return str(inlet)
def extract_height_name( site: str, network: str | None = None, inlet: str | None = None, site_filepath: optionalPathType = None, ) -> str | list | None: """ Extract the relevant height associated with NAME from the "height_name" variable, if present from site_info data. This expects the "height_name" variable to be one of: - list containing the same number of items as inlets for the site - dictionary containing the mapping between inlets and heights used in NAME. Args: site : Site code network: Name of the associated network for the site inlet: Observation inlet / height value in the specified units site_filepath: Alternative site info file. Defaults to openghg_defs input. Returns: str : appropriate height name value extracted from site_info list: multiple height name options extracted from site_info None: if value not found or ambiguous. """ from openghg.util import get_site_info site_data = get_site_info(site_filepath=site_filepath) if site: site_upper = site.upper() if network is None: network = next(iter(site_data[site_upper])) else: network = network.upper() height_name_attr = "height_name" height_attr = "height" if site_upper in site_data: site_metadata = site_data[site_upper][network] if height_name_attr in site_metadata: # Extract height_name variable from the site_metadata height_name_extracted = site_metadata[height_name_attr] # If this is a list, check and try and extract appropriate value. if isinstance(height_name_extracted, list): # Check if multiple values for height_name_extracted are present (list > 1) if len(height_name_extracted) == 1: height_name: str | None = height_name_extracted[0] else: # If this is ambiguous, check "height" attr to match against site inlet value # This assumes two lists of the same length map to each other with translating values if (inlet is not None) and (height_attr in site_metadata): height_values = site_metadata[height_attr] if len(height_values) == len(height_name_extracted) and (inlet in height_values): index = height_values.index(inlet) height_name = height_name_extracted[index] else: logger.warning( f"Ambiguous '{height_name_attr}' in site_info. " f"Unable to extract from: height_name = {height_name_extracted} using height = {inlet}" ) height_name = None else: logger.warning( f"Ambiguous '{height_name_attr}' in site_info. " f"Unable to extract from: height_name = {height_name_extracted}" ) height_name = None elif isinstance(height_name_extracted, dict): if (inlet is not None) and (inlet in height_name_extracted): height_name = height_name_extracted[inlet] else: logger.warning( f"Unable to interpret {height_name_extracted}. Please supply or check supplied inlet value: {inlet}" ) height_name = None else: height_name = None else: height_name = None return height_name