Source code for openghg.util._inlet
from typing import cast, overload
import logging
from openghg.types import pathType
__all__ = ["format_inlet", "extract_height_name"]
logger = logging.getLogger("openghg.util")
logger.setLevel(logging.INFO)  # Have to set level for logger as well as handler
@overload
def format_inlet(
    inlet: str,
    units: str = "m",
    key_name: str | None = None,
    special_keywords: list | None = None,
) -> str: ...
@overload
def format_inlet(
    inlet: None,
    units: str = "m",
    key_name: str | None = None,
    special_keywords: list | None = None,
) -> None: ...
@overload
def format_inlet(
    inlet: slice,
    units: str = "m",
    key_name: str | None = None,
    special_keywords: list | None = None,
) -> slice: ...
@overload
def format_inlet(
    inlet: list[str | slice | None],
    units: str = "m",
    key_name: str | None = None,
    special_keywords: list | None = None,
) -> list[str | slice | None]: ...
[docs]
def format_inlet(
    inlet: str | slice | None | list[str | slice | None],
    units: str = "m",
    key_name: str | None = None,
    special_keywords: list | None = None,
) -> str | slice | None | list[str | slice | None]:
    """
    Make sure inlet / height name conforms to standard. The standard
    imposed can depend on the associated key_name itself (can
    be supplied as an option to check).
    This standard is as follows:
     - number followed by unit
     - number alone if unit / derviative is specified at the end of key_name (e.g. station_height_masl)
     - unchanged if this is one of the special keywords (by default "multiple" or "various")
    Other considerations:
     - For units of "m", we will also look for "magl" and "masl" (metres above ground and sea level)
     - If the input string just contains numbers, it is assumed this is already within the correct unit.
    Args:
        inlet: Inlet / Height value in the specified units
        units: Units for the inlet value ("m" by default)
        key_name: Name of the associated key. This is optional but will be used to
            determine whether the unit value should be added to the output string.
        special_keywords: Specify special keywords inlet could be set to
            If so do not apply any formatting.
            If this is not set a special keyword of "multiple" and "column" will still be allowed.
    Returns:
        same type as input, with all strings formatted
    Usage:
        >>> format_inlet("10")
            "10m"
        >>> format_inlet("10m")
            "10m"
        >>> format_inlet("10magl")
            "10m"
        >>> format_inlet("10.111")
            "10.1m"
        >>> format_inlet(["10", 100])
            ["10m", "100m"]
        >>> format_inlet("multiple")
            "multiple"
        >>> format_inlet("10m", key_name="inlet")
            "10m"
        >>> format_inlet("10m", key_name="inlet_magl")
            "10"
        >>> format_inlet("10m", key_name="station_height_masl")
            "10"
    """
    # process list recursively
    if isinstance(inlet, list):
        return [format_inlet(x) for x in inlet]
    # pass through None and slice
    if inlet is None or isinstance(inlet, slice):
        return inlet
    # By default the special keyword is "multiple" for data containing multiple inlets.
    # This will be included if data is a combined object from the object store.
    if special_keywords is None:
        special_keywords = ["multiple", "column"]
    # Check if inlet is set to a special keyword
    if inlet in special_keywords:
        return inlet
    # Define set of options associated with units. For "m" this include
    # "magl" and "masl" (metres above ground and sea level).
    if units == "m":
        unit_options = ["m", "magl", "masl"]
    else:
        unit_options = [cast(str, units)]
    # Check whether unit is needed in string output.
    # This is dependent on whether the key name itself contains the unit value
    # (or derivative). If so, the unit itself is not needed in the value.
    unit_needed = True
    if key_name is not None:
        for value in unit_options:
            if key_name.split("_")[-1] == value:
                unit_needed = False
    # Check if input inlet just contains numbers and no unit
    # If so assume the units are metres and add this to the end of the string
    try:
        inlet_float = float(inlet)
    except ValueError:
        pass
    else:
        if inlet_float.is_integer():
            if unit_needed:
                inlet = f"{inlet_float:.0f}{units}"
            else:
                inlet = f"{inlet_float:.0f}"
        else:
            if unit_needed:
                inlet = f"{inlet_float:.1f}{units}"
            else:
                inlet = f"{inlet_float:.1f}"
        return str(inlet)
    # If we were unable to cast inlet as a float
    # check if inlet ends with unit or unit derivative
    # e.g. "magl" and "masl" would need to replaced with "m" or be removed
    for value in unit_options:
        if inlet.endswith(value):
            if unit_needed:
                inlet = inlet.replace(value, units)
            else:
                inlet = inlet.rstrip(value)
            break
    # else:
    #     raise ValueError(f"Did not recognise input for inlet: {inlet}")
    return str(inlet)
def extract_height_name(
    site: str,
    network: str | None = None,
    inlet: str | None = None,
    site_filepath: pathType | None = None,
) -> str | list | None:
    """
    Extract the relevant height associated with NAME from the
    "height_name" variable, if present from site_info data.
    This expects the "height_name" variable to be one of:
      - list containing the same number of items as inlets for the site
      - dictionary containing the mapping between inlets and heights
        used in NAME.
    Args:
        site : Site code
        network: Name of the associated network for the site
        inlet: Observation inlet / height value in the specified units
        site_filepath: Alternative site info file. Defaults to openghg_defs input.
    Returns:
        str : appropriate height name value extracted from site_info
        list: multiple height name options extracted from site_info
        None: if value not found or ambiguous.
    """
    from openghg.util import get_site_info
    site_data = get_site_info(site_filepath=site_filepath)
    site_upper = site.upper()
    if network is None:
        network = next(iter(site_data[site_upper]))
    else:
        network = network.upper()
    height_name_attr = "height_name"
    height_attr = "height"
    if site_upper in site_data:
        site_metadata = site_data[site_upper][network]
        if height_name_attr in site_metadata:
            # Extract height_name variable from the site_metadata
            height_name_extracted = site_metadata[height_name_attr]
            # If this is a list, check and try and extract appropriate value.
            if isinstance(height_name_extracted, list):
                # Check if multiple values for height_name_extracted are present (list > 1)
                if len(height_name_extracted) == 1:
                    height_name: str | None = height_name_extracted[0]
                else:
                    # If this is ambiguous, check "height" attr to match against site inlet value
                    # This assumes two lists of the same length map to each other with translating values
                    if (inlet is not None) and (height_attr in site_metadata):
                        height_values = site_metadata[height_attr]
                        if len(height_values) == len(height_name_extracted) and (inlet in height_values):
                            index = height_values.index(inlet)
                            height_name = height_name_extracted[index]
                        else:
                            logger.warning(
                                f"Ambiguous '{height_name_attr}' in site_info. "
                                f"Unable to extract from: height_name = {height_name_extracted} using height = {inlet}"
                            )
                            height_name = None
                    else:
                        logger.warning(
                            f"Ambiguous '{height_name_attr}' in site_info. "
                            f"Unable to extract from: height_name = {height_name_extracted}"
                        )
                        height_name = None
            elif isinstance(height_name_extracted, dict):
                if (inlet is not None) and (inlet in height_name_extracted):
                    height_name = height_name_extracted[inlet]
                else:
                    logger.warning(
                        f"Unable to interpret {height_name_extracted}. Please supply or check supplied inlet value: {inlet}"
                    )
                    height_name = None
        else:
            height_name = None
    else:
        height_name = None
    return height_name
