# Source code for openghg.util._species
import logging
from typing import Optional, Any
from openghg.util import load_json
from openghg.types import optionalPathType
# Names exported by `from <this module> import *`; each entry is a function
# defined in this module.
__all__ = [
"get_species_info",
"synonyms",
"species_lifetime",
"check_lifetime_monthly",
"check_species_lifetime",
"check_species_time_resolved",
"molar_mass",
]
# Module-level logger used for the informational messages emitted when a
# flag is updated by the check_* functions.
logger = logging.getLogger("openghg.util.species")
def get_species_info(species_filepath: optionalPathType = None) -> dict[str, Any]:
    """Load the species information JSON file and return its contents.

    When no path is supplied, the species info file provided by the
    openghg_defs package is read.

    Args:
        species_filepath: Alternative species info file.
    Returns:
        dict: Data from species JSON file
    """
    # Imported at call time, regardless of whether an alternative path is given
    from openghg_defs import species_info_file

    if species_filepath is not None:
        return load_json(path=species_filepath)
    return load_json(path=species_info_file)
[docs]
def synonyms(
    species: str,
    lower: bool = True,
    allow_new_species: bool = True,
    species_filepath: optionalPathType = None,
) -> str:
    """Map a species name onto the name used for it in the species info file.

    E.g. if CFC-11 or CFC11 was input, go on to use cfc11. Matching is case
    insensitive and is tried against the species keys first, then against
    each entry's list of alternative names ("alt").

    Args:
        species: Input string that you're trying to match
        lower: Return all lower case
        allow_new_species: Return original value (may be lower case)
            if this (or a synonym) is not found in the database.
            If False, raise a ValueError.
        species_filepath: Alternative species info file. Defaults to openghg_defs input.
    Returns:
        str: Matched species string
    Raises:
        ValueError: if no match is found and allow_new_species is False

    TODO: Decide if we need to make this lower case or not.
    Included this here so this occurs in one place which can be linked to
    and changed if needed.
    """
    # "inert" is returned directly (lower cased) without consulting the species file
    if species.lower() == "inert":
        return species.lower()

    species_data = get_species_info(species_filepath=species_filepath)

    # All comparisons are case insensitive, so normalise the input once
    target = species.upper()

    # First test whether the species matches one of the keys
    matched_key = next((key for key in species_data if key.upper() == target), None)

    # If not found, search the alternative names of every entry
    if matched_key is None:
        for key, details in species_data.items():
            # An entry without an "alt" list has no synonyms to check; it
            # should not stop the search through the remaining entries.
            alt_names = details.get("alt") or []
            if any(alt.upper() == target for alt in alt_names):
                matched_key = key
                break

    if matched_key is None:
        if not allow_new_species:
            raise ValueError(f"Unable to find species (or synonym) in database {species}")
        return species.lower() if lower else species

    updated_species = str(matched_key)
    return updated_species.lower() if lower else updated_species
# Type of a lifetime value as handled in this module: a single string,
# a list of strings, or None when no lifetime is available.
LifetimeType = Optional[str | list[str]]
[docs]
def species_lifetime(species: str | None, species_filepath: optionalPathType = None) -> LifetimeType:
    """Find species lifetime.

    This can either be labelled as "lifetime" or "lifetime_monthly" in the
    species info file; "lifetime" takes precedence when both are present.
    The species name is resolved through synonyms(), so alternative names
    listed in the species info file are accepted.

    Args:
        species: Species name e.g. "ch4" or "co2". If None, None is returned.
        species_filepath: Alternative species info file. Defaults to openghg_defs input.
    Returns:
        str / list / None: Extracted lifetime or None if no lifetime was present.
    Raises:
        ValueError: if the species (or a synonym) is not found in the species info file
    """
    # Nothing to look up, so don't read the species file at all
    if species is None:
        return None

    species_data = get_species_info(species_filepath=species_filepath)

    # Resolve the name against the same file the data was loaded from, so the
    # returned label is guaranteed to be a key of species_data.
    species_label = synonyms(
        species,
        lower=False,
        allow_new_species=False,
        species_filepath=species_filepath,
    )
    species_details = species_data[species_label]

    # Keywords are checked in order; the first one present wins
    for key in ("lifetime", "lifetime_monthly"):
        if key in species_details:
            return species_details[key]

    return None
[docs]
def check_lifetime_monthly(lifetime: LifetimeType) -> bool:
    """Report whether a lifetime value holds monthly lifetimes.

    A monthly lifetime is a list with exactly 12 entries, one for each month.
    Any value that is not a list is treated as not monthly.

    Args:
        lifetime: str or list representation of lifetime value
    Returns:
        bool: True if lifetime matches criteria for monthly data, False otherwise
    Raises:
        ValueError: if lifetime is a list but does not contain exactly 12 entries
    """
    # Anything other than a list (including None) cannot be monthly data
    if not isinstance(lifetime, list):
        return False

    if len(lifetime) != 12:
        raise ValueError(f"Invalid input for lifetime: {lifetime}")

    return True
def check_species_lifetime(species: str, short_lifetime: bool = False) -> bool:
    """
    Check whether a species has a [short] lifetime (relevant for footprint types).

    For the special species "inert" (matched case insensitively) the result
    is always False. For any other species the flag is set to True when
    species_lifetime() returns a lifetime for it; otherwise the input flag
    is returned unchanged.

    Args:
        species: Name of species
        short_lifetime: Flag for whether this species has a short lifetime
            (and so requires a specific footprint with this taken into account)
    Returns:
        bool: The short_lifetime flag
    Raises:
        ValueError: if species is "inert" but short_lifetime is True
    """
    # Compare case insensitively, in line with how synonyms() recognises
    # "inert", so that e.g. "INERT" is not looked up in the species file.
    if species.lower() == "inert":
        if short_lifetime is True:
            raise ValueError(
                "When indicating footprint is for short lived species, 'species' input must be included"
            )
        return False

    lifetime = species_lifetime(species)
    if lifetime is not None:
        # TODO: May want to add a check on length of lifetime here
        short_lifetime = True
        logger.info("Updating short_lifetime to True since species has an associated lifetime")

    return short_lifetime
def check_species_time_resolved(species: str, time_resolved: bool = False) -> bool:
    """
    Decide whether a species needs a time_resolved footprint.

    Note: at the moment this is only relevant for "co2".

    Args:
        species: Name of species
        time_resolved: Flag supplied by the caller; returned unchanged unless
            the species resolves to "co2", in which case True is returned.
    Returns:
        bool: The time_resolved flag
    """
    canonical_name = synonyms(species, lower=True, allow_new_species=True)

    # Only a falsy flag for CO2 needs to be switched on (and reported)
    must_switch_on = canonical_name == "co2" and not time_resolved
    if must_switch_on:
        logger.info("Updating time_resolved to True for CO2 data")
        return True

    return time_resolved
[docs]
def molar_mass(species: str, species_filepath: optionalPathType = None) -> float:
    """Extracts the molar mass of a species.

    The species name is resolved through synonyms() and the "mol_mass" entry
    for the matched species is converted to a float.

    Args:
        species: Species name
        species_filepath: Alternative species info file. Defaults to openghg_defs input.
    Returns:
        float: Molar mass of species
    Raises:
        ValueError: if the species (or a synonym) is not found in the species info file
    """
    species_data = get_species_info(species_filepath=species_filepath)

    # Resolve the name against the same file the data was loaded from, so the
    # returned label is guaranteed to be a key of species_data.
    species_label = synonyms(
        species,
        lower=False,
        allow_new_species=False,
        species_filepath=species_filepath,
    )

    return float(species_data[species_label]["mol_mass"])