Source code for openghg.util._util
""" Utility functions that are used by multiple modules
"""
from collections.abc import Iterable
from typing import Any, Dict, Iterator, Optional, Tuple
import logging
# Module-level logger used by the helper functions in this file to emit warnings
logger = logging.getLogger("openghg.util")
logger.setLevel(logging.DEBUG) # Have to set level for logger as well as handler
def running_in_cloud() -> bool:
    """Report whether OpenGHG is running in the cloud.

    Reads the OPENGHG_CLOUD environment variable (treated as "0" when unset)
    and interprets it as an integer flag.

    Returns:
        bool: True if OPENGHG_CLOUD is set to a non-zero integer
    """
    import os

    flag = int(os.getenv("OPENGHG_CLOUD", "0"))
    return flag != 0
def running_on_hub() -> bool:
    """Report whether OpenGHG is running on the OpenGHG Hub.

    Reads the OPENGHG_HUB environment variable (treated as "0" when unset)
    and interprets it as an integer flag.

    Returns:
        bool: True if OPENGHG_HUB is set to a non-zero integer
    """
    import os

    flag = int(os.getenv("OPENGHG_HUB", "0"))
    return flag != 0
def running_locally() -> bool:
    """Report whether OpenGHG is running locally.

    "Locally" means neither on the OpenGHG Hub nor in the cloud.

    Returns:
        bool: True if running locally
    """
    # The hub check runs first and the cloud check is skipped when it is True
    return not running_on_hub() and not running_in_cloud()
def unanimous(seq: Dict) -> bool:
    """Check that every value stored in a dictionary is equal.

    Args:
        seq: Dictionary whose values are compared
    Returns:
        bool: True if all values are the same (or the dictionary is empty)
    """
    remaining = iter(seq.values())

    # A private sentinel distinguishes "no values at all" from a stored None
    nothing = object()
    reference = next(remaining, nothing)
    if reference is nothing:
        return True

    for value in remaining:
        if not (value == reference):
            return False

    return True
def pairwise(iterable: Iterable) -> Iterator[Tuple[Any, Any]]:
    """Pair each item of an iterable with the item that follows it.

    For items s0, s1, s2, ... this yields (s0, s1), (s1, s2), ...

    Args:
        iterable: Any iterable type
    Returns:
        Iterator: Iterator over tuples of consecutive items
    """
    import itertools

    leading, trailing = itertools.tee(iterable)
    # Step the second copy forward by one so it runs one item ahead of the first
    next(trailing, None)

    return zip(leading, trailing)
def site_code_finder(site_name: str) -> Optional[str]:
    """Find the site code for a given site name.

    The name has its punctuation removed and is fuzzy-matched against the
    site names returned by _create_site_lookup_dict. A code is only returned
    when exactly one candidate scores 90 or above.

    Args:
        site_name: Site long name
    Returns:
        str or None: Lower-case site code if a single confident match is found,
        None if there is no confident match or the match is ambiguous
    """
    from openghg.util import remove_punctuation
    from rapidfuzz import process  # type: ignore

    site_name = remove_punctuation(site_name)
    inverted = _create_site_lookup_dict()

    matches = process.extract(query=site_name, choices=inverted.keys())

    # With no known site names there is nothing to match against; without
    # this guard the matches[0] lookup below would raise IndexError.
    if not matches:
        return None

    # The first match is treated as the best candidate
    highest_score = matches[0][1]
    if highest_score < 90:
        return None

    # If there are multiple >= 90 matches we return None as this is ambiguous
    greater_than_90 = sum(match[1] >= 90 for match in matches)
    if greater_than_90 > 1:
        logger.warning("Please provide more site information, more than one site found.")
        return None

    matched_site = matches[0][0]
    site_code: str = inverted[matched_site]

    return site_code.lower()
def find_matching_site(site_name: str, possible_sites: Dict) -> str:
    """Try and find a similar name to site_name in possible_sites and return a
    suggestion or error string.

    Args:
        site_name: Name of site
        possible_sites: Dictionary of site name: site code values to check against
    Returns:
        str: Suggestion / error message
    """
    # Nothing to compare against, so no suggestion can be made. Previously
    # this case raised IndexError when reading the first score.
    if not possible_sites:
        return f"No suggestion for {site_name}."

    from rapidfuzz import process

    matches = process.extract(site_name, possible_sites.keys())
    if not matches:
        return f"No suggestion for {site_name}."

    # This seems like a decent cutoff score for a decent find
    cutoff_score = 85

    best_match, best_score, _ = matches[0]
    if best_score < cutoff_score:
        return f"No suggestion for {site_name}."

    # There is no runner-up when only a single candidate was available
    runner_up_score = matches[1][1] if len(matches) > 1 else None

    # A unique best match at or above the cutoff. A score of exactly the
    # cutoff used to fall through to the "Unknown site" message.
    if runner_up_score is None or best_score > runner_up_score:
        return f"Did you mean {best_match.upper()}, code: {possible_sites[best_match]} ?"

    # The top candidates are tied, so offer every returned match
    if best_score == runner_up_score:
        suggestions = [f"{match.title()}, code: {possible_sites[match]}" for match, _, _ in matches]
        nl_char = "\n"
        return f"Did you mean one of : \n {nl_char.join(suggestions)}"

    # Only reachable if the matches are not ordered best-first
    return f"Unknown site: {site_name}"
def _create_site_lookup_dict() -> Dict:
    """Create a dictionary of site name: site code values

    For each site in the site info file, the first network entry that has a
    "long_name" is used. The name is cut at its first comma and has its
    punctuation removed before being used as the key. Sites where no network
    entry has a "long_name" are left out.

    Returns:
        dict: Dictionary of site_name: site_code values
    """
    from openghg_defs import site_info_file
    from openghg.util import load_json, remove_punctuation

    site_info = load_json(path=site_info_file)

    inverted = {}
    for site, site_data in site_info.items():
        for network_data in site_data.values():
            try:
                long_name = network_data["long_name"]
            except KeyError:
                # This network entry has no long name, try the next one
                continue

            # Remove the country from the name by keeping the text before the
            # first comma. str.split always returns at least one element, so
            # no IndexError handling is needed here.
            no_country = remove_punctuation(long_name.split(",")[0])
            inverted[no_country] = site
            break

    return inverted
def verify_site(site: str) -> Optional[str]:
    """Validate a site and return its site code.

    If the upper-cased input is a key of the site info file it is returned
    lower-cased. Otherwise the input is treated as a site name and looked up
    with site_code_finder, logging a warning when no code can be found.

    Args:
        site: Three letter site code or site name
    Returns:
        str or None: Verified site code if valid site, otherwise None
    """
    from openghg.util import load_json
    from openghg_defs import site_info_file

    known_sites = load_json(path=site_info_file)

    # Direct hit: the input is already a known site code
    if site.upper() in known_sites:
        return site.lower()

    found_code = site_code_finder(site_name=site)
    if found_code is None:
        logger.warning(f"Unable to find site code for {site}, please provide additional metadata.")

    return found_code
def multiple_inlets(site: str) -> bool:
    """Check if the passed site has more than one inlet

    The heights are read from the first network listed for the site, using
    the "height" entry or, if that is missing, the "height_name" entry.

    Args:
        site: Three letter site code
    Returns:
        bool: True if multiple inlets. Also True when the site's first network
        has neither a "height" nor a "height_name" entry.
    Raises:
        KeyError: If the upper-cased site is not present in the site info
    """
    from openghg.util import get_site_info

    site_data = get_site_info()

    site_networks = site_data[site.upper()]
    network = next(iter(site_networks))

    # The network data lives under the site's own entry. Looking the network
    # name up at the top level of site_data (as was done before) fails with
    # KeyError, which made this function return True for every site.
    network_data = site_networks[network]

    for key in ("height", "height_name"):
        try:
            heights = network_data[key]
        except KeyError:
            continue
        return len(set(heights)) > 1

    # No height information available, so assume there may be multiple inlets
    return True