Source code for openghg.transform._regrid

from typing import cast

import numpy as np
import xarray as xr
from numpy import ndarray

from openghg.types import construct_xesmf_import_error, ArrayLike, ArrayLikeMatch


def _getGridCC(lat: ndarray, lon: ndarray) -> tuple[ndarray, ndarray]:
    """
    Create a cell centered meshgrid for from 1D arrays of lon, lat
    This meshgrid defines the bounds of each cell.
    """
    # dx = lon[2]-lon[1]
    # dy = lat[2]-lat[1]
    dx = np.mean(lon[1:] - lon[:-1])
    dy = np.mean(lat[1:] - lat[:-1])
    lon = np.append(lon, lon[-1] + dx)
    lat = np.append(lat, lat[-1] + dy)
    lon -= dx / 2.0
    lat -= dy / 2.0
    LON, LAT = np.meshgrid(lon, lat)
    return LON, LAT


# def _create_xesmf_grid_uniform_cc(lat: ndarray, lon: ndarray) -> xr.Dataset:
#     """
#     Creates a Dataset ready to be used by the xesmf regridder from 1D arrays
#     of latitude and longitude values.

#     This includes both the centre points and bounds of the latitude and longitude
#     grids (needed for conservative regridding methods but not for bilinear etc.)
#     """
#     LON, LAT = np.meshgrid(lon, lat)
#     LON_b, LAT_b = _getGridCC(lat, lon)

#     grid = xr.Dataset(
#         {
#             "lon": (["x", "y"], LON),
#             "lat": (["x", "y"], LAT),
#             "lon_b": (["x_b", "y_b"], LON_b),
#             "lat_b": (["x_b", "y_b"], LAT_b),
#         }
#     )

#     return grid


def _create_uniform_coords(lat: ndarray, lon: ndarray) -> xr.Dataset:
    """
    Creates a Dataset ready to be used by the xesmf regridder from 1D arrays
    of latitude and longitude values.
    """

    ds_out = xr.Dataset(
        {
            "lat": (["lat"], lat, {"units": "degrees_north"}),
            "lon": (["lon"], lon, {"units": "degrees_east"}),
        }
    )

    return ds_out


def convert_to_ndarray(array: ndarray | xr.DataArray) -> ndarray:
    """Check and extract underlying numpy array from DataArray as necessary"""

    if isinstance(array, xr.DataArray):
        values = array.values
    else:
        values = array

    return values


[docs] def regrid_uniform_cc( data: ArrayLikeMatch, lat_out: ArrayLike, lon_out: ArrayLike, lat_in: ArrayLike | None = None, lon_in: ArrayLike | None = None, latlon: list | None = None, method: str = "conservative", ) -> ArrayLikeMatch: """ Regrid data between two uniform, cell centered grids. All coordinates (lat_out, lon_out, lat_in, lon_in) should be for the centre of the representative cell and in degrees. Adapted from code written by @DTHoare Args: data: Data to be regridded. Data must have dimensions (lat, lon) if 2D or (time, lat, lon) if 3D. lat_out: 1D array for output latitude grid lon_out: 1D array for output longituide grid lat_in: 1D array for input latitude grid. Only used if data is a numpy array and not a DataArray lon_in: 1D array for input longitude grid. Only used if data is a numpy array and not a DataArray latlon: Names for latitude and longitude coordinates within data. Default = ["lat", "lon"] method: Method to use for regridding. Mainly use: - "conservative" - "conservative_normed" (ignores NaN values) See xesmf documentation for full list of options. Returns: ndarray / DataArray : Regridded data using specified method """ try: import xesmf # type:ignore except ImportError as e: raise ImportError(construct_xesmf_import_error(e)) if latlon is None: latlon = ["lat", "lon"] if isinstance(data, xr.DataArray): lat_in_extracted = data[latlon[0]].values lon_in_extracted = data[latlon[1]].values if lat_in is not None: if not np.isclose(lat_in, lat_in_extracted): raise ValueError( "Input for 'lat_in' should not be supplied if 'data' is a DataArray object.\n" "Please check 'lat_out' have been supplied correctly as well." ) if lon_in is not None: if not np.isclose(lon_in, lon_in_extracted): raise ValueError( "Input for 'lon_in' should not be supplied if 'data' is a DataArray object.\n" "Please check 'lon_out' has been supplied correctly as well." ) lat_in = lat_in_extracted lon_in = lon_in_extracted lat_in = cast(ArrayLike, lat_in) lon_in = cast(ArrayLike, lon_in) if data.shape != (lat_in.size, lon_in.size): raise ValueError( f"Shape of input 'data' {data.shape}" f"does not match 'lat_in' and 'lon_in' lengths: {len(lat_in)}, {len(lon_in)}" ) lat_in = convert_to_ndarray(lat_in) lon_in = convert_to_ndarray(lon_in) lat_out = convert_to_ndarray(lat_out) lon_out = convert_to_ndarray(lon_out) # 09/03/2023: Don't seem to need inputs with centre and bounding boxes for # uniform grid and xesmf "conservative" method anymore. input_grid = _create_uniform_coords(lat_in, lon_in) output_grid = _create_uniform_coords(lat_out, lon_out) regridder = xesmf.Regridder(input_grid, output_grid, method) regridded: ArrayLikeMatch = regridder(data) # # 09/03/2023: Don't need this for uniform grid anymore due to changes above # # but a variant may be needed for non-uniform grids e.g. tropomi data. # if isinstance(regridded, xr.DataArray): # from scipy.sparse import coo_matrix # type:ignore # # Checking dimensions and mapping back lat_out and lon_out # # May be overkill but attempting to make sure this is done correctly. # # The regridded DataArray will contain dimensions of ('x', 'y') for # # data, 'lat' and 'lon' coordinates e.g. lon = [[-10,-10],[0,0]] # # This is to allow for the generic case where ('lat', 'lon') is not # # a rectilinear (uniform) grid. # # Since we are creating a uniform grid we want to flatten this and # # put data back on ('lat', 'lon') dimension. # regridded_stack = regridded.stack(z=("x", "y")) # lat_coord = regridded_stack["lat"] # lon_coord = regridded_stack["lon"] # # Find coords within our lat_out and lon_out grid for our regridded output # # Digitize is technically a binning operation outputting the number of the # # bin the value is within so to get indicies we can subtract 1. # lat_index = np.digitize(lat_coord, lat_out) - 1 # lon_index = np.digitize(lon_coord, lon_out) - 1 # # Create a sparse matrix from the flattened data and the lat, lon index values. # # Reshape to our required lat_out, lon_out and output as a numpy array # shape = (len(lat_out), len(lon_out)) # regridded_grid = coo_matrix((regridded_stack.data, (lat_index, lon_index)), shape=shape).toarray() # regridded = xr.DataArray( # regridded_grid, # dims=("lat", "lon"), # coords={"lat": lat_out, "lon": lon_out}, # attrs=regridded.attrs, # ) # # # Alternative, more simplistic approach. May not be generic # # regridded = regridded.drop(labels=("lat","lon")) # # regridded = regridded.assign_coords(**{"x":output_lat,"y":output_lon}) # # regridded = regridded.rename({"x":"lat","y":"lon"}) # # In later versions of xesmf (0.7?) this no longer seems to be needed # # as weight files are not stored on disk. # regridder.clean_weight_file() return regridded
# def regrid_betweenGrids(data, input_grid, output_grid, method="conservative"): # """ # Regrid data from predefined input_grid and output_grid # Inputs # data - numpy array # input_grid, output_grid - Dataset of the form: # xr.Dataset({'lat': (['x', 'y'], LAT), # 'lon': (['x', 'y'], LON), # 'lat_b': (['x_b', 'y_b'], LAT_b), # 'lon_b': (['x_b', 'y_b'], LON_b)}) # where lat and lon give cell centre locations, and lat_b and lon_b give the cell bounds (corners) # method - string describing method to use. # Should be one of "conservative" or "conservative_normed". # Note that to use the conservative_normed method you need to have installed the "masking" development # branch of the xesmf package (contact Daniel for more details). # With the 'masking' branch of xESMF you can include a mask in the input_grid to ignore nan values # returns # regridded numpy array # """ # try: # import xesmf # except ImportError: # raise ImportError(xesmf_error_message) # #if 'mask' in input_grid: # # # !!! Requires the 'masking' branch of xESMF to be manually installed !!! # # method = 'conservative_normed' # #else: # #method = 'conservative' # regridder = xesmf.Regridder(input_grid, output_grid, method) # regridded = regridder( data ) # regridder.clean_weight_file() # return regridded