Source code for FirnCorr.io.GSFCfdm

#!/usr/bin/env python
"""
GSFCfdm.py
Written by Tyler Sutterley (04/2026)
Reads GSFC-fdm data products provided by Brooke Medley (NASA GSFC)

PYTHON DEPENDENCIES:
    numpy: Scientific Computing Tools For Python
        https://numpy.org
        https://numpy.org/doc/stable/user/numpy-for-matlab-users.html
    xarray: N-D labeled arrays and datasets in Python
        https://docs.xarray.dev/en/stable/

UPDATE HISTORY:
    Updated 04/2026: added lineage attribute to save model filename
    Written 04/2026
"""

from __future__ import print_function

import re
import gzip
import logging
import pathlib
import xarray as xr
import numpy as np

# mapping of netCDF4 variable names to internal variable names
_variable_mapping = {
    "SMB": "SMB",
    "Ra": "rainfall",
    "Ru": "runoff",
    "Sn-Ev": "snowfall",  # technically net as it is minus sublimation
    "Me": "snowmelt",
    "FAC": "zfirn",
    "h_a": "zsurf",
    "Me_a": "zmelt",
    "SMB_a": "zsmb",
    "cum_smb_anomaly": "zsmb",  # old variable name for SMB_a
    "height": "zsurf",  # old variable name for h_a
}

# PROJ4 parameters for GSFC-fdm model projections
# ais: WGS84 / Polar Stereographic South
# gris: WGS84 / NSIDC Sea Ice Polar Stereographic North
proj4_params = dict(ais=3031, gris=3413)



[docs]
def open_dataset(
    filename: str | pathlib.Path,
    variable: str | list[str],
    chunks: str | None = None,
    **kwargs,
):
    """
    Open a netCDF4 file containing GSFC-fdm data

    Parameters
    ----------
    filename: str or pathlib.Path
        Path to netCDF4 file containing GSFC-fdm data
    variable: str or list
        netCDF4 variable name(s) to extract
    chunks: str or None, default None
        Chunk size for ``xarray`` dataset
    compressed: bool, default False
        If True, read gzipped netCDF4 file
    """
    # set default keyword arguments
    kwargs.setdefault("compressed", False)
    # regular expression pattern for extracting parameters
    (region,) = re.findall(r"(ais|gris)", pathlib.Path(filename).stem)
    # read the netCDF4-format file
    logging.debug(filename)
    if kwargs["compressed"]:
        # read gzipped netCDF4 file
        f = gzip.open(filename, "rb")
        tmp = xr.open_dataset(f, mask_and_scale=True, chunks=chunks)
    else:
        tmp = xr.open_dataset(filename, mask_and_scale=True, chunks=chunks)
    # output dataset
    ds = xr.Dataset()
    # decode time variables
    ds["time"] = decode_times(tmp["time"])
    # extract x and y coordinate arrays
    if tmp["x"].ndim == 2:
        ds["y"] = tmp["y"].isel(x=0).squeeze()
        ds["x"] = tmp["x"].isel(y=0).squeeze()
    else:
        ds["y"] = tmp["y"].copy()
        ds["x"] = tmp["x"].copy()
    # check if variable is a string
    if isinstance(variable, str):
        variable = [variable]
    # loop over variables to extract
    for var in variable:
        # read dataset and remove singleton dimensions
        imap = _variable_mapping.get(var, var)
        # convert dimension order from matlab (time, x, y)
        ds[imap] = tmp[var].squeeze().transpose("time", "y", "x")
        # replace points where all values are zero with NaNs
        ds[imap] = ds[imap].where(
            ds[imap].sum(dim="time", skipna=False) != 0, np.nan, drop=False
        )
        # add attributes for variable
        ds[imap].attrs["group"] = var
        ds[imap].attrs["units"] = tmp[var].attrs.get("units", "")
    # drop coordinates that are not in the dimensions
    drop_coords = [c for c in ds.coords if c not in ds.dims]
    ds = ds.drop_vars(drop_coords)
    # add attributes to dataset
    ds.attrs["lineage"] = pathlib.Path(filename).name
    ds.attrs["crs"] = proj4_params[region]
    # return the dataset
    return ds




[docs]
def decode_times(variable: xr.DataArray) -> xr.DataArray:
    """Decode time variable to be monotonic and in ``datetime`` format"""
    try:
        # unit long_name attribute for variable
        long_name = variable.attrs.get("long_name")
        # get time step and unit from long_name attribute
        (step, unit) = re.findall(r"(\d+)(.*?)resolution", long_name).pop()
        # convert time step to numpy timedelta64 format (hours)
        time_step = np.timedelta64(24 * int(step), "h")
    except Exception as exc:
        # if cannot be parsed: return variable without modification
        return variable
    # convert time variables to be monotonically increasing
    start_time = np.datetime64(f"{variable.values[0]:0.0f}", "h")
    variable = start_time + time_step * (0.5 + np.arange(variable.size))
    return variable