Source code for FirnCorr.datasets.fetch_gsfcfdm

"""
fetch_gsfcfdm.py
Written by Tyler Sutterley (05/2026)
Downloads NASA GSFC Firn Densification Model (GSFC-fdm) model outputs

PROGRAM DEPENDENCIES:
    utilities.py: download and management utilities for syncing files

UPDATE HISTORY:
    Written 05/2026
"""

import re
import shutil
import logging
import pathlib
import zipfile
import argparse
import FirnCorr.utilities

# default data directory for SMB and firn models
_default_directory = FirnCorr.utilities.get_cache_path()
# default ssl context
_default_ssl_context = FirnCorr.utilities._default_ssl_context
# repository API urls
_zenodo_api_url = "https://zenodo.org/api"


def fetch_gsfcfdm(
    record: str,
    directory: str | pathlib.Path = _default_directory,
    timeout: int | None = None,
    clobber: bool = False,
    chunk: int = 16384,
    mode: int = 0o775,
):
    """
    Syncs GSFC-fdm model outputs for a given zenodo record

    Files listed for the record whose names match the GSFC-fdm naming
    pattern are downloaded into ``<directory>/GSFC-fdm/<version>``.
    Zip archives are extracted member-by-member; other files are
    written as-is. A symbolic link with a shortened name is created
    for every file that is written.

    Parameters
    ----------
    record: str
        Zenodo record number
    directory: str or pathlib.Path
        Working data directory
    timeout: int, default None
        Timeout in seconds for blocking operations
    clobber: bool, default False
        Overwrite existing data
    chunk: int, default 16384
        Chunk size for copying files in bytes
    mode: int, default 0o775
        Permission mode of the local directories and files
        (number in octal)

    Raises
    ------
    Exception
        If the checksum of a downloaded file does not match the
        checksum reported in the zenodo file listing
    """
    # standard output (terminal output)
    logging.basicConfig(level=logging.INFO)
    # check if local directory exists and recursively create if not
    directory = pathlib.Path(directory).expanduser().absolute()
    directory.mkdir(exist_ok=True, parents=True, mode=mode)
    # zenodo API host
    HOST = FirnCorr.utilities.URL(_zenodo_api_url)
    records_api = HOST.joinpath("records", record)
    logging.debug(records_api)
    # get record information and load JSON response
    # (use the same timeout as every other blocking request)
    records_response = records_api.load(
        timeout=timeout, context=_default_ssl_context
    )
    version = str(records_response["id"])
    # regular expression pattern for extracting parameters
    regex_pattern = r"(v\d+.*?)_(ais|gris)(.*?)\.(.*?)$"
    rx = re.compile(regex_pattern, re.IGNORECASE)
    # get files from latest version of record
    deposit_api = HOST.joinpath("deposit", "depositions", version, "files")
    logging.debug(deposit_api)
    # Create and submit request and load JSON response
    deposit_response = deposit_api.load(
        timeout=timeout, context=_default_ssl_context
    )
    # for each file in the JSON response for deposits
    for entry in deposit_response:
        filename = entry["filename"]
        # search for pattern in filename
        match = rx.search(filename)
        # skip file if pattern is not found
        if not match:
            logging.debug(f"Skipping file: {filename}")
            continue
        # check if needing to include algorithm in the hash comparison
        include_algorithm = entry["checksum"].startswith("md5:")
        # extract parameters from filename
        gsfcfdm_version = match.group(1).replace("_", ".")
        # check if local directory exists and recursively create if not
        local_directory = directory.joinpath("GSFC-fdm", gsfcfdm_version)
        local_directory.mkdir(exist_ok=True, parents=True, mode=mode)
        # full path to output file
        local_file = local_directory.joinpath(filename)
        # check if file already exists by matching MD5 checksums
        original_md5 = FirnCorr.utilities.get_hash(
            local_file, include_algorithm=include_algorithm
        )
        # skip download if checksums match
        if original_md5 == entry["checksum"] and not clobber:
            continue
        # download url for remote file
        download = FirnCorr.utilities.URL(entry["links"]["download"])
        # output file information
        logging.info(download.urlname)
        # get remote file as a byte-stream
        remote_buffer = download.get(
            timeout=timeout, context=_default_ssl_context
        )
        # verify MD5 checksums
        computed_md5 = FirnCorr.utilities.get_hash(
            remote_buffer, include_algorithm=include_algorithm
        )
        # raise exception if checksums do not match
        if computed_md5 != entry["checksum"]:
            raise Exception(f"Checksum mismatch: {download.urlname}")
        # download file or extract files from zip
        if pathlib.Path(filename).suffix == ".zip":
            # extract the zip file into the local directory
            with zipfile.ZipFile(remote_buffer) as z:
                # extract each file and set permissions
                for member in z.infolist():
                    # extract the file to the local directory
                    extracted_file = local_directory.joinpath(member.filename)
                    logging.info(f"\t--> {extracted_file}")
                    z.extract(path=local_directory, member=member)
                    # change the permissions mode
                    extracted_file.chmod(mode=mode)
                    # create symbolic link
                    symlink = rx.sub(r"\1_\2.\4", member.filename)
                    symlink_file = extracted_file.with_name(symlink)
                    FirnCorr.utilities.symlink(extracted_file, symlink_file)
        else:
            # write the file to the local directory
            logging.info(f"\t--> {local_file}")
            # use a dedicated name for the file handle so that the
            # zenodo file entry is not shadowed: the filename is still
            # needed below to build the symbolic link name
            with local_file.open(mode="wb") as fid:
                shutil.copyfileobj(remote_buffer, fid, chunk)
            # change the permissions mode
            local_file.chmod(mode=mode)
            # create symbolic link
            symlink = rx.sub(r"\1_\2.\4", filename)
            symlink_file = local_file.with_name(symlink)
            FirnCorr.utilities.symlink(local_file, symlink_file)
# PURPOSE: build the command line argument parser
def arguments():
    """
    Create the command line argument parser for the program

    Returns
    -------
    parser: argparse.ArgumentParser
        Parser providing the ``--directory``, ``--record``,
        ``--timeout``, ``--clobber`` and ``--mode`` options
    """
    cli = argparse.ArgumentParser(
        description="""Downloads NASA GSFC Firn Densification Model (GSFC-fdm) model outputs """,
        fromfile_prefix_chars="@",
    )
    # table of command line options: (flags, settings)
    # registered in this order, which is the order shown in the help text
    option_table = (
        # working data directory
        (
            ("--directory", "-D"),
            dict(
                type=pathlib.Path,
                default=_default_directory,
                help="Working data directory",
            ),
        ),
        # zenodo record number
        (
            ("--record", "-R"),
            dict(type=str, default="7054573", help="Zenodo record"),
        ),
        # connection timeout
        (
            ("--timeout", "-T"),
            dict(
                type=int,
                default=120,
                help="Timeout in seconds for blocking operations",
            ),
        ),
        # clobber will overwrite the existing data
        (
            ("--clobber", "-C"),
            dict(
                default=False,
                action="store_true",
                help="Overwrite existing data",
            ),
        ),
        # permissions mode of the local directories and files
        # (parsed as a number in octal)
        (
            ("--mode", "-M"),
            dict(
                type=lambda x: int(x, base=8),
                default=0o775,
                help="Permission mode of directories and files downloaded",
            ),
        ),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    # hand the configured parser back to the caller
    return cli


# PURPOSE: command line entry point of the program
def main():
    """
    Parse the command line arguments and sync the requested record
    """
    # read the system arguments listed after the program
    # (arguments that the parser does not recognize are discarded)
    options, _unrecognized = arguments().parse_known_args()
    # run program for record
    fetch_gsfcfdm(
        options.record,
        directory=options.directory,
        timeout=options.timeout,
        clobber=options.clobber,
        mode=options.mode,
    )


if __name__ == "__main__":
    main()