Commit 01b9449

fix: update CMR search utility to replace deprecated scrolling (#147)

tsutterley authored Oct 17, 2024
1 parent 4b1c32f commit 01b9449
Showing 11 changed files with 116 additions and 67 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-request.yml
@@ -12,7 +12,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
- os: [ubuntu-latest, macos-latest, windows-latest]
+ os: [ubuntu-latest, macos-latest]
python-version: [3.11]
env:
OS: ${{ matrix.os }}
1 change: 1 addition & 0 deletions .gitignore
@@ -30,6 +30,7 @@
.Rhistory
__pycache__
build/
+ _build/
dist/
develop-eggs/
run/
2 changes: 1 addition & 1 deletion doc/environment.yml
@@ -2,7 +2,7 @@ name: gravity-docs
channels:
- conda-forge
dependencies:
- - docutils<0.18
+ - docutils
- fontconfig
- freetype
- future
6 changes: 3 additions & 3 deletions doc/make.bat
@@ -10,8 +10,6 @@ if "%SPHINXBUILD%" == "" (
set SOURCEDIR=source
set BUILDDIR=build

if "%1" == "" goto help

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
@@ -21,10 +19,12 @@ if errorlevel 9009 (
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
- echo.http://sphinx-doc.org/
+ echo.https://www.sphinx-doc.org/
exit /b 1
)

if "%1" == "" goto help

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

2 changes: 2 additions & 0 deletions doc/source/api_reference/utilities.rst
@@ -62,6 +62,8 @@ General Methods

.. autofunction:: gravity_toolkit.utilities.from_http

+ .. autofunction:: gravity_toolkit.utilities.from_json
+
.. autofunction:: gravity_toolkit.utilities.attempt_login

.. autofunction:: gravity_toolkit.utilities.build_opener
12 changes: 6 additions & 6 deletions gravity_toolkit/geocenter.py
@@ -131,8 +131,8 @@ def case_insensitive_filename(self, filename):
f = [f.name for f in self.filename.parent.iterdir() if
re.match(self.filename.name, f.name, re.I)]
if not f:
- errmsg = f'{filename} not found in file system'
- raise FileNotFoundError(errmsg)
+ msg = f'{filename} not found in file system'
+ raise FileNotFoundError(msg)
self.filename = self.filename.with_name(f.pop())
# return the filename
return self
@@ -160,8 +160,8 @@ def from_AOD1B(self, release, year, month, product='glo'):
AOD1B_file = self.directory.joinpath(granule)
# check that file exists
if not AOD1B_file.exists():
- errmsg = f'AOD1B File {AOD1B_file} not in File System'
- raise FileNotFoundError(errmsg)
+ msg = f'AOD1B File {AOD1B_file} not in File System'
+ raise FileNotFoundError(msg)
# read AOD1b geocenter skipping over commented header text
with AOD1B_file.open(mode='r', encoding='utf8') as f:
file_contents=[i for i in f.read().splitlines() if not re.match(r'#',i)]
@@ -356,8 +356,8 @@ def from_SLR(self, geocenter_file, **kwargs):
self.directory = base_dir.joinpath('AOD1B', kwargs['release'], 'geocenter')
# check that AOD1B directory exists
if not self.directory.exists():
- errmsg = f'{str(self.directory)} not found in file system'
- raise FileNotFoundError(errmsg)
+ msg = f'{str(self.directory)} not found in file system'
+ raise FileNotFoundError(msg)

# Input geocenter file and split lines
with self.filename.open(mode='r', encoding='utf8') as f:
6 changes: 3 additions & 3 deletions gravity_toolkit/harmonics.py
@@ -185,8 +185,8 @@ def case_insensitive_filename(self, filename):
f = [f.name for f in self.filename.parent.iterdir() if
re.match(self.filename.name, f.name, re.I)]
if not f:
- errmsg = f'{filename} not found in file system'
- raise FileNotFoundError(errmsg)
+ msg = f'{filename} not found in file system'
+ raise FileNotFoundError(msg)
self.filename = self.filename.with_name(f.pop())
# return the filename
return self
@@ -1181,7 +1181,7 @@ def to_masked_array(self):
self.squeeze()
# return the triangular matrix
return Ylms

def to_coo_array(self):
"""
Convert data arrays to COO sparse matrices
23 changes: 11 additions & 12 deletions gravity_toolkit/spatial.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
u"""
spatial.py
- Written by Tyler Sutterley (06/2024)
+ Written by Tyler Sutterley (10/2024)
Data class for reading, writing and processing spatial data
@@ -20,6 +20,7 @@
time.py: utilities for calculating time operations
UPDATE HISTORY:
+ Updated 10/2024: allow 2D and 3D arrays in output netCDF4 files
Updated 06/2024: use wrapper to importlib for optional dependencies
Updated 05/2024: make subscriptable and allow item assignment
Updated 10/2023: place time and month variables in try/except block
@@ -157,8 +158,8 @@ def case_insensitive_filename(self, filename):
f = [f.name for f in self.filename.parent.iterdir() if
re.match(self.filename.name, f.name, re.I)]
if not f:
- errmsg = f'{filename} not found in file system'
- raise FileNotFoundError(errmsg)
+ msg = f'{filename} not found in file system'
+ raise FileNotFoundError(msg)
self.filename = self.filename.with_name(f.pop())
# return the filename
return self
@@ -877,8 +878,9 @@ def to_netCDF4(self, filename, **kwargs):
variables = set(kwargs['field_mapping'].keys()) - set(dimensions)
for field in sorted(variables):
temp = getattr(self,field)
+ ndim = temp.ndim
key = kwargs['field_mapping'][field]
- nc[key] = fileID.createVariable(key, temp.dtype, dims,
+ nc[key] = fileID.createVariable(key, temp.dtype, dims[:ndim],
fill_value=self.fill_value, zlib=True)
# filling NetCDF variables
for field,key in kwargs['field_mapping'].items():
@@ -1360,20 +1362,17 @@ def subset(self, months):
# indices to sort data objects
months_list = [i for i,m in enumerate(self.month) if m in months]
# output spatial object
- temp = spatial(nlat=self.shape[0], nlon=self.shape[1],
- fill_value=self.fill_value)
+ temp = self.zeros_like()
# create output spatial object
- temp.data = np.zeros((temp.shape[0],temp.shape[1],n))
- temp.mask = np.zeros((temp.shape[0],temp.shape[1],n), dtype=bool)
+ temp.data = np.zeros((self.shape[0],self.shape[1],n))
+ temp.mask = np.zeros((self.shape[0],self.shape[1],n), dtype=bool)
# create output spatial error
try:
getattr(self, 'error')
- temp.error = np.zeros((temp.shape[0],temp.shape[1],n))
+ temp.error = np.zeros((self.shape[0],self.shape[1],n))
except AttributeError:
pass
- # copy dimensions
- temp.lon = self.lon.copy()
- temp.lat = self.lat.copy()
# allocate for output dates
temp.time = np.zeros((n))
temp.month = np.zeros((n),dtype=np.int64)
temp.filename = []
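The `subset` refactor above replaces a hand-built output object (and the separate lon/lat copies) with `zeros_like`. Roughly, such a helper copies coordinates and metadata while leaving the data arrays for the caller to allocate; a sketch under that assumption, using a hypothetical stand-in class rather than the toolkit's actual `spatial` implementation:

    import numpy as np

    class Grid:
        # hypothetical stand-in for gravity_toolkit's spatial class
        def __init__(self, lon=None, lat=None, fill_value=None):
            self.lon = lon
            self.lat = lat
            self.fill_value = fill_value

        def zeros_like(self):
            # copy coordinates and metadata; data arrays are
            # allocated afterwards by the caller (as subset does)
            return Grid(lon=np.copy(self.lon), lat=np.copy(self.lat),
                fill_value=self.fill_value)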
89 changes: 64 additions & 25 deletions gravity_toolkit/utilities.py
@@ -1,14 +1,17 @@
#!/usr/bin/env python
u"""
utilities.py
- Written by Tyler Sutterley (06/2024)
+ Written by Tyler Sutterley (10/2024)
Download and management utilities for syncing time and auxiliary files
PYTHON DEPENDENCIES:
lxml: processing XML and HTML in Python
https://pypi.python.org/pypi/lxml
UPDATE HISTORY:
+ Updated 10/2024: update CMR search utility to replace deprecated scrolling
+     https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html
Updated 08/2024: generalize hash function to use any available algorithm
Updated 06/2024: added wrapper to importlib for optional dependencies
make default case for an import exception be a class
Updated 04/2024: added argument for products in CMR shortname query
@@ -114,7 +117,7 @@ def import_dependency(
):
"""
Import an optional dependency
Adapted from ``pandas.compat._optional::import_optional_dependency``
Parameters
@@ -166,7 +169,7 @@ def __get__(self, inst, objtype=None):
# PURPOSE: get the hash value of a file
def get_hash(
local: str | io.IOBase | pathlib.Path,
- algorithm: str = 'MD5'
+ algorithm: str = 'md5'
):
"""
Get the hash value from a local file or ``BytesIO`` object
@@ -175,18 +178,16 @@
----------
local: obj, str or pathlib.Path
BytesIO object or path to file
- algorithm: str, default 'MD5'
+ algorithm: str, default 'md5'
hashing algorithm for checksum validation
- - ``'MD5'``: Message Digest
- - ``'sha1'``: Secure Hash Algorithm
"""
# check if open file object or if local file exists
if isinstance(local, io.IOBase):
- if (algorithm == 'MD5'):
- return hashlib.md5(local.getvalue()).hexdigest()
- elif (algorithm == 'sha1'):
- return hashlib.sha1(local.getvalue()).hexdigest()
+ # generate checksum hash for a given type
+ if algorithm in hashlib.algorithms_available:
+ return hashlib.new(algorithm, local.getvalue()).hexdigest()
+ else:
+ raise ValueError(f'Invalid hashing algorithm: {algorithm}')
elif isinstance(local, (str, pathlib.Path)):
# generate checksum hash for local file
local = pathlib.Path(local).expanduser()
@@ -196,10 +197,10 @@
# open the local_file in binary read mode
with local.open(mode='rb') as local_buffer:
# generate checksum hash for a given type
- if (algorithm == 'MD5'):
- return hashlib.md5(local_buffer.read()).hexdigest()
- elif (algorithm == 'sha1'):
- return hashlib.sha1(local_buffer.read()).hexdigest()
+ if algorithm in hashlib.algorithms_available:
+ return hashlib.new(algorithm, local_buffer.read()).hexdigest()
+ else:
+ raise ValueError(f'Invalid hashing algorithm: {algorithm}')
else:
return ''

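With the branches collapsed into `hashlib.new`, any algorithm reported by `hashlib.algorithms_available` (md5, sha1, sha256, blake2b, ...) now works without further code changes. A self-contained sketch of the same pattern:

    import io
    import hashlib

    def checksum(buffer: io.BytesIO, algorithm: str = 'md5') -> str:
        # generate a checksum hash for any available algorithm
        if algorithm not in hashlib.algorithms_available:
            raise ValueError(f'Invalid hashing algorithm: {algorithm}')
        return hashlib.new(algorithm, buffer.getvalue()).hexdigest()

    # the same call now covers algorithms the old if/elif never handled
    buf = io.BytesIO(b'GRACE/GRACE-FO data')
    print(checksum(buf, 'md5'))
    print(checksum(buf, 'sha256'))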
@@ -813,6 +814,44 @@ def from_http(
remote_buffer.seek(0)
return remote_buffer

+ # PURPOSE: load a JSON response from an http host
+ def from_json(
+ HOST: str | list,
+ timeout: int | None = None,
+ context: ssl.SSLContext = _default_ssl_context
+ ) -> dict:
+ """
+ Load a JSON response from an http host
+ Parameters
+ ----------
+ HOST: str or list
+ remote http host path split as list
+ timeout: int or NoneType, default None
+ timeout in seconds for blocking operations
+ context: obj, default gravity_toolkit.utilities._default_ssl_context
+ SSL context for ``urllib`` opener object
+ """
+ # verify inputs for remote http host
+ if isinstance(HOST, str):
+ HOST = url_split(HOST)
+ # try loading JSON from http
+ try:
+ # Create and submit request for JSON response
+ request = urllib2.Request(posixpath.join(*HOST))
+ request.add_header('Accept', 'application/json')
+ response = urllib2.urlopen(request, timeout=timeout, context=context)
+ except urllib2.HTTPError as exc:
+ logging.debug(exc.code)
+ raise RuntimeError(exc.reason) from exc
+ except urllib2.URLError as exc:
+ logging.debug(exc.reason)
+ msg = 'Load error from {0}'.format(posixpath.join(*HOST))
+ raise Exception(msg) from exc
+ else:
+ # load JSON response
+ return json.loads(response.read())

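A possible usage sketch for the new `from_json` helper; the endpoint below is illustrative (any host returning JSON should work), and assumes the module is imported as `gravity_toolkit.utilities`:

    import gravity_toolkit.utilities as utilities

    # illustrative CMR endpoint returning a JSON payload
    HOST = 'https://cmr.earthdata.nasa.gov/search/collections.json'
    results = utilities.from_json(HOST, timeout=20)
    # results is the parsed JSON response as a dictionary
    print(results.keys())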
# PURPOSE: attempt to build an opener with netrc
def attempt_login(
urs: str,
@@ -1196,7 +1235,7 @@ def s3_region():
boto3 = import_dependency('boto3')
region_name = boto3.session.Session().region_name
return region_name

# PURPOSE: get AWS s3 client for PO.DAAC Cumulus
def s3_client(
HOST: str = _s3_endpoints['podaac'],
@@ -1819,7 +1858,6 @@ def cmr(
CMR_KEYS.append(f'?provider={provider}')
CMR_KEYS.append('&sort_key[]=start_date')
CMR_KEYS.append('&sort_key[]=producer_granule_id')
- CMR_KEYS.append('&scroll=true')
CMR_KEYS.append(f'&page_size={cmr_page_size}')
# dictionary of product shortnames
short_names = cmr_product_shortname(mission, center, release,
@@ -1844,20 +1882,21 @@
granule_names = []
granule_urls = []
granule_mtimes = []
- cmr_scroll_id = None
+ cmr_search_after = None
while True:
req = urllib2.Request(cmr_query_url)
- if cmr_scroll_id:
- req.add_header('cmr-scroll-id', cmr_scroll_id)
+ # add CMR search after header
+ if cmr_search_after:
+ req.add_header('CMR-Search-After', cmr_search_after)
+ logging.debug(f'CMR-Search-After: {cmr_search_after}')
response = opener.open(req)
- # get scroll id for next iteration
- if not cmr_scroll_id:
- headers = {k.lower():v for k,v in dict(response.info()).items()}
- cmr_scroll_id = headers['cmr-scroll-id']
+ # get search after index for next iteration
+ headers = {k.lower():v for k,v in dict(response.info()).items()}
+ cmr_search_after = headers.get('cmr-search-after')
# read the CMR search as JSON
search_page = json.loads(response.read().decode('utf8'))
ids,urls,mtimes = cmr_filter_json(search_page, endpoint=endpoint)
- if not urls:
+ if not urls or cmr_search_after is None:
break
# extend lists
granule_names.extend(ids)
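The loop above is the core of the fix: CMR's scroll sessions (the `scroll=true` parameter and `cmr-scroll-id` header) are deprecated, and search-after pagination replaces them. Each response carries a `CMR-Search-After` header that the client echoes back to fetch the next page; a missing header means the last page was reached. A standalone sketch of the protocol against the CMR granule search (the short name is a placeholder, not a verified collection id):

    import json
    import urllib.request

    # placeholder collection short name; substitute a real CMR product
    url = ('https://cmr.earthdata.nasa.gov/search/granules.json'
        '?short_name=EXAMPLE_PRODUCT&page_size=2000')
    search_after = None
    granules = []
    while True:
        req = urllib.request.Request(url)
        # echo the header from the previous response to get the next page
        if search_after:
            req.add_header('CMR-Search-After', search_after)
        with urllib.request.urlopen(req, timeout=60) as response:
            # header lookup is case-insensitive; None signals the last page
            search_after = response.headers.get('CMR-Search-After')
            page = json.loads(response.read())
        entries = page.get('feed', {}).get('entry', [])
        if not entries:
            break
        granules.extend(entries)
        if search_after is None:
            break
    print(f'{len(granules)} granules found')

Unlike scroll ids, the search-after state lives entirely in the request and response headers, so there is no server-side session to expire or clean up.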
2 changes: 1 addition & 1 deletion scripts/plot_global_grid_3maps.py
@@ -407,7 +407,7 @@ def plot_grid(base_dir, FILENAMES,
cbar.solids.set_rasterized(True)
# Add label to the colorbar
cbar.ax.set_ylabel(CBTITLE, labelpad=10, fontsize=18)
- cbar.ax.set_title(CBUNITS, fontsize=18, va='bottom')
+ cbar.ax.set_title(CBUNITS, fontsize=18, va='bottom', y=-1.4)
# Set the tick levels for the colorbar
cbar.set_ticks(levels)
cbar.set_ticklabels([CBFORMAT.format(ct) for ct in levels])
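The added `y=-1.4` shifts the units label from matplotlib's default title position (above the colorbar axes) to below them, while `va='bottom'` keeps the text anchored by its baseline. A minimal hedged sketch with made-up data:

    import numpy as np
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    im = ax.imshow(np.random.rand(10, 10))
    cbar = fig.colorbar(im, ax=ax, orientation='horizontal')
    # negative y (in axes coordinates) drops the "title" below the colorbar
    cbar.ax.set_title('cm w.e.', fontsize=18, va='bottom', y=-1.4)
    fig.savefig('example.png')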