fix: update CMR search utility to replace deprecated scrolling #147

Merged · 3 commits · Oct 17, 2024

Changes from all commits
2 changes: 1 addition & 1 deletion .github/workflows/python-request.yml
@@ -12,7 +12,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-latest, macos-latest]
         python-version: [3.11]
     env:
       OS: ${{ matrix.os }}
1 change: 1 addition & 0 deletions .gitignore
@@ -30,6 +30,7 @@
 .Rhistory
 __pycache__
 build/
+_build/
 dist/
 develop-eggs/
 run/
2 changes: 1 addition & 1 deletion doc/environment.yml
@@ -2,7 +2,7 @@ name: gravity-docs
 channels:
   - conda-forge
 dependencies:
-  - docutils<0.18
+  - docutils
  - fontconfig
  - freetype
  - future
6 changes: 3 additions & 3 deletions doc/make.bat
@@ -10,8 +10,6 @@ if "%SPHINXBUILD%" == "" (
 set SOURCEDIR=source
 set BUILDDIR=build
 
-if "%1" == "" goto help
-
 %SPHINXBUILD% >NUL 2>NUL
 if errorlevel 9009 (
     echo.
@@ -21,10 +19,12 @@ if errorlevel 9009 (
     echo.may add the Sphinx directory to PATH.
     echo.
     echo.If you don't have Sphinx installed, grab it from
-    echo.http://sphinx-doc.org/
+    echo.https://www.sphinx-doc.org/
     exit /b 1
 )
 
+if "%1" == "" goto help
+
 %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
 goto end
 
2 changes: 2 additions & 0 deletions doc/source/api_reference/utilities.rst
@@ -62,6 +62,8 @@ General Methods
 
 .. autofunction:: gravity_toolkit.utilities.from_http
 
+.. autofunction:: gravity_toolkit.utilities.from_json
+
 .. autofunction:: gravity_toolkit.utilities.attempt_login
 
 .. autofunction:: gravity_toolkit.utilities.build_opener
12 changes: 6 additions & 6 deletions gravity_toolkit/geocenter.py
@@ -131,8 +131,8 @@ def case_insensitive_filename(self, filename):
         f = [f.name for f in self.filename.parent.iterdir() if
             re.match(self.filename.name, f.name, re.I)]
         if not f:
-            errmsg = f'{filename} not found in file system'
-            raise FileNotFoundError(errmsg)
+            msg = f'{filename} not found in file system'
+            raise FileNotFoundError(msg)
         self.filename = self.filename.with_name(f.pop())
         # return the filename
         return self
@@ -160,8 +160,8 @@ def from_AOD1B(self, release, year, month, product='glo'):
         AOD1B_file = self.directory.joinpath(granule)
         # check that file exists
         if not AOD1B_file.exists():
-            errmsg = f'AOD1B File {AOD1B_file} not in File System'
-            raise FileNotFoundError(errmsg)
+            msg = f'AOD1B File {AOD1B_file} not in File System'
+            raise FileNotFoundError(msg)
         # read AOD1b geocenter skipping over commented header text
         with AOD1B_file.open(mode='r', encoding='utf8') as f:
             file_contents=[i for i in f.read().splitlines() if not re.match(r'#',i)]
@@ -356,8 +356,8 @@ def from_SLR(self, geocenter_file, **kwargs):
         self.directory = base_dir.joinpath('AOD1B', kwargs['release'], 'geocenter')
         # check that AOD1B directory exists
         if not self.directory.exists():
-            errmsg = f'{str(self.directory)} not found in file system'
-            raise FileNotFoundError(errmsg)
+            msg = f'{str(self.directory)} not found in file system'
+            raise FileNotFoundError(msg)
 
         # Input geocenter file and split lines
         with self.filename.open(mode='r', encoding='utf8') as f:
6 changes: 3 additions & 3 deletions gravity_toolkit/harmonics.py
@@ -185,8 +185,8 @@ def case_insensitive_filename(self, filename):
         f = [f.name for f in self.filename.parent.iterdir() if
             re.match(self.filename.name, f.name, re.I)]
         if not f:
-            errmsg = f'{filename} not found in file system'
-            raise FileNotFoundError(errmsg)
+            msg = f'{filename} not found in file system'
+            raise FileNotFoundError(msg)
         self.filename = self.filename.with_name(f.pop())
         # return the filename
         return self
@@ -1181,7 +1181,7 @@ def to_masked_array(self):
         self.squeeze()
         # return the triangular matrix
         return Ylms
-
+
     def to_coo_array(self):
         """
         Convert data arrays to a COO sparse matrices
23 changes: 11 additions & 12 deletions gravity_toolkit/spatial.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 u"""
 spatial.py
-Written by Tyler Sutterley (06/2024)
+Written by Tyler Sutterley (10/2024)
 Data class for reading, writing and processing spatial data
@@ -20,6 +20,7 @@
     time.py: utilities for calculating time operations
 
 UPDATE HISTORY:
+    Updated 10/2024: allow 2D and 3D arrays in output netCDF4 files
     Updated 06/2024: use wrapper to importlib for optional dependencies
     Updated 05/2024: make subscriptable and allow item assignment
     Updated 10/2023: place time and month variables in try/except block
@@ -157,8 +158,8 @@ def case_insensitive_filename(self, filename):
         f = [f.name for f in self.filename.parent.iterdir() if
             re.match(self.filename.name, f.name, re.I)]
         if not f:
-            errmsg = f'{filename} not found in file system'
-            raise FileNotFoundError(errmsg)
+            msg = f'{filename} not found in file system'
+            raise FileNotFoundError(msg)
         self.filename = self.filename.with_name(f.pop())
         # return the filename
         return self
@@ -877,8 +878,9 @@ def to_netCDF4(self, filename, **kwargs):
         variables = set(kwargs['field_mapping'].keys()) - set(dimensions)
         for field in sorted(variables):
             temp = getattr(self,field)
+            ndim = temp.ndim
             key = kwargs['field_mapping'][field]
-            nc[key] = fileID.createVariable(key, temp.dtype, dims,
+            nc[key] = fileID.createVariable(key, temp.dtype, dims[:ndim],
                 fill_value=self.fill_value, zlib=True)
         # filling NetCDF variables
@@ -1360,20 +1362,17 @@ def subset(self, months):
         # indices to sort data objects
         months_list = [i for i,m in enumerate(self.month) if m in months]
         # output spatial object
-        temp = spatial(nlat=self.shape[0], nlon=self.shape[1],
-            fill_value=self.fill_value)
+        temp = self.zeros_like()
         # create output spatial object
-        temp.data = np.zeros((temp.shape[0],temp.shape[1],n))
-        temp.mask = np.zeros((temp.shape[0],temp.shape[1],n), dtype=bool)
+        temp.data = np.zeros((self.shape[0],self.shape[1],n))
+        temp.mask = np.zeros((self.shape[0],self.shape[1],n), dtype=bool)
         # create output spatial error
         try:
             getattr(self, 'error')
-            temp.error = np.zeros((temp.shape[0],temp.shape[1],n))
+            temp.error = np.zeros((self.shape[0],self.shape[1],n))
         except AttributeError:
             pass
-        # copy dimensions
-        temp.lon = self.lon.copy()
-        temp.lat = self.lat.copy()
         # allocate for output dates
         temp.time = np.zeros((n))
         temp.month = np.zeros((n),dtype=np.int64)
         temp.filename = []
89 changes: 64 additions & 25 deletions gravity_toolkit/utilities.py
@@ -1,14 +1,17 @@
 #!/usr/bin/env python
 u"""
 utilities.py
-Written by Tyler Sutterley (06/2024)
+Written by Tyler Sutterley (10/2024)
 Download and management utilities for syncing time and auxiliary files
 
 PYTHON DEPENDENCIES:
     lxml: processing XML and HTML in Python
     https://pypi.python.org/pypi/lxml
 
 UPDATE HISTORY:
+    Updated 10/2024: update CMR search utility to replace deprecated scrolling
+        https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html
+    Updated 08/2024: generalize hash function to use any available algorithm
     Updated 06/2024: added wrapper to importlib for optional dependencies
         make default case for an import exception be a class
     Updated 04/2024: added argument for products in CMR shortname query
@@ -114,7 +117,7 @@ def import_dependency(
 ):
     """
     Import an optional dependency
-
+
     Adapted from ``pandas.compat._optional::import_optional_dependency``
 
     Parameters
@@ -166,7 +169,7 @@ def __get__(self, inst, objtype=None):
 # PURPOSE: get the hash value of a file
 def get_hash(
     local: str | io.IOBase | pathlib.Path,
-    algorithm: str = 'MD5'
+    algorithm: str = 'md5'
 ):
     """
     Get the hash value from a local file or ``BytesIO`` object
@@ -175,18 +178,16 @@ def get_hash(
     ----------
     local: obj, str or pathlib.Path
         BytesIO object or path to file
-    algorithm: str, default 'MD5'
+    algorithm: str, default 'md5'
         hashing algorithm for checksum validation
-
-        - ``'MD5'``: Message Digest
-        - ``'sha1'``: Secure Hash Algorithm
     """
     # check if open file object or if local file exists
     if isinstance(local, io.IOBase):
-        if (algorithm == 'MD5'):
-            return hashlib.md5(local.getvalue()).hexdigest()
-        elif (algorithm == 'sha1'):
-            return hashlib.sha1(local.getvalue()).hexdigest()
+        # generate checksum hash for a given type
+        if algorithm in hashlib.algorithms_available:
+            return hashlib.new(algorithm, local.getvalue()).hexdigest()
+        else:
+            raise ValueError(f'Invalid hashing algorithm: {algorithm}')
     elif isinstance(local, (str, pathlib.Path)):
         # generate checksum hash for local file
         local = pathlib.Path(local).expanduser()
@@ -196,10 +197,10 @@ def get_hash(
         # open the local_file in binary read mode
         with local.open(mode='rb') as local_buffer:
             # generate checksum hash for a given type
-            if (algorithm == 'MD5'):
-                return hashlib.md5(local_buffer.read()).hexdigest()
-            elif (algorithm == 'sha1'):
-                return hashlib.sha1(local_buffer.read()).hexdigest()
+            if algorithm in hashlib.algorithms_available:
+                return hashlib.new(algorithm, local_buffer.read()).hexdigest()
+            else:
+                raise ValueError(f'Invalid hashing algorithm: {algorithm}')
     else:
         return ''
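The rewrite above leans on `hashlib.new`, which accepts any algorithm name listed in `hashlib.algorithms_available`; those names are lowercase, which is presumably why the default shifts from `'MD5'` to `'md5'`. A self-contained sketch of the same pattern (the helper name and example path are hypothetical):

```python
import hashlib
import pathlib

def file_hash(path, algorithm='md5'):
    """Checksum a file with any algorithm hashlib makes available"""
    if algorithm not in hashlib.algorithms_available:
        raise ValueError(f'Invalid hashing algorithm: {algorithm}')
    path = pathlib.Path(path).expanduser()
    with path.open(mode='rb') as f:
        return hashlib.new(algorithm, f.read()).hexdigest()

# one code path now covers md5, sha1, sha256, blake2b, ...
# e.g. file_hash('granule.nc', algorithm='sha256')
```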

@@ -813,6 +814,44 @@ def from_http(
     remote_buffer.seek(0)
     return remote_buffer
 
+# PURPOSE: load a JSON response from an http host
+def from_json(
+    HOST: str | list,
+    timeout: int | None = None,
+    context: ssl.SSLContext = _default_ssl_context
+) -> dict:
+    """
+    Load a JSON response from an http host
+
+    Parameters
+    ----------
+    HOST: str or list
+        remote http host path split as list
+    timeout: int or NoneType, default None
+        timeout in seconds for blocking operations
+    context: obj, default gravity_toolkit.utilities._default_ssl_context
+        SSL context for ``urllib`` opener object
+    """
+    # verify inputs for remote http host
+    if isinstance(HOST, str):
+        HOST = url_split(HOST)
+    # try loading JSON from http
+    try:
+        # Create and submit request for JSON response
+        request = urllib2.Request(posixpath.join(*HOST))
+        request.add_header('Accept', 'application/json')
+        response = urllib2.urlopen(request, timeout=timeout, context=context)
+    except urllib2.HTTPError as exc:
+        logging.debug(exc.code)
+        raise RuntimeError(exc.reason) from exc
+    except urllib2.URLError as exc:
+        logging.debug(exc.reason)
+        msg = 'Load error from {0}'.format(posixpath.join(*HOST))
+        raise Exception(msg) from exc
+    else:
+        # load JSON response
+        return json.loads(response.read())
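A quick usage sketch for the new `from_json`; the endpoint below is illustrative of the kind of CMR URL the toolkit queries and is not taken from this diff:

```python
from gravity_toolkit.utilities import from_json

# request a JSON response with a 20 second timeout; the host can be
# passed as a full URL string or pre-split into a list
results = from_json(
    'https://cmr.earthdata.nasa.gov/search/collections.json?keyword=GRACE',
    timeout=20)
# CMR wraps search results in a feed/entry structure
print(len(results['feed']['entry']))
```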

 # PURPOSE: attempt to build an opener with netrc
 def attempt_login(
     urs: str,
@@ -1196,7 +1235,7 @@ def s3_region():
     boto3 = import_dependency('boto3')
     region_name = boto3.session.Session().region_name
     return region_name
-
+
 # PURPOSE: get AWS s3 client for PO.DAAC Cumulus
 def s3_client(
     HOST: str = _s3_endpoints['podaac'],
@@ -1819,7 +1858,6 @@ def cmr(
     CMR_KEYS.append(f'?provider={provider}')
     CMR_KEYS.append('&sort_key[]=start_date')
     CMR_KEYS.append('&sort_key[]=producer_granule_id')
-    CMR_KEYS.append('&scroll=true')
     CMR_KEYS.append(f'&page_size={cmr_page_size}')
     # dictionary of product shortnames
     short_names = cmr_product_shortname(mission, center, release,
@@ -1844,20 +1882,21 @@ def cmr(
     granule_names = []
     granule_urls = []
     granule_mtimes = []
-    cmr_scroll_id = None
+    cmr_search_after = None
     while True:
         req = urllib2.Request(cmr_query_url)
-        if cmr_scroll_id:
-            req.add_header('cmr-scroll-id', cmr_scroll_id)
+        # add CMR search after header
+        if cmr_search_after:
+            req.add_header('CMR-Search-After', cmr_search_after)
+            logging.debug(f'CMR-Search-After: {cmr_search_after}')
         response = opener.open(req)
-        # get scroll id for next iteration
-        if not cmr_scroll_id:
-            headers = {k.lower():v for k,v in dict(response.info()).items()}
-            cmr_scroll_id = headers['cmr-scroll-id']
+        # get search after index for next iteration
+        headers = {k.lower():v for k,v in dict(response.info()).items()}
+        cmr_search_after = headers.get('cmr-search-after')
         # read the CMR search as JSON
         search_page = json.loads(response.read().decode('utf8'))
         ids,urls,mtimes = cmr_filter_json(search_page, endpoint=endpoint)
-        if not urls:
+        if not urls or cmr_search_after is None:
             break
         # extend lists
         granule_names.extend(ids)
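For context, CMR deprecated scrolling in favor of Search-After paging: each response returns an opaque `CMR-Search-After` header, the client echoes it on the next request, and the header stops appearing once the final page has been served. A standalone sketch of that handshake against the public granule endpoint (the short name and page size are illustrative):

```python
import json
import urllib.request

# illustrative query; any CMR granule search pages the same way
CMR_URL = ('https://cmr.earthdata.nasa.gov/search/granules.json'
           '?short_name=GAC_L2_GRAV&page_size=2000')
search_after = None
granules = []
while True:
    req = urllib.request.Request(CMR_URL)
    # echo the opaque index from the previous response
    if search_after:
        req.add_header('CMR-Search-After', search_after)
    with urllib.request.urlopen(req) as response:
        search_after = response.headers.get('CMR-Search-After')
        page = json.loads(response.read())
    # CMR wraps granule metadata in a feed/entry structure
    entries = page['feed']['entry']
    granules.extend(entries)
    # a missing header means the final page has been served
    if not entries or search_after is None:
        break
print(f'found {len(granules)} granules')
```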
2 changes: 1 addition & 1 deletion scripts/plot_global_grid_3maps.py
@@ -407,7 +407,7 @@ def plot_grid(base_dir, FILENAMES,
     cbar.solids.set_rasterized(True)
     # Add label to the colorbar
     cbar.ax.set_ylabel(CBTITLE, labelpad=10, fontsize=18)
-    cbar.ax.set_title(CBUNITS, fontsize=18, va='bottom')
+    cbar.ax.set_title(CBUNITS, fontsize=18, va='bottom', y=-1.4)
     # Set the tick levels for the colorbar
     cbar.set_ticks(levels)
     cbar.set_ticklabels([CBFORMAT.format(ct) for ct in levels])
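The added `y=-1.4` repositions the colorbar units title in axes-fraction coordinates rather than leaving it at the default placement above the bar. A hedged sketch of the effect, with made-up data and labels:

```python
import numpy as np
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
im = ax.imshow(np.random.default_rng(0).random((10, 10)))
cbar = fig.colorbar(im, ax=ax)
cbar.ax.set_ylabel('Equivalent Water Thickness', labelpad=10, fontsize=18)
# y is given in colorbar-axes fraction: a negative value pushes the
# title below the bar, and va='bottom' anchors the text by its baseline
cbar.ax.set_title('cm', fontsize=18, va='bottom', y=-1.4)
plt.show()
```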