Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add check for file existence #143

Merged
merged 10 commits into from
Apr 5, 2023
1 change: 1 addition & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Change CDS keys from `.cdsapirc` file to `.config/eracli.txt` file. This will avoid conflict with e.g. ADS.
- If a user makes a request without `--splitmonths` they are warned that the behavior will change in the future, and that they have to choose between `--splitmonths False` and `--splitmonths True`.
- When a request would encounter a Request Too Large error in the CDS API, the user is warned and given a suggestion to use `--splitmonths`.
- When a file already exists and would be overwritten, the user is prompted for confirmation. This should prevent accidental overwriting of files.
- `cli.py` has been refactored to make the structure clearer. Separate argument builders are now in their own modules.
- The documentation has been overhauled, and now uses Markdown files & MkDocs.

Expand Down
18 changes: 14 additions & 4 deletions era5cli/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,9 @@ def __init__(
)

def _get_login(self):
    """Store the CDS url and key on the instance.

    The era5cli config check runs first; the credentials are read with
    ``key_management.load_era5cli_config`` only afterwards and unpacked
    into ``self.url`` and ``self.key``.
    """
    # Order matters: the check comes before the load so that a missing
    # config is dealt with before the keys are read.
    key_management.check_era5cli_config()

    credentials = key_management.load_era5cli_config()
    self.url, self.key = credentials

def fetch(self, dryrun=False):
Expand Down Expand Up @@ -263,11 +266,11 @@ def _split_variable(self):
outputfiles = [
self._define_outputfilename(var, self.years) for var in self.variables
]
era5cli.utils.assert_outputfiles_not_exist(outputfiles)

years = len(outputfiles) * [self.years]
if not self.threads:
pool = Pool()
else:
pool = Pool(nodes=self.threads)

pool = Pool(nodes=self.threads) if self.threads else Pool()
pool.map(self._getdata, self.variables, years, outputfiles)

def _split_variable_yr(self):
Expand All @@ -277,7 +280,11 @@ def _split_variable_yr(self):
for var in self.variables:
outputfiles += [self._define_outputfilename(var, [yr]) for yr in self.years]
variables += len(self.years) * [var]

era5cli.utils.assert_outputfiles_not_exist(outputfiles)

years = len(self.variables) * self.years

pool = Pool(nodes=self.threads) if self.threads else Pool()
pool.map(self._getdata, variables, years, outputfiles)

Expand All @@ -296,6 +303,8 @@ def _split_variable_yr_month(self):
years += [year]
months += [month]

era5cli.utils.assert_outputfiles_not_exist(outputfiles)

pool = Pool(nodes=self.threads) if self.threads else Pool()
pool.map(self._getdata, variables, years, outputfiles, months)

Expand Down Expand Up @@ -493,6 +502,7 @@ def _exit(self):
def _getdata(self, variables: list, years: list, outputfile: str, months=None):
"""Fetch variables using cds api call."""
name, request = self._build_request(variables, years, months)

if self.dryrun:
print(name, request, outputfile)
else:
Expand Down
15 changes: 15 additions & 0 deletions era5cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import shutil
import textwrap
from pathlib import Path
from typing import List
import prettytable
from netCDF4 import Dataset
import era5cli
Expand Down Expand Up @@ -205,3 +206,17 @@ def strtobool(value: str) -> bool:
"Could not convert string to boolean. Valid inputs are:"
f"{trues} and {falses} (case insensitive)."
)


def assert_outputfiles_not_exist(outputfiles: List[str]) -> None:
    """Ask for confirmation before existing output files get overwritten.

    If none of the given paths exist, the function returns immediately
    without prompting. Otherwise the user is asked on stdin whether the
    files may be overwritten.

    Args:
        outputfiles: Paths of the files that are about to be written.

    Raises:
        FileExistsError: If at least one file exists and the user answers
            "n", "no" or "nope" (case insensitive, surrounding whitespace
            ignored).
    """
    # NOTE(review): the review thread on this change asked for a
    # skip-existing flag and the author reports having implemented
    # `--overwrite`; that flag is not visible in this function -- confirm.
    if not any(Path(file).exists() for file in outputfiles):
        return

    answer = input(
        "\n Some filenames already exists in this folder."
        "\n Do you want to overwrite them? (Y/N)"
    )

    # Strip before comparing: a reply such as "n " or " no" must count as a
    # refusal instead of silently allowing the files to be overwritten.
    if answer.strip().lower() in ("n", "no", "nope"):
        raise FileExistsError(
            "\n One or more files already exist in this folder."
            "\n Please remove them, or change to a different folder to continue"
        )
21 changes: 21 additions & 0 deletions tests/test_fetch.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Tests for era5cli Fetch class."""

import pathlib
import unittest.mock as mock
import pytest
from era5cli import _request_size
Expand All @@ -23,6 +24,14 @@
# fmt: on


@pytest.fixture(scope="module", autouse=True)
def my_thing_mock():
    """Patch the era5cli config check for every test in this module.

    Yields the autospecced mock that stands in for
    ``key_management.check_era5cli_config`` as referenced from
    ``era5cli.fetch``; the patch is undone when the module's tests finish.
    """
    target = "era5cli.fetch.key_management.check_era5cli_config"
    with mock.patch(target, autospec=True) as patched_check:
        yield patched_check


def initialize(
outputformat="netcdf",
merge=False,
Expand Down Expand Up @@ -643,3 +652,15 @@ def test_area():
with pytest.raises(ValueError):
era5 = initialize(area=[-180, 180, -90])
era5._build_request("total_precipitation", [2008])


def test_file_exists():
    """Check the overwrite prompt when output files already exist.

    Every path is reported as existing. Answering "Y" lets the dry-run
    fetch proceed, answering "N" must raise ``FileExistsError``.
    """
    pretend_files_exist = mock.patch.object(
        pathlib.Path, "exists", return_value=True
    )
    with pretend_files_exist:
        era5 = initialize()

        # The user confirms: no exception expected.
        with mock.patch("builtins.input", return_value="Y"):
            era5.fetch(dryrun=True)

        # The user declines: the fetch has to abort.
        with mock.patch("builtins.input", return_value="N"), pytest.raises(
            FileExistsError
        ):
            era5.fetch(dryrun=True)
8 changes: 8 additions & 0 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@
from era5cli.cli import main


@pytest.fixture(scope="module", autouse=True)
def my_thing_mock():
    """Replace the era5cli config check with a mock for this module.

    The autospecced mock for ``key_management.check_era5cli_config`` (as
    looked up through ``era5cli.fetch``) is yielded, and the patch is
    removed again once the module's tests are done.
    """
    config_check_path = "era5cli.fetch.key_management.check_era5cli_config"
    with mock.patch(config_check_path, autospec=True) as config_check_mock:
        yield config_check_mock


# combine calls with result and possible warning message
call_result = [
{
Expand Down