Skip to content

Commit

Permalink
add new structure
Browse files Browse the repository at this point in the history
  • Loading branch information
mscheltienne committed Apr 26, 2024
1 parent 276579a commit e366f53
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 46 deletions.
37 changes: 20 additions & 17 deletions itvalidator/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,27 @@
ERRORS_CODES: dict[int, str] = {
# file violations
1: "File code does not match folder code.",
20: "File name stem contains invalid characters (space, '-', '.', ...).",
30: "File date format is invalid, expected 'YYMMDD'.",
31: "File date is in the future.",
40: f"File user code length is not {_USERCODE_LENGTH} characters.",
41: "File user code must be uppercase.",
2: "File code must end with a lowercase letter, except for root folders.",
3: "File code must start with a leading underscore.",
4: "File code has multiple leading underscores.",
21: "File name stem contains invalid characters (space, '-', '.', ...).",
31: "File date format is invalid, expected 'YYMMDD'.",
32: "File date is in the future.",
41: f"File user code length is not {_USERCODE_LENGTH} characters.",
42: "File user code must be uppercase.",
# folder violations
100: "Folder code does not match parent folder code.",
101: "Folder code must end with a lowercase letter, except root folders.",
102: "Folder code must start with a leading underscore.",
103: "Folder code has multiple leading underscores.",
104: "Folder code must start with 'F'.",
110: "Folder name stem contains invalid characters (space, '-', '.', ...).",
101: "Folder code does not match parent folder code.",
102: "Folder code must end with a lowercase letter, except for root folders.",
103: "Folder code must start with a leading underscore.",
104: "Folder code has multiple leading underscores.",
105: "Folder code must start with 'F'.",
111: "Folder name stem contains invalid characters (space, '-', '.', ...).",
# subfolder violations
200: "Subfolders code last letters are not consecutive lowercase letters.",
201: "Subfolders code last letters are not consecutive lowercase letters.",
# parser failures
300: "File name could not be parsed.",
301: "File name could not be validated because parent folder is invalid.",
310: "Folder name could not be parsed.",
311: "Folder name could not be validated because parent folder is invalid.",
320: "Subfolders code last letters could not be validated because at least one subfolder is invalid.", # noqa: E501
301: "File name could not be parsed.",
302: "File name could not be validated because parent folder is invalid.",
311: "Folder name could not be parsed.",
312: "Folder name could not be validated because parent folder is invalid.",
321: "Subfolders code last letters could not be validated because at least one subfolder is invalid.", # noqa: E501
}
18 changes: 9 additions & 9 deletions itvalidator/tests/test_validator2.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,25 @@ def test_validate_fname(tmp_path):
fname = tmp_path / "_F1_folder" / "test.txt"
fname.write_text("101")
err = _validate_fname(fname)
assert err == [300]
assert err == [301]

(tmp_path / "_F1folder").mkdir()
fname = tmp_path / "_F1folder" / "F1_220101_file_ABC.txt"
fname.write_text("101")
err = _validate_fname(fname)
assert err == [301]
assert err == [302]

# invalid file names
fname = tmp_path / "_F1_folder" / "F1a_2201_file-test_ABCD.txt"
fname.write_text("101")
err = _validate_fname(fname)
assert sorted(err) == [1, 20, 30, 40]
assert sorted(err) == [1, 21, 31, 41]

fname = tmp_path / "_F1_folder" / "F1_400101_file_test_ABc.txt"
fname.write_text("101")
err = _validate_fname(fname)
expected = (
[31, 41] if datetime.now() < datetime(year=2040, month=1, day=1) else [41]
[32, 42] if datetime.now() < datetime(year=2040, month=1, day=1) else [42]
)
assert sorted(err) == expected

Expand All @@ -49,25 +49,25 @@ def test_validate_folder_name(tmp_path):
folder = tmp_path / "_F1_folder" / "_F1asubfolder"
folder.mkdir(parents=True)
err = _validate_folder_name(folder)
assert err == [310]
assert err == [311]

folder = tmp_path / "_F1_folder" / "test"
folder.mkdir(parents=True)
err = _validate_folder_name(folder)
assert err == [310]
assert err == [311]

folder = tmp_path / "_F1folder" / "_F1a_subfolder"
folder.mkdir(parents=True)
err = _validate_folder_name(folder)
assert err == [311]
assert err == [312]

# invalid folder names
folder = tmp_path / "_F1_folder" / "G1A_subfolder-test"
folder.mkdir(parents=True)
err = _validate_folder_name(folder)
assert sorted(err) == [100, 101, 102, 104, 110]
assert sorted(err) == [101, 102, 103, 105, 111]

folder = tmp_path / "_F1_folder" / "__F1a_subfolder"
folder.mkdir(parents=True)
err = _validate_folder_name(folder)
assert err == [103]
assert err == [104]
13 changes: 12 additions & 1 deletion itvalidator/utils/_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from typing import Any
from collections.abc import Callable
from typing import Any

# -- Documentation dictionary ----------------------------------------------------------
docdict: dict[str, str] = dict()
Expand All @@ -32,9 +32,20 @@
# -- N ---------------------------------------------------------------------------------
# -- O ---------------------------------------------------------------------------------
# -- P ---------------------------------------------------------------------------------
docdict["primary_violations"] = """
primary_violations : dict
Dictionary of the primary violations already found. Primary violations must be
resolved."""

# -- Q ---------------------------------------------------------------------------------
# -- R ---------------------------------------------------------------------------------
# -- S ---------------------------------------------------------------------------------
docdict["secondary_violations"] = """
secondary_violations : dict
Dictionary of the secondary violations already found. Secondary violations likely
occur due to a primary violation. The dictionary is modified in-place by each
recursive call."""

# -- T ---------------------------------------------------------------------------------
# -- U ---------------------------------------------------------------------------------
# -- V ---------------------------------------------------------------------------------
Expand Down
11 changes: 7 additions & 4 deletions itvalidator/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def _validate_folder_content(
violations : dict
Dictionary of violations found in the folder and its content.
"""
assert folder.is_dir() # sanity-check
folders = [] # list folders to validate last code letter consecutiveness
for elt in folder.iterdir():
if elt.is_dir() and elt.name != "__old":
Expand Down Expand Up @@ -139,6 +140,7 @@ def _validate_folder_name(folder: Path, validate_parent_code: bool = True) -> in
validate_parent_code : bool
If False, ignore the code validation based on the parent folder.
"""
assert folder.is_dir() # sanity-check
if not folder.name.startswith("_"):
return 102
try:
Expand Down Expand Up @@ -176,14 +178,15 @@ def _validate_fname(fname: Path) -> int:
The error code corresponding to the validation error. 0 is returned if no error
is found.
"""
try:
folder_code, _ = _parse_folder_name(fname.parent.name)
except Exception:
return 301
assert fname.is_file() # sanity-check
try:
fname_code, date, name, usercode = _parse_file_stem(fname.stem)
except Exception:
return 300
try:
folder_code, _ = _parse_folder_name(fname.parent.name)
except Exception:
return 301
if folder_code != fname_code:
return 1
if any(elt in name for elt in _FORBIDDEN_STEM_CHARACTERS):
Expand Down
133 changes: 118 additions & 15 deletions itvalidator/validator2.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,114 @@

from ._parser import parse_file_stem, parse_folder_name
from .config import _FORBIDDEN_STEM_CHARACTERS, _USERCODE_LENGTH
from .utils._docs import fill_doc

if TYPE_CHECKING:
from pathlib import Path


@fill_doc
def _validate_root_folder(
    folder: Path,
    primary_violations: dict[Path, list[int]],
    secondary_violations: dict[Path, list[int]],
) -> None:
    """Validate the name of a root folder and, recursively, its content.

    Nothing is returned: the violations found are recorded by updating
    ``primary_violations`` and ``secondary_violations`` in-place.

    Parameters
    ----------
    folder : Path
        Full path to the root folder to validate.
    %(primary_violations)s
    %(secondary_violations)s
    """
    assert folder.is_dir()  # sanity-check
    # validate the current folder name; error 312 (parent folder could not be
    # validated) is discarded, presumably because the parent of a root folder is not
    # expected to follow the naming convention -- TODO confirm
    errors_folder = [err for err in _validate_folder_name(folder) if err != 312]
    if errors_folder:
        primary_violations[folder] = errors_folder
    # validate the content of the current folder; the (filtered) folder errors are
    # forwarded so that errors on the content can be triaged against them
    _validate_folder_content(
        folder, errors_folder, primary_violations, secondary_violations
    )


def _validate_non_root_folder(
    folder: Path,
    errors_parent_folder: list[int],
    primary_violations: dict[Path, list[int]],
    secondary_violations: dict[Path, list[int]],
) -> None:
    """Validate a subfolder name, then recurse into its content.

    Parameters
    ----------
    folder : Path
        Full path to the subfolder to validate.
    errors_parent_folder : list of int
        Error codes found on the name of the parent folder, used to triage the
        errors of this folder between primary and secondary violations.
    primary_violations : dict
        Mapping path -> error codes, updated in-place.
    secondary_violations : dict
        Mapping path -> error codes, updated in-place.
    """
    assert folder.is_dir()  # sanity-check
    name_errors = _validate_folder_name(folder)
    # record the errors on the folder name, taking the parent errors into account
    _triage_primary_and_secondary_violations(
        path=folder,
        errors=name_errors,
        errors_parent_folder=errors_parent_folder,
        primary_violations=primary_violations,
        secondary_violations=secondary_violations,
    )
    # this folder acts as the parent for everything it contains
    _validate_folder_content(
        folder=folder,
        errors_folder=name_errors,
        primary_violations=primary_violations,
        secondary_violations=secondary_violations,
    )


def _validate_folder_content(
    folder: Path,
    errors_folder: list[int],
    primary_violations: dict[Path, list[int]],
    secondary_violations: dict[Path, list[int]],
) -> None:
    """Validate every entry located directly inside a folder.

    Directories named ``"__old"`` are skipped, other directories are validated
    recursively, and any other entry is validated as a file.

    Parameters
    ----------
    folder : Path
        Full path to the folder whose content is validated.
    errors_folder : list of int
        Error codes found on the name of ``folder``, used to triage the errors of
        its content between primary and secondary violations.
    primary_violations : dict
        Mapping path -> error codes, updated in-place.
    secondary_violations : dict
        Mapping path -> error codes, updated in-place.
    """
    for entry in folder.iterdir():
        if entry.is_dir():
            if entry.name != "__old":  # '__old' directories are not validated
                _validate_non_root_folder(
                    entry, errors_folder, primary_violations, secondary_violations
                )
            continue
        # anything which is not a directory is handled as a file
        file_errors = _validate_fname(entry)
        _triage_primary_and_secondary_violations(
            entry, file_errors, errors_folder, primary_violations, secondary_violations
        )


def _triage_primary_and_secondary_violations(
    path: Path,
    errors: list[int],
    errors_parent_folder: list[int],
    primary_violations: dict[Path, list[int]],
    secondary_violations: dict[Path, list[int]],
) -> None:
    """Triage primary and secondary violations based on parent folder errors.

    Parameters
    ----------
    path : Path
        Path to the file or folder on which ``errors`` were found.
    errors : list of int
        Error codes found on ``path``. If empty, nothing is recorded.
    errors_parent_folder : list of int
        Error codes found on the name of the parent folder. If empty, all
        ``errors`` are recorded as primary violations.
    primary_violations : dict
        Mapping path -> error codes, updated in-place.
    secondary_violations : dict
        Mapping path -> error codes, updated in-place.

    Notes
    -----
    The order of the codes in ``errors`` is preserved in both dictionaries.
    """
    if not errors:
        return
    if not errors_parent_folder:
        primary_violations[path] = errors
        return
    # define what is a primary or secondary error: if the code of the parent folder
    # is itself invalid (101 to 105), a code mismatch with the parent (1 for files,
    # 101 for folders) is regarded as a consequence of the parent error
    parent_code_errors = (101, 102, 103, 104, 105)
    if any(code in errors_parent_folder for code in parent_code_errors):
        secondary = {1, 101, 302, 312}
    else:
        secondary = {302, 312}
    # partition with list comprehensions rather than set operations to retain the
    # order in which the errors were reported
    primary_errors = [err for err in errors if err not in secondary]
    secondary_errors = [err for err in errors if err in secondary]
    if primary_errors:
        primary_violations[path] = primary_errors
    if secondary_errors:
        secondary_violations[path] = secondary_errors


def _validate_folder_name(folder: Path) -> list[int]:
"""Validate a folder name.
Expand All @@ -29,30 +132,30 @@ def _validate_folder_name(folder: Path) -> list[int]:
try:
code, name = parse_folder_name(folder.name)
except Exception:
return [310]
return [311]
error_codes = []
# validate leading underscore and first code letter.
if not folder.name.startswith("_"):
error_codes.append(102)
if folder.name[1] == "_":
error_codes.append(103)
if code[0] != "F":
if folder.name[1] == "_":
error_codes.append(104)
if code[0] != "F":
error_codes.append(105)
# compare code with parent folder code
try:
folder_parent_code, _ = parse_folder_name(folder.parent.name)
if folder_parent_code != code[:-1]:
error_codes.append(100)
error_codes.append(101)
except Exception:
error_codes.append(311)
error_codes.append(312)
# validate that the last code element is a lowercase letter, except for root folders
if 3 <= len(code):
code_letter = code[-1]
if code_letter not in string.ascii_lowercase:
error_codes.append(101)
error_codes.append(102)
# validate name content
if any(elt in name for elt in _FORBIDDEN_STEM_CHARACTERS):
error_codes.append(110)
error_codes.append(111)
return error_codes


Expand All @@ -74,28 +177,28 @@ def _validate_fname(fname: Path) -> list[int]:
try:
fname_code, date, name, usercode = parse_file_stem(fname.stem)
except Exception:
return [300]
return [301]
error_codes = []
# compare fname_code with parent folder code
try:
folder_code, _ = parse_folder_name(fname.parent.name)
if folder_code != fname_code:
error_codes.append(1)
except Exception:
error_codes.append(301)
error_codes.append(302)
# validate date format and value
try:
date = datetime.strptime(date, "%y%m%d")
if datetime.now() < date:
error_codes.append(31)
error_codes.append(32)
except ValueError:
error_codes.append(30)
error_codes.append(31)
# validate name content
if any(elt in name for elt in _FORBIDDEN_STEM_CHARACTERS):
error_codes.append(20)
error_codes.append(21)
# validate usercode
if len(usercode) != _USERCODE_LENGTH:
error_codes.append(40)
if usercode.upper() != usercode:
error_codes.append(41)
if usercode.upper() != usercode:
error_codes.append(42)
return error_codes

0 comments on commit e366f53

Please sign in to comment.