Skip to content

Commit

Permalink
Add better handling of symlinks
Browse files Browse the repository at this point in the history
  • Loading branch information
matrss committed Jun 19, 2023
1 parent 91acd13 commit 9ae09f6
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 33 deletions.
78 changes: 48 additions & 30 deletions src/reuse/project.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2022 Florian Snow <[email protected]>
# SPDX-FileCopyrightText: 2023 DB Systel GmbH
# SPDX-FileCopyrightText: 2023 Matthias Riße
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand Down Expand Up @@ -135,8 +136,21 @@ def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]:
_LOGGER.debug("ignoring '%s'", the_file)
continue
if the_file.is_symlink():
_LOGGER.debug("skipping symlink '%s'", the_file)
continue
real_file = the_file.resolve()
_LOGGER.debug(
"'%s' is a symlink pointing to '%s'",
the_file,
real_file,
)
if (
real_file.is_relative_to( # type: ignore
self.root.absolute()
)
and real_file.exists()
and not self._is_path_ignored(real_file)
):
_LOGGER.debug("skipping symlink '%s'", the_file)
continue
# Suppressing this error because I simply don't want to deal
# with that here.
with contextlib.suppress(OSError):
Expand Down Expand Up @@ -182,35 +196,39 @@ def reuse_info_of(self, path: StrPath) -> ReuseInfo:
dep5_path = source_path

# Search the file for REUSE information.
with path.open("rb") as fp:
try:
# Completely read the file once to search for possible snippets
if _contains_snippet(fp):
_LOGGER.debug(f"'{path}' seems to contain a SPDX Snippet")
read_limit = None
else:
read_limit = _HEADER_BYTES
# Reset read position
fp.seek(0)
# Scan the file for REUSE info, possibly limiting the read
# length
file_result = extract_reuse_info(
decoded_text_from_binary(fp, size=read_limit)
)
if file_result:
source_path = str(path)
if path.suffix == ".license":
source_type = SourceType.DOT_LICENSE_FILE
if not path.is_symlink():
with path.open("rb") as fp:
try:
# Completely read the file once to search for possible
# snippets
if _contains_snippet(fp):
_LOGGER.debug(
f"'{path}' seems to contain a SPDX Snippet"
)
read_limit = None
else:
source_type = SourceType.FILE_HEADER

except (ExpressionError, ParseError):
_LOGGER.error(
_(
"'{path}' holds an SPDX expression that cannot be"
" parsed, skipping the file"
).format(path=path)
)
read_limit = _HEADER_BYTES
# Reset read position
fp.seek(0)
# Scan the file for REUSE info, possibly limiting the read
# length
file_result = extract_reuse_info(
decoded_text_from_binary(fp, size=read_limit)
)
if file_result:
source_path = str(path)
if path.suffix == ".license":
source_type = SourceType.DOT_LICENSE_FILE
else:
source_type = SourceType.FILE_HEADER

except (ExpressionError, ParseError):
_LOGGER.error(
_(
"'{path}' holds an SPDX expression that cannot be"
" parsed, skipping the file"
).format(path=path)
)

# There is both information in a .dep5 file and in the file header
if (
Expand Down
7 changes: 4 additions & 3 deletions src/reuse/report.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2022 Florian Snow <[email protected]>
# SPDX-FileCopyrightText: 2022 Pietro Albini <[email protected]>
# SPDX-FileCopyrightText: 2023 Matthias Riße
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand Down Expand Up @@ -445,14 +446,14 @@ def generate(
) -> "FileReport":
"""Generate a FileReport from a path in a Project."""
path = Path(path)
if not path.is_file():
raise OSError(f"{path} is not a file")
if not path.is_file() and not path.is_symlink():
raise OSError(f"{path} is not supported")

relative = project.relative_from_root(path)
report = cls("./" + str(relative), path, do_checksum=do_checksum)

# Checksum and ID
if report.do_checksum:
if report.do_checksum and not path.is_symlink():
report.spdxfile.chk_sum = _checksum(path)
else:
# This path avoids a lot of heavy computation, which is handy for
Expand Down

0 comments on commit 9ae09f6

Please sign in to comment.