Skip to content

Commit

Permalink
Add automatic copyright notice verification (#963)
Browse files Browse the repository at this point in the history
* Add license checker

* Add workflow for checking licenses

* Install license checker dependencies

* Manually specify the branch

* Use hard-coded branch name

* Try by directly specifying files

* Use fork

* Remove file filter

* Clean up

* Test workflow failure

* Fix workflow file

* Fix workflow

* Revert touched file

* Fix filter

* Try another filter

* revert

* Update filter

* fix filter

* .

* Try some more stuff

* .

* .

* Allow empty list of files

* Some aesthetic improvement

* Blacken Python code

* Provoke failure

* Fix failure

* Fix `File.write` and its test

* Improve error handling

* Move Python stuff to general `.gitignore`

* Blacken code

* Add script for checking all rust files

* Fix file extension regex

* Rename script accordingly
  • Loading branch information
maltekliemann authored Jun 21, 2023
1 parent 663388a commit 5e953c4
Show file tree
Hide file tree
Showing 15 changed files with 728 additions and 1 deletion.
20 changes: 20 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,26 @@ jobs:
- name: Format
run: ./scripts/tests/format.sh --check

copyright:
name: Copyright Notices
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Setup Python
uses: actions/setup-python@v2
- name: Install check-license and dependencies
run: |
pip install scripts/check-license
pip install -r scripts/check-license/requirements.txt
- name: Query files changed
id: files_changed
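uses: Ana06/get-changed-files@v2.2.0  # NOTE(review): the version tag was garbled during extraction ("[email protected]"); confirm the pinned tag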
with:
filter: '*.rs$'
- name: Check copyright notices
run: check-license ${{ steps.files_changed.outputs.added_modified }}

checks:
name: Checks
runs-on: ubuntu-latest
Expand Down
136 changes: 135 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,138 @@
**/node_modules/

# Visual Studio Code
.vscode
.vscode

##############################
# GitHub's Python .gitignore #
##############################

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
27 changes: 27 additions & 0 deletions scripts/check-license/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Developer Makefile for the check-license tool.
#
# All tool locations are overridable from the command line or the environment,
# e.g. `make VENV=/tmp/venv venv`.
VENV?=.venv
BIN?=$(VENV)/bin
PYTHON?=$(BIN)/python
PIP?=$(BIN)/pip
PYTEST?=$(BIN)/pytest

# Install the package into the virtualenv, then run the test suite.
.PHONY: default
default: install
	$(PYTEST) tests/

# Create the virtualenv (if missing), install the pinned requirements and the package.
.PHONY: venv
venv:
	pip install virtualenv
	[ -d $(VENV) ] || virtualenv $(VENV)
	$(PIP) install -r requirements.txt
	# Use $(MAKE) rather than a literal `make` so flags and overrides given to the
	# parent invocation are passed on to the recursive one.
	$(MAKE) install

# Remove build artefacts and the virtualenv.
.PHONY: clean
clean:
	python setup.py clean
	# Honour a custom VENV instead of always deleting `.venv`.
	rm -fr $(VENV)
	rm -fr build
	rm -fr dist

# Install the package using the virtualenv's interpreter.
.PHONY: install
install:
	$(PYTHON) setup.py install
3 changes: 3 additions & 0 deletions scripts/check-license/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
click==8.0.3
pytest==5.4.3
pytest-mock==3.7.0
6 changes: 6 additions & 0 deletions scripts/check-license/resources/test_read
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Copyright 2020-2021, 2023 Holder.
// Copyright 1999 This other guy.
//
// This is the license.

This is the rest of the file!
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Copyright 2020-2021, 2023 Holder.
// (c) Copyright 1999 This other guy.
//
// This is the license.

This is the rest of the file!
8 changes: 8 additions & 0 deletions scripts/check-license/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from setuptools import setup

setup(
    name="check-license",
    packages=["check_license"],
    package_dir={"": "src"},
    # The package imports `click` at runtime (check_license/console.py), so it has to
    # be declared as a dependency; otherwise a plain `pip install` produces a
    # `check-license` console script that fails on import.
    install_requires=["click"],
    entry_points={"console_scripts": ["check-license = check_license:main"]},
)
25 changes: 25 additions & 0 deletions scripts/check-license/src/check_license/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import argparse
import datetime
import logging
import sys

from check_license.check_license import check_files, update_files
from check_license.console import echo


def main():
    """Entry point of the ``check-license`` console script.

    Parses the file list and the ``-w/--write`` flag from ``sys.argv``. Without
    ``--write`` the files are only checked; with it their copyright notices are
    updated for the current year and the number of updated files is reported.
    The process exits with status 1 if any file failed, 0 otherwise.
    """
    # TODO Add option to ignore files?
    cli = argparse.ArgumentParser()
    cli.add_argument("files", nargs="*")
    cli.add_argument("-w", "--write", action="store_true")
    options = cli.parse_args(sys.argv[1:])

    this_year = datetime.date.today().year
    if not options.write:
        failed = check_files(this_year, options.files)
    else:
        failed, count = update_files(this_year, options.files)
        echo(f"Updated {count} files. ✍️")

    if failed:
        sys.exit(1)
    echo("All copyright notices are up to date! 🍉")
    sys.exit(0)
131 changes: 131 additions & 0 deletions scripts/check-license/src/check_license/check_license.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
from __future__ import annotations

import dataclasses
import datetime
import os
import re
from typing import Optional

from check_license.console import echo
from check_license.copyright import Copyright, CopyrightError
from check_license.errors import (
LicenseCheckerError,
MissingCopyrightError,
IllegalCopyrightError,
DuplicateCopyrightError,
OutdatedCopyrightError,
)

# TODO Get owner according to exact date
FORECASTING_TECH = "Forecasting Technologies LTD"
OWNER = FORECASTING_TECH


class File:
    """A source file together with the copyright notices found in its header.

    The header is expected to be a run of ``//`` comment lines at the very top
    of the file, one copyright notice per line. Everything after the notices
    (including an empty ``//`` separator line, if present) is kept verbatim in
    an internal blob so that the file can be written back unchanged.
    """

    def __init__(
        self, path: str, copyright_notices: Optional[list] = None, blob: str = ""
    ) -> None:
        """Create a file wrapper for ``path``.

        ``copyright_notices`` and ``blob`` allow pre-populating the parsed state;
        normally they are filled in by :meth:`read`.
        """
        self._path = path
        self._copyright_notices = copyright_notices or []
        self._blob = blob

    @property
    def path(self) -> str:
        """The path this object was created with."""
        return self._path

    def last_changed(self) -> datetime.datetime:
        """Return the UTC date at which the file was last changed."""
        # FIXME This doesn't take git into account.
        # Same naive-UTC result as the deprecated ``datetime.utcfromtimestamp``.
        modified = os.path.getmtime(self._path)
        return datetime.datetime.fromtimestamp(
            modified, tz=datetime.timezone.utc
        ).replace(tzinfo=None)

    def read(self) -> None:
        """Read contents of file to buffer.

        May fail due to broken copyright notices. Should be run before calling any
        other function.

        Raises:
            IllegalCopyrightError: If a header line cannot be parsed as a
                copyright notice.
        """
        raw_copyright = []
        blob = ""
        with open(self._path, "r") as f:
            # We're assuming that all copyright notices come in one bunch, so once
            # we meet a line that is not a notice, we give up.
            while line := f.readline():
                if not line.startswith("//") or re.match(r"^// *$", line):
                    # The line ending the header (an empty ``//`` separator or the
                    # first non-comment line) has already been consumed from the
                    # stream, so it must be kept in the blob; otherwise it would
                    # be silently dropped when the file is written back.
                    blob += line
                    break
                raw_copyright.append(line[3:])  # Strip "// ".
            blob += f.read()
        for i, s in enumerate(raw_copyright):
            try:
                notice = Copyright.from_string(s)
            except CopyrightError as e:
                raise IllegalCopyrightError(self._path, i, s) from e
            self._copyright_notices.append(notice)
        self._blob = blob

    def check(self, year) -> None:
        """Check that this file's copyright notice reflects changes made in the
        current ``year``.

        Raises:
            MissingCopyrightError: If there is no notice at all, or none for
                ``OWNER``.
            DuplicateCopyrightError: If two notices share the same owner.
            OutdatedCopyrightError: If the owner's notice ends before ``year``.
        """
        if not self._copyright_notices:
            raise MissingCopyrightError(self._path)
        owner_count = len({c.owner for c in self._copyright_notices})
        if owner_count != len(self._copyright_notices):
            raise DuplicateCopyrightError(self._path)
        # TODO Check that the license blob is as expected

        owner_notice = self._get_owner_copyright()
        if owner_notice is None:
            raise MissingCopyrightError(self._path, OWNER)
        if owner_notice.end < year:
            raise OutdatedCopyrightError(self._path, owner_notice, year)

    def update_license(self, year) -> bool:
        """Update the copyright notice and return `True` if anything changed."""
        owner_copyright = self._get_owner_copyright()
        if owner_copyright is None:
            # No notice for the owner yet: put a fresh one at the top.
            self._copyright_notices.insert(0, Copyright.from_year(OWNER, year))
            return True
        if owner_copyright.end != year:
            owner_copyright.push_year(year)
            return True
        return False

    def write(self) -> None:
        """Write the copyright notices followed by the stored blob back to the file."""
        content = "\n".join(["// " + str(c) for c in self._copyright_notices])
        if content:
            content += "\n"
        content += self._blob
        with open(self._path, "w") as f:
            f.write(content)

    def _get_owner_copyright(self) -> "Optional[Copyright]":
        """Return the first notice whose owner is ``OWNER``, or ``None``."""
        matches = (c for c in self._copyright_notices if c.owner == OWNER)
        # `len(matches) < 2` at this point.
        return next(matches, None)


def check_files(year: int, files: list[str]) -> bool:
    """Check the copyright notices of all ``files`` against ``year``.

    Every license-checker error is reported via ``echo`` and checking continues
    with the next file. Returns ``True`` if at least one file failed.
    """
    any_failed = False
    for path in files:
        source = File(path)
        try:
            source.read()
            source.check(year)
        except LicenseCheckerError as err:
            echo(str(err))
            any_failed = True
    return any_failed


def update_files(year: int, files: list[str]) -> tuple[bool, int]:
    """Bring the copyright notices of all ``files`` up to date for ``year``.

    Each file is read, updated and written back. License-checker errors are
    reported via ``echo`` and do not stop processing of the remaining files.
    Returns a pair ``(failed, count)``: whether any file raised an error, and
    how many files actually had their notice changed.
    """
    had_error = False
    updated = 0
    for path in files:
        source = File(path)
        try:
            source.read()
            was_changed = source.update_license(year)
            source.write()
        except LicenseCheckerError as err:
            echo(str(err))
            had_error = True
        else:
            # Only files that were processed without error are counted.
            updated += was_changed
    return had_error, updated
7 changes: 7 additions & 0 deletions scripts/check-license/src/check_license/console.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from __future__ import annotations

import click


def echo(msg: str) -> None:
    """Print ``msg`` to the console by delegating to ``click.echo``."""
    click.echo(message=msg)
Loading

0 comments on commit 5e953c4

Please sign in to comment.