Add linter to check for copyright year

This replaces the previous arclint linter for checking the copyright
year in license headers with a gitea workflow job.

As the date of the last edit might differ from the commit date due to
reverts, the copyright linter is run against a base commit.

The python script doing the heavy lifting is written by @Dunedan.

Signed-off-by: Ralph Sennhauser <ralph.sennhauser@gmail.com>
Co-Authored-by: Dunedan <dunedan@phoenitydawn.de>
This commit is contained in:
Ralph Sennhauser 2025-04-01 20:58:52 +02:00
parent 263b481442
commit 19d568d506
No known key found for this signature in database
8 changed files with 277 additions and 84 deletions

View file

@ -3,11 +3,13 @@
name: lint
on:
- pull_request
env:
BASE_SHA: ${{ github.event.pull_request.base.sha }}
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
jobs:
cppcheck:
runs-on: ubuntu-latest
env:
BASE_SHA: ${{ github.event.pull_request.base.sha }}
steps:
- uses: actions/checkout@v4
@ -42,3 +44,18 @@ jobs:
- name: Check for issues
run: ./source/tools/lint/cppcheck/cppcheck.sh --diff ${{ env.BASE_SHA }}
copyright:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Fetch the base branch, so we can use `git diff`
run: git fetch origin ${{ env.BASE_SHA }}
- name: Check for issues with copyright
run: ./source/tools/lint/copyright/copyright.sh --from ${{ env.BASE_SHA }} --to ${{ env.HEAD_SHA }}

View file

@ -8,7 +8,6 @@ https://secure.phabricator.com/book/phabricator/article/arcanist_lint/
- `text` is configured to detect whitespace issues.
- `json` detects JSON syntax errors.
- `licence-year` detects Copyright header years and compares against modification time.
- `eslint`, if installed, will run on javascript files.
## Installation

View file

@ -11,7 +11,6 @@ phutil_register_library_map(array(
'class' => array(
'ESLintLinter' => 'src/ESLintLinter.php',
'JenkinsRenderer' => 'src/JenkinsRenderer.php',
'LicenceYearLinter' => 'src/LicenceYearLinter.php',
),
'function' => array(
'remove_null' => 'src/JenkinsRenderer.php',
@ -19,6 +18,5 @@ phutil_register_library_map(array(
'xmap' => array(
'ESLintLinter' => 'ArcanistExternalLinter',
'JenkinsRenderer' => 'ArcanistLintRenderer',
'LicenceYearLinter' => 'ArcanistLinter',
),
));

View file

@ -1,78 +0,0 @@
<?php
/**
* Copyright 2021 Wildfire Games.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Arcanist linter for Wildfire Games copyright headers.
 *
 * Every "Copyright [(C)] 20xx Wildfire Games" notice found in a file is
 * compared against the year of that file's modification time; notices with a
 * different year are reported together with the suggested replacement.
 */
final class LicenceYearLinter extends ArcanistLinter {

  /** Lint code for a copyright year that differs from the modification year. */
  const BAD_YEAR = 1;

  public function getInfoName() {
    return pht('Licence Year Linter');
  }

  public function getLinterName() {
    return 'LICENCE YEAR';
  }

  public function getLinterConfigurationName() {
    return 'licence-year';
  }

  public function getLintSeverityMap() {
    // Error makes it appear even if on an unmodified line, too.
    $severities = array();
    $severities[self::BAD_YEAR] = ArcanistLintSeverity::SEVERITY_ERROR;
    return $severities;
  }

  public function getLintNameMap() {
    $names = array();
    $names[self::BAD_YEAR] = pht('Inaccurate Copyright Year');
    return $names;
  }

  /**
   * Raise one lint message per copyright notice in $path whose year is not
   * the year of the file's modification time.
   */
  public function lintPath($path) {
    $contents = $this->getData($path);

    $found = null;
    $count = preg_match_all(
      "/Copyright( \(C\))? (20[0-9]{2}) Wildfire Games/",
      $contents,
      $found,
      PREG_OFFSET_CAPTURE);

    // Covers both "no notice in this file" (0) and a regex failure (false).
    if (!$count) {
      return;
    }

    $expected = date("Y", filemtime($path));

    // Capture group 2 holds array(year, offset) pairs because of
    // PREG_OFFSET_CAPTURE.
    foreach ($found[2] as $hit) {
      $actual = $hit[0];
      $position = $hit[1];

      if ($actual != $expected) {
        $this->raiseLintAtOffset(
          $position,
          self::BAD_YEAR,
          pht('Inaccurate Copyright Year'),
          $actual,
          "$expected");
      }
    }
  }

}

View file

@ -16,3 +16,7 @@ The format for an error suppression is one of:
### libraries
Adding library cfgs for other dependencies could improve cppcheck's ability to find issues.
## copyright
A linter for checking that copyright years in file headers are up to date.

View file

@ -0,0 +1,185 @@
#!/usr/bin/env python3
#
# Copyright (C) 2025 Wildfire Games.
# This file is part of 0 A.D.
#
# 0 A.D. is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# 0 A.D. is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
"""pre-commit hook to check for correct copyright year.
This script checks whether files, which contain a copyright notice,
contain the correct copyright year. When run as a pre-commit hook,
it checks the staged files only. When run on all files
(pre-commit run --all-files), to not generate warnings for all files,
it checks that the copyright year matches the year the file was last
modified.
This script will only work for text files encoded in UTF-8. All other
files passed to it will be silently ignored.
"""
import difflib
import re
import subprocess
from argparse import ArgumentDefaultsHelpFormatter, ArgumentError, ArgumentParser
from collections.abc import Sequence
from datetime import UTC, datetime
def check_copyright_year(
filenames: list[str],
copyright_regex: re.Pattern,
lines_to_check: int = 100,
show_diff: bool = False,
fix: bool = False,
) -> int:
"""Check files for correct copyright year."""
diff_process = subprocess.run(
["git", "diff", "--cached", "--name-only"], capture_output=True, check=True
)
staged_files = diff_process.stdout.decode().split("\n")
current_year = datetime.now(UTC).date().year
is_error = False
last_commit = subprocess.run(
["git", "log", "-1", "--pretty=%cI"], capture_output=True, check=True
)
last_commit_year = datetime.fromisoformat(last_commit.stdout.decode().strip()).year
for filename in filenames:
with open(filename, encoding="utf8") as f:
if lines_to_check > 0:
data_list = []
try:
for _ in range(lines_to_check):
data_list.append(next(f))
except StopIteration:
pass
except UnicodeDecodeError:
continue
data = "".join(data_list)
else:
try:
data = f.read()
except UnicodeDecodeError:
continue
match = copyright_regex.search(data)
if not match:
continue
copyright_year = int(match.group(1))
if filename in staged_files:
if copyright_year == current_year:
continue
# Avoid reporting outdated copyright years when commits were
# done at the end of a year, but the check runs in the
# following year, by not using the current year, but the year
# of the last commit as expected year in that case.
elif copyright_year == last_commit_year:
continue
expected_year = current_year
# file to check isn't staged, so we're likely running with
# --all-files. Use committer date of the last commit instead
# as indication what the copyright year should be.
if filename not in staged_files:
last_modified = subprocess.run(
["git", "log", "-1", "--pretty=%cI", filename],
capture_output=True,
check=True,
)
last_modified_year = datetime.fromisoformat(last_modified.stdout.decode().strip()).year
if last_modified_year == copyright_year:
continue
# Reporting the last modified year as expected year is
# probably misleading, if it isn't the current year as
# well, as changing the copyright year causes the last
# modification year to be the current year. However,
# reporting the current year as expected year would be
# equally confusing, as the file might not have been
# modified in the current year yet, so the expected year
# wouldn't match the year the file got last modified.
expected_year = last_modified_year
print(f"{filename}: Copyright year {copyright_year} instead of {expected_year}")
is_error = True
data_modified = copyright_regex.sub(
lambda x: x.group(0).replace(x.group(1), str(current_year)), data
)
if show_diff:
show_diff = difflib.unified_diff(data.split("\n"), data_modified.split("\n"), n=2)
print("\n".join(list(show_diff)[2:]), end="\n\n")
if fix:
with open(filename, "r+", encoding="utf8") as f:
f.write(data_modified)
return 1 if is_error else 0
def regex_type(value: str) -> re.Pattern:
    """Regex pattern argument type for argparse.

    Args:
        value: The regular expression as given on the command line.

    Returns:
        The compiled pattern.

    Raises:
        argparse.ArgumentTypeError: If ``value`` isn't a valid regular
            expression. argparse turns this into a regular usage error.
    """
    # ArgumentError can't be raised bare, as its constructor requires the
    # offending argument and a message. ArgumentTypeError is the exception
    # argparse expects from ``type=`` callables.
    from argparse import ArgumentTypeError

    try:
        return re.compile(value)
    except re.error as exc:
        raise ArgumentTypeError(f"invalid regular expression {value!r}: {exc}") from exc
def main(argv: Sequence[str] | None = None) -> int:
    """Parse command line parameters and call checking logic.

    Args:
        argv: Command line arguments without the program name. ``None`` makes
            argparse read them from ``sys.argv``.

    Returns:
        The exit status: 1 if a file with an unexpected copyright year was
        found, otherwise 0.
    """
    parser = ArgumentParser(
        description="Check files with license header for correct copyright year.",
        # Fixed width keeps the help output independent of the terminal size.
        formatter_class=lambda prog: ArgumentDefaultsHelpFormatter(prog, width=78),
    )
    parser.add_argument(
        "filenames",
        nargs="+",
        help="Files to check for the copyright year.",
    )
    parser.add_argument(
        "--regex",
        type=regex_type,
        # argparse runs string defaults through ``type`` as well, so the
        # checking logic always receives a compiled pattern.
        default="(?im)^(?://|/\\*|#) Copyright \\(C?\\) (\\d+) Wildfire Games",
        help="The regex to search for copyright notices and to use to fix copyright years. Must "
        "contain a single capture group with the copyright year.",
    )
    parser.add_argument(
        "--diff",
        action="store_true",
        help="Show differences of actual and desired copyright years",
    )
    parser.add_argument(
        "--fix",
        action="store_true",
        help="Automatically fix outdated copyright years.",
    )
    parser.add_argument(
        "--lines-to-check",
        type=int,
        default=100,
        help="Number of lines to check to find a copyright notice. Set to 0 or a negative "
        "value to read whole files.",
    )

    args = parser.parse_args(argv)

    return check_copyright_year(
        args.filenames, args.regex, args.lines_to_check, args.diff, args.fix
    )


if __name__ == "__main__":
    raise SystemExit(main())

View file

@ -0,0 +1,42 @@
#!/bin/sh
#
# Run the copyright year linter.
#
# Usage: copyright.sh [--from <commitish> [--to <commitish>]] [-j<N>]
#
# With --from, only files changed between the two commits are checked (--to
# defaults to HEAD). Without it, every file below the repository root is
# checked.
set -e

# The script lives four directory levels below the repository root.
cd "$(dirname "$0")/../../../.."

checker=./source/tools/lint/copyright/check_copyright_year.py

# Start from a clean slate, so equally named variables exported in the
# environment can't influence the run.
from_commitish=
to_commitish=
diff=

while [ "$#" -gt 0 ]; do
	case "$1" in
	--from)
		if [ "$#" -lt 2 ]; then
			printf "Option %s requires an argument\n\n" "$1"
			exit 1
		fi
		from_commitish=$2
		shift
		;;
	--to)
		if [ "$#" -lt 2 ]; then
			printf "Option %s requires an argument\n\n" "$1"
			exit 1
		fi
		to_commitish=$2
		shift
		;;
	-j*) ;; # accepted, but has no effect on this linter
	*)
		printf "Unknown option: %s\n\n" "$1"
		exit 1
		;;
	esac
	shift
done

if [ -n "${from_commitish}" ]; then
	if [ -n "${to_commitish}" ]; then
		diff="${from_commitish}..${to_commitish}"
	else
		diff="${from_commitish}..$(git rev-parse HEAD)"
	fi
	printf "Running copyright linter for range\n%s\n\n" "${diff}"
fi

# xargs options used below:
#   -0      file names arrive NUL terminated, so any character is safe
#   -r      don't run the checker at all if there is nothing to check, as it
#           requires at least one file name
#   -n 100  pass at most 100 file names per checker invocation
if [ -n "${diff}" ]; then
	# --name-only with -z prints the raw, unquoted path of each changed file
	# and --diff-filter=d leaves out deleted files, which can't be read.
	git diff --name-only --no-renames --diff-filter=d -z "${diff}" |
		xargs -0 -r -n 100 "${checker}"
else
	echo "WARNING: running copyright linter without base commit, likely not what you want."
	# Git's own bookkeeping below .git is not part of the sources.
	find . -path ./.git -prune -o -type f -print0 |
		xargs -0 -r -n 100 "${checker}"
fi

View file

@ -3,14 +3,40 @@ set -e
cd "$(dirname "$0")"
# args collects the options forwarded to cppcheck, commitish is the base
# commit handed to the copyright linter. Both are reset first, so equally
# named variables exported in the environment can't leak into the run.
args=
commitish=

while [ "$#" -gt 0 ]; do
	case "$1" in
	--diff)
		# Fail with a clear message instead of an obscure shift error.
		if [ "$#" -lt 2 ]; then
			printf "Option %s requires an argument\n\n" "$1"
			exit 1
		fi
		commitish=$2
		args="${args} --diff $2"
		shift
		;;
	-j*)
		args="${args} $1"
		;;
	*)
		printf "Unknown option: %s\n\n" "$1"
		exit 1
		;;
	esac
	shift
done
# Run every linter even if an earlier one failed and report the combined
# result through the exit status.
has_errors=false

if command -v cppcheck >/dev/null; then
	# args holds several options which have to be split into words.
	# shellcheck disable=SC2086
	./cppcheck/cppcheck.sh ${args} || has_errors=true
else
	echo "Cppcheck not found in path"
fi

# The copyright linter compares against a base commit, which is only known
# when --diff was given.
if [ -n "${commitish}" ]; then
	copyright/copyright.sh --from "${commitish}" || has_errors=true
else
	echo "Skipping copyright linter as no base commit was defined"
fi

if [ "${has_errors}" = true ]; then
	exit 1
fi