Merge pull request #8470 from gilles-peskine-arm/check-files-license

Check copyright statements and SPDX license statements
This commit is contained in:
Dave Rodgman 2023-11-06 17:07:54 +00:00 committed by GitHub
commit b578514b03
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -10,10 +10,11 @@ trailing whitespace, and presence of UTF-8 BOM.
Note: requires python 3, must be run from Mbed TLS root.
"""
import os
import argparse
import logging
import codecs
import inspect
import logging
import os
import re
import subprocess
import sys
@ -345,6 +346,100 @@ class MergeArtifactIssueTracker(LineIssueTracker):
return False
def this_location():
    """Return the base name of this file and a line number inside this function.

    The result is a ``(file_base_name, line_number)`` pair describing the
    frame of this very function at the moment it is inspected.
    """
    current = inspect.currentframe()
    # currentframe() is typed as optional; narrow it before use.
    assert current is not None
    frame_info = inspect.getframeinfo(current)
    base_name = os.path.basename(frame_info.filename)
    return base_name, frame_info.lineno
# Evaluated once at import time: the base name of this script and a line
# number located just above the LicenseIssueTracker class.
# LicenseIssueTracker.issue_with_line() uses the pair to skip every line of
# this file that comes after that point.
THIS_FILE_BASE_NAME, LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = this_location()
class LicenseIssueTracker(LineIssueTracker):
    """Flag incorrect copyright statements and license indications.

    Only statements that are actually present are validated. A file that
    carries no copyright or license statement at all is not reported.
    """

    heading = "License issue:"

    # Paths (in addition to binary files) that this tracker does not check.
    LICENSE_EXEMPTION_RE_LIST = [
        # Third-party code, other than whitelisted third-party modules,
        # may be under a different license.
        r'3rdparty/(?!(p256-m)/.*)',
        # Documentation explaining the license may have accidental
        # false positives.
        r'(ChangeLog|LICENSE|[-0-9A-Z_a-z]+\.md)\Z',
        # Files imported from TF-M, and not used except in test builds,
        # may be under a different license.
        r'configs/crypto_config_profile_medium\.h\Z',
        r'configs/tfm_mbedcrypto_config_profile_medium\.h\Z',
        # Third-party file.
        r'dco\.txt\Z',
    ]
    path_exemptions = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST +
                                          LICENSE_EXEMPTION_RE_LIST))

    # The only copyright holder accepted on a copyright line.
    COPYRIGHT_HOLDER = rb'The Mbed TLS Contributors'
    # Catch "Copyright foo", "Copyright (C) foo", "Copyright © foo", etc.
    COPYRIGHT_RE = re.compile(rb'.*\bcopyright\s+((?:\w|\s|[()]|[^ -~])*\w)', re.I)

    SPDX_HEADER_KEY = b'SPDX-License-Identifier'
    LICENSE_IDENTIFIER = b'Apache-2.0 OR GPL-2.0-or-later'
    # Group 1 is the header key as spelled in the line (matched
    # case-insensitively); group 3 is the identifier following "key:",
    # and is unset when the key is not followed by a colon.
    SPDX_RE = re.compile(br'.*?(' +
                         re.escape(SPDX_HEADER_KEY) +
                         br')(:\s*(.*?)\W*\Z|.*)', re.I)

    # Free-form mentions of a license, as opposed to an SPDX identifier.
    LICENSE_MENTION_RE = re.compile(rb'.*(?:' + rb'|'.join([
        rb'Apache License',
        rb'General Public License',
    ]) + rb')', re.I)

    def __init__(self):
        super().__init__()
        # Record what problem was caused. We can't easily report it due to
        # the structure of the script. To be fixed after
        # https://github.com/Mbed-TLS/mbedtls/pull/2506
        self.problem = None

    def _find_problem(self, line):
        """Return a description of what is wrong with ``line``, or None.

        ``line`` is a bytes object; it is matched against the class's
        bytes patterns in order: copyright, SPDX header, license mention.
        """
        #pylint: disable=too-many-return-statements
        copyright_match = self.COPYRIGHT_RE.match(line)
        if copyright_match and \
           copyright_match.group(1) != self.COPYRIGHT_HOLDER:
            return 'Invalid copyright line'

        spdx_match = self.SPDX_RE.match(line)
        if spdx_match:
            spelled_key, identifier = spdx_match.group(1, 3)
            if spelled_key != self.SPDX_HEADER_KEY:
                return 'Misspelled ' + self.SPDX_HEADER_KEY.decode()
            if not identifier:
                return 'Improperly formatted SPDX license identifier'
            if identifier != self.LICENSE_IDENTIFIER:
                return 'Wrong SPDX license identifier'

        # A correct SPDX line still falls through to this last check.
        if self.LICENSE_MENTION_RE.match(line):
            return 'Suspicious license mention'
        return None

    def issue_with_line(self, line, filepath, line_number):
        """Tell whether ``line`` has a copyright or license problem.

        Return True and store a description in ``self.problem`` when a
        problem is found. Return False otherwise, leaving ``self.problem``
        untouched.
        """
        # Use endswith() rather than the more correct os.path.basename()
        # because experimentally, it makes a significant difference to
        # the running time.
        if filepath.endswith(THIS_FILE_BASE_NAME) and \
           line_number > LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER:
            # Avoid false positives from the code in this class.
            # Also skip the rest of this file, which is highly unlikely to
            # contain any problematic statements since we put those near the
            # top of files.
            return False

        found = self._find_problem(line)
        if found is None:
            return False
        self.problem = found
        return True
class IntegrityChecker:
"""Sanity-check files under the current directory."""
@ -365,6 +460,7 @@ class IntegrityChecker:
TrailingWhitespaceIssueTracker(),
TabIssueTracker(),
MergeArtifactIssueTracker(),
LicenseIssueTracker(),
]
def setup_logger(self, log_file, level=logging.INFO):