Restructure check-names.py with more verbose error messages

Signed-off-by: Yuto Takano <yuto.takano@arm.com>
This commit is contained in:
Yuto Takano 2021-08-05 19:47:48 +01:00
parent b61f0e1151
commit 3963967ebc

View File

@ -1,14 +1,27 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
#
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This file is part of Mbed TLS (https://tls.mbed.org)
Copyright (c) 2018, Arm Limited, All Rights Reserved
Purpose
This script confirms that the naming of all symbols and identifiers in mbed
TLS are consistent with the house style and are also self-consistent.
This script confirms that the naming of all symbols and identifiers in Mbed TLS
are consistent with the house style and are also self-consistent.
"""
import argparse
import textwrap
import os
import sys
import traceback
@ -17,47 +30,89 @@ import shutil
import subprocess
import logging
# Naming patterns to check against
MACRO_PATTERN = r"^MBEDTLS_[0-9A-Z_]*[0-9A-Z]$|^YOTTA_[0-9A-Z_]*[0-9A-Z]$"
IDENTIFIER_PATTERN = r"^mbedtls_[0-9a-z_]*[0-9a-z]$"
class Match(object):
def __init__(self, filename, line, pos, name):
self.filename = filename
self.line = line
self.pos = pos
self.name = name
def __str__(self):
return self.name
class Problem(object):
def __init__(self):
self.textwrapper = textwrap.TextWrapper()
self.textwrapper.initial_indent = " * "
self.textwrapper.subsequent_indent = " "
class SymbolNotInHeader(Problem):
def __init__(self, symbol_name):
self.symbol_name = symbol_name
Problem.__init__(self)
def __str__(self):
return self.textwrapper.fill(
"'{0}' was found as an available symbol in the output of nm, "
"however it was not declared in any header files."
.format(self.symbol_name))
class PatternMismatch(Problem):
def __init__(self, pattern, match):
self.pattern = pattern
self.match = match
Problem.__init__(self)
def __str__(self):
return self.textwrapper.fill(
"{0}: '{1}' does not match the required pattern '{2}'."
.format(self.match.filename, self.match.name, self.pattern))
class Typo(Problem):
def __init__(self, match):
self.match = match
Problem.__init__(self)
def __str__(self):
return self.textwrapper.fill(
"{0}: '{1}' looks like a typo. It was not found in any macros or "
"any enums. If this is not a typo, put //no-check-names after it."
.format(self.match.filename, self.match.name))
class NameCheck(object):
def __init__(self):
self.log = None
self.setup_logger()
self.check_repo_path()
self.return_code = 0
self.excluded_files = ["compat-1.3.h"]
self.header_files = self.get_files(os.path.join("include", "mbedtls"))
self.library_files = self.get_files("library")
self.macros = []
self.MBED_names = []
self.enum_consts = []
self.identifiers = []
self.actual_macros = []
self.symbols = []
self.macro_pattern = r"#define (?P<macro>\w+)"
self.MBED_pattern = r"\bMBED.+?_[A-Z0-9_]*"
self.symbol_pattern = r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)"
self.identifier_check_pattern = r"^mbedtls_[0-9a-z_]*[0-9a-z]$"
self.decls_pattern = (
r"^(extern \"C\"|(typedef )?(struct|enum)( {)?$|};?$|$)"
)
self.macro_const_check_pattern = (
r"^MBEDTLS_[0-9A-Z_]*[0-9A-Z]$|^YOTTA_[0-9A-Z_]*[0-9A-Z]$"
)
self.typo_check_pattern = r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$"
self.non_macros = (
"asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
)
def set_return_code(self, return_code):
if return_code > self.return_code:
self.return_code = return_code
def setup_logger(self):
def setup_logger(self, verbose=False):
"""
Set up a logger and set the change the default logging level from
WARNING to INFO. Loggers are better than print statements since their
verbosity can be controlled.
"""
self.log = logging.getLogger()
self.log.setLevel(logging.INFO)
if verbose:
self.log.setLevel(logging.DEBUG)
else:
self.log.setLevel(logging.INFO)
self.log.addHandler(logging.StreamHandler())
def check_repo_path(self):
"""
Check that the current working directory is the project root, and throw
an exception if not.
"""
current_dir = os.path.realpath('.')
root_dir = os.path.dirname(os.path.dirname(
os.path.dirname(os.path.realpath(__file__))))
@ -73,32 +128,81 @@ class NameCheck(object):
filenames.append(os.path.join(root, filename))
return filenames
def get_macros(self):
for header_file in self.header_files:
def parse_macros(self, header_files):
"""
Parse all macros defined by #define preprocessor directives.
Args:
header_files: A list of filepaths to look through.
Returns:
A list of Match objects for the macros.
"""
MACRO_REGEX = r"#define (?P<macro>\w+)"
NON_MACROS = (
"asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
)
macros = []
for header_file in header_files:
with open(header_file, "r") as header:
for line in iter(header.readline, ""):
macro = re.search(self.macro_pattern, line)
if (macro and not
macro.group("macro").startswith(self.non_macros)):
self.macros.append((macro.group("macro"), header_file))
self.macros = list(set(self.macros))
for line in header:
macro = re.search(MACRO_REGEX, line)
if (macro and
not macro.group("macro").startswith(NON_MACROS)):
macros.append(Match(
header_file,
line,
(macro.start(), macro.end()),
macro.group("macro")))
def get_MBED_names(self):
for file_group in [self.header_files, self.library_files]:
for filename in file_group:
with open(filename, "r") as f:
for line in iter(f.readline, ""):
mbed_names = re.findall(self.MBED_pattern, line)
if mbed_names:
for name in mbed_names:
self.MBED_names.append((name, filename))
self.MBED_names = list(set(self.MBED_names))
return macros
def get_enum_consts(self):
for header_file in self.header_files:
def parse_MBED_names(self, header_files, library_files):
"""
Parse all words in the file that begin with MBED. Includes macros.
Args:
header_files: A list of filepaths to look through.
library_files: A list of filepaths to look through.
Returns:
A list of Match objects for words beginning with MBED.
"""
MBED_names = []
for filename in header_files + library_files:
with open(filename, "r") as fp:
for line in fp:
for name in re.finditer(r"\bMBED.+?_[A-Z0-9_]*", line):
MBED_names.append(Match(
filename,
line,
(name.start(), name.end()),
name.group(0)
))
return MBED_names
def parse_enum_consts(self, header_files):
"""
Parse all enum value constants that are declared.
Args:
header_files: A list of filepaths to look through.
Returns:
A list of (enum constants, containing filename).
"""
enum_consts = []
for header_file in header_files:
# Emulate a finite state machine to parse enum declarations.
state = 0
with open(header_file, "r") as header:
for line in iter(header.readline, ""):
for line in header:
if state is 0 and re.match(r"^(typedef )?enum {", line):
state = 1
elif state is 0 and re.match(r"^(typedef )?enum", line):
@ -110,156 +214,314 @@ class NameCheck(object):
elif state is 1:
enum_const = re.match(r"^\s*(?P<enum_const>\w+)", line)
if enum_const:
self.enum_consts.append(
(enum_const.group("enum_const"), header_file)
)
enum_consts.append(Match(
header_file,
line,
(enum_const.start(), enum_const.end()),
enum_const.group("enum_const")))
return enum_consts
def line_contains_declaration(self, line):
return (re.match(r"^[^ /#{]", line)
and not re.match(self.decls_pattern, line))
def parse_identifiers(self, header_files):
"""
Parse all lines of a header where a function identifier is declared,
based on some huersitics. Assumes every line that is not a comment or a
preprocessor directive contains some identifier.
def get_identifier_from_declaration(self, declaration):
identifier = re.search(
r"([a-zA-Z_][a-zA-Z0-9_]*)\(|"
r"\(\*(.+)\)\(|"
r"(\w+)\W*$",
declaration
Args:
header_files: A list of filepaths to look through.
Returns:
A list of (identifier, containing filename)
"""
EXCLUDED_DECLARATIONS = (
r"^(extern \"C\"|(typedef )?(struct|enum)( {)?$|};?$|$)"
)
if identifier:
for group in identifier.groups():
if group:
return group
self.log.error(declaration)
raise Exception("No identifier found")
def get_identifiers(self):
for header_file in self.header_files:
identifiers = []
for header_file in header_files:
with open(header_file, "r") as header:
for line in iter(header.readline, ""):
if self.line_contains_declaration(line):
self.identifiers.append(
(self.get_identifier_from_declaration(line),
header_file)
)
in_block_comment = False
def get_symbols(self):
for line in header:
# Skip parsing this line if it begins or ends a block
# comment, and set the state machine's state.
if re.search(r"/\*", line):
in_block_comment = True
continue
elif re.search(r"\*/", line) and in_block_comment:
in_block_comment = False
continue
# Skip parsing this line if it's a line comment, or if it
# begins with a preprocessor directive
if in_block_comment or re.match(r"(//|#)", line):
continue
if re.match(EXCLUDED_DECLARATIONS, line):
continue
identifier = re.search(
# Matches: "mbedtls_aes_init("
r"([a-zA-Z_][a-zA-Z0-9_]*)\(|"
# Matches: "(*f_rng)("
r"\(\*(.+)\)\(|"
# TODO: unknown purpose
r"(\w+)\W*$",
line
)
if identifier:
for group in identifier.groups():
if group:
identifiers.append(Match(
header_file,
line,
(identifier.start(), identifier.end()),
identifier.group(0)))
return identifiers
def parse_symbols(self):
"""
Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
object files using nm to retrieve the list of referenced symbols.
Returns:
A list of unique symbols defined and used in the libraries.
"""
symbols = []
# Back up the config and atomically compile with the full configratuion.
shutil.copy("include/mbedtls/mbedtls_config.h",
"include/mbedtls/mbedtls_config.h.bak")
try:
shutil.copy("include/mbedtls/config.h",
"include/mbedtls/config.h.bak")
subprocess.check_output(
subprocess.run(
["perl", "scripts/config.pl", "full"],
universal_newlines=True,
encoding=sys.stdout.encoding,
check=True
)
my_environment = os.environ.copy()
my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
subprocess.check_output(
subprocess.run(
["make", "clean", "lib"],
env=my_environment,
universal_newlines=True,
encoding=sys.stdout.encoding,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
check=True
)
shutil.move("include/mbedtls/config.h.bak",
"include/mbedtls/config.h")
nm_output = ""
for lib in ["library/libmbedcrypto.a",
"library/libmbedtls.a",
"library/libmbedx509.a"]:
nm_output += subprocess.check_output(
["nm", "-og", lib],
universal_newlines=True,
stderr=subprocess.STDOUT,
)
for line in nm_output.splitlines():
if not re.match(r"^\S+: +U |^$|^\S+:$", line):
symbol = re.match(self.symbol_pattern, line)
if symbol:
self.symbols.append(symbol.group('symbol'))
else:
self.log.error(line)
self.symbols.sort()
subprocess.check_output(
# Perform object file analysis using nm
symbols = self.parse_symbols_from_nm(
["library/libmbedcrypto.a",
"library/libmbedtls.a",
"library/libmbedx509.a"])
symbols.sort()
subprocess.run(
["make", "clean"],
universal_newlines=True,
encoding=sys.stdout.encoding,
check=True
)
except subprocess.CalledProcessError as error:
self.log.error(error)
self.set_return_code(2)
finally:
shutil.move("include/mbedtls/mbedtls_config.h.bak",
"include/mbedtls/mbedtls_config.h")
return symbols
def parse_symbols_from_nm(self, object_files):
"""
Run nm to retrieve the list of referenced symbols in each object file.
Does not return the position data since it is of no use.
Returns:
A list of unique symbols defined and used in any of the object files.
"""
UNDEFINED_SYMBOL = r"^\S+: +U |^$|^\S+:$"
VALID_SYMBOL = r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)"
symbols = []
nm_output = ""
for lib in object_files:
nm_output += subprocess.run(
["nm", "-og", lib],
encoding=sys.stdout.encoding,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
check=True
).stdout
for line in nm_output.splitlines():
if not re.match(UNDEFINED_SYMBOL, line):
symbol = re.match(VALID_SYMBOL, line)
if symbol:
symbols.append(symbol.group('symbol'))
else:
self.log.error(line)
return symbols
def parse_names_in_source(self):
"""
Calls each parsing function to retrieve various elements of the code,
together with their source location. Puts the parsed values in the
internal variable self.parse_result.
"""
self.log.info("Parsing source code...")
m_headers = self.get_files(os.path.join("include", "mbedtls"))
libraries = self.get_files("library")
all_macros = self.parse_macros(m_headers)
enum_consts = self.parse_enum_consts(m_headers)
identifiers = self.parse_identifiers(m_headers)
symbols = self.parse_symbols()
mbed_names = self.parse_MBED_names(m_headers, libraries)
# Remove identifier macros like mbedtls_printf or mbedtls_calloc
macros = list(set(all_macros) - set(identifiers))
self.log.info("Found:")
self.log.info(" {} Macros".format(len(all_macros)))
self.log.info(" {} Enum Constants".format(len(enum_consts)))
self.log.info(" {} Identifiers".format(len(identifiers)))
self.log.info(" {} Exported Symbols".format(len(symbols)))
self.log.info("Analysing...")
self.parse_result = {
"macros": macros,
"enum_consts": enum_consts,
"identifiers": identifiers,
"symbols": symbols,
"mbed_names": mbed_names
}
def perform_checks(self):
"""
Perform each check in order, output its PASS/FAIL status. Maintain an
overall test status, and output that at the end.
"""
problems = 0
problems += self.check_symbols_declared_in_header()
pattern_checks = [
("macros", MACRO_PATTERN),
("enum_consts", MACRO_PATTERN),
("identifiers", IDENTIFIER_PATTERN)]
for group, check_pattern in pattern_checks:
problems += self.check_match_pattern(group, check_pattern)
problems += self.check_for_typos()
self.log.info("=============")
if problems > 0:
self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
else:
self.log.info("PASS")
def check_symbols_declared_in_header(self):
identifiers = [x[0] for x in self.identifiers]
bad_names = []
for symbol in self.symbols:
if symbol not in identifiers:
bad_names.append(symbol)
if bad_names:
self.set_return_code(1)
self.log.info("Names of identifiers: FAIL")
for name in bad_names:
self.log.info(name)
else:
self.log.info("Names of identifiers: PASS")
"""
Perform a check that all detected symbols in the library object files
are properly declared in headers.
Outputs to the logger the PASS/FAIL status, followed by the location of
problems.
def check_group(self, group_to_check, check_pattern, name):
bad_names = []
for item in group_to_check:
if not re.match(check_pattern, item[0]):
bad_names.append("{} - {}".format(item[0], item[1]))
if bad_names:
Returns the number of problems that needs fixing.
"""
problems = []
for symbol in self.parse_result["symbols"]:
found_symbol_declared = False
for identifier_match in self.parse_result["identifiers"]:
if symbol == identifier_match.name:
found_symbol_declared = True
break
if not found_symbol_declared:
problems.append(SymbolNotInHeader(symbol))
if problems:
self.set_return_code(1)
self.log.info("Names of {}: FAIL".format(name))
for name in bad_names:
self.log.info(name)
self.log.info("All symbols in header: FAIL")
for problem in problems:
self.log.info(str(problem) + "\n")
else:
self.log.info("Names of {}: PASS".format(name))
self.log.info("All symbols in header: PASS")
return len(problems)
def check_match_pattern(self, group_to_check, check_pattern):
problems = []
for item_match in self.parse_result[group_to_check]:
if not re.match(check_pattern, item_match.name):
problems.append(PatternMismatch(check_pattern, item_match))
if problems:
self.set_return_code(1)
self.log.info("Naming patterns of {}: FAIL".format(group_to_check))
for problem in problems:
self.log.info(str(problem) + "\n")
else:
self.log.info("Naming patterns of {}: PASS".format(group_to_check))
return len(problems)
def check_for_typos(self):
bad_names = []
all_caps_names = list(set(
[x[0] for x in self.actual_macros + self.enum_consts]
problems = []
all_caps_names = list(set([
match.name for match
in self.parse_result["macros"] + self.parse_result["enum_consts"]]
))
for name in self.MBED_names:
if name[0] not in all_caps_names:
if not re.search(self.typo_check_pattern, name[0]):
bad_names.append("{} - {}".format(name[0], name[1]))
if bad_names:
TYPO_EXCLUSION = r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$"
for name_match in self.parse_result["mbed_names"]:
if name_match.name not in all_caps_names:
if not re.search(TYPO_EXCLUSION, name_match.name):
problems.append(Typo(name_match))
if problems:
self.set_return_code(1)
self.log.info("Likely typos: FAIL")
for name in bad_names:
self.log.info(name)
for problem in problems:
self.log.info(str(problem) + "\n")
else:
self.log.info("Likely typos: PASS")
return len(problems)
def get_names_from_source_code(self):
self.log.info("Analysing source code...")
self.get_macros()
self.get_enum_consts()
self.get_identifiers()
self.get_symbols()
self.get_MBED_names()
self.actual_macros = list(set(self.macros) - set(self.identifiers))
self.log.info("{} macros".format(len(self.macros)))
self.log.info("{} enum-consts".format(len(self.enum_consts)))
self.log.info("{} identifiers".format(len(self.identifiers)))
self.log.info("{} exported-symbols".format(len(self.symbols)))
def main():
"""
Main function, parses command-line arguments.
"""
def check_names(self):
self.check_symbols_declared_in_header()
for group, check_pattern, name in [
(self.actual_macros, self.macro_const_check_pattern,
"actual-macros"),
(self.enum_consts, self.macro_const_check_pattern,
"enum-consts"),
(self.identifiers, self.identifier_check_pattern,
"identifiers")]:
self.check_group(group, check_pattern, name)
self.check_for_typos()
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description=(
"This script confirms that the naming of all symbols and identifiers "
"in Mbed TLS are consistent with the house style and are also "
"self-consistent.\n\n"
"Expected to be run from the MbedTLS root directory."))
parser.add_argument("-v", "--verbose",
action="store_true",
help="enable script debug outputs")
args = parser.parse_args()
def run_main():
try:
name_check = NameCheck()
name_check.get_names_from_source_code()
name_check.check_names()
name_check.setup_logger(verbose=args.verbose)
name_check.parse_names_in_source()
name_check.perform_checks()
sys.exit(name_check.return_code)
except Exception:
traceback.print_exc()
@ -267,4 +529,4 @@ def run_main():
if __name__ == "__main__":
run_main()
main()