mirror of
https://github.com/Mbed-TLS/mbedtls.git
synced 2025-02-15 06:40:48 +00:00
Fix // comments stopping on 'n' instead of newlines. Also allow backslash-newline in // comments. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
132 lines
4.9 KiB
Python
132 lines
4.9 KiB
Python
"""Helper functions to parse C code in heavily constrained scenarios.
|
|
|
|
Currently supported functionality:
|
|
|
|
* read_function_declarations: read function declarations from a header file.
|
|
"""
|
|
|
|
# Copyright The Mbed TLS Contributors
|
|
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
|
|
|
|
### WARNING: the code in this file has not been extensively reviewed yet.
|
|
### We do not think it is harmful, but it may be below our normal standards
|
|
### for robustness and maintainability.
|
|
|
|
import re
|
|
from typing import Dict, Iterable, Iterator, List, Optional, Tuple
|
|
|
|
|
|
class ArgumentInfo:
    """Information about an argument to an API function.

    Instance attributes:
    * decl: the declaration text with surrounding whitespace removed.
    * type: the type part of the declaration, with normalized whitespace.
    * name: the argument name, or None if the declaration does not have one.
    * suffix: the trailing bracketed part (e.g. ``[16]``), or '' if absent.
    """
    #pylint: disable=too-few-public-methods

    # Words that may be the last word of a type. A word from this list
    # is never taken as the argument name.
    _KEYWORDS = [
        'const', 'register', 'restrict',
        'int', 'long', 'short', 'signed', 'unsigned',
    ]
    # The type is matched lazily so that the last word can become the name.
    # The negative lookahead rejects a keyword as the name. It ends with \b
    # so that only a whole keyword is rejected: without it, a name that
    # merely starts with a keyword (e.g. 'interval', 'constant_len') would
    # be absorbed into the type, leaving the argument without a name.
    _DECLARATION_RE = re.compile(
        r'(?P<type>\w[\w\s*]*?)\s*' +
        r'(?!(?:' + r'|'.join(_KEYWORDS) + r')\b)(?P<name>\b\w+\b)?' +
        r'\s*(?P<suffix>\[[^][]*\])?\Z',
        re.A | re.S)

    @classmethod
    def normalize_type(cls, typ: str) -> str:
        """Normalize whitespace in a type.

        Each run of whitespace becomes a single space, and each ``*`` is
        preceded by exactly one space.
        """
        typ = re.sub(r'\s+', r' ', typ)
        typ = re.sub(r'\s*\*', r' *', typ)
        return typ

    def __init__(self, decl: str) -> None:
        """Parse the declaration of one function argument.

        Raise ValueError (carrying the stripped declaration) if the text
        does not look like a supported argument declaration.
        """
        self.decl = decl.strip()
        m = self._DECLARATION_RE.match(self.decl)
        if not m:
            raise ValueError(self.decl)
        self.type = self.normalize_type(m.group('type')) #type: str
        self.name = m.group('name') #type: Optional[str]
        self.suffix = m.group('suffix') if m.group('suffix') else '' #type: str
|
|
|
|
|
|
class FunctionInfo:
    """Description of one API function declaration.

    Instance attributes:
    * filename: the file name passed to the constructor.
    * line_number: the line number passed to the constructor.
    * qualifiers: the qualifiers, stored as a frozenset.
    * return_type: the return type text.
    * name: the function name.
    * arguments: one ArgumentInfo object per argument declaration.
    """
    #pylint: disable=too-few-public-methods

    # Regex matching the declaration of a function that returns void.
    VOID_RE = re.compile(r'\s*\bvoid\s*\Z', re.A)

    def __init__(self, #pylint: disable=too-many-arguments
                 filename: str,
                 line_number: int,
                 qualifiers: Iterable[str],
                 return_type: str,
                 name: str,
                 arguments: List[str]) -> None:
        """Record a function declaration.

        Each element of arguments is the text of one argument declaration;
        it is parsed by constructing an ArgumentInfo object from it.
        """
        self.name = name
        self.return_type = return_type
        self.qualifiers = frozenset(qualifiers)
        self.filename = filename
        self.line_number = line_number
        self.arguments = [ArgumentInfo(declaration)
                          for declaration in arguments]

    def returns_void(self) -> bool:
        """Whether the function returns void."""
        found = self.VOID_RE.search(self.return_type)
        return found is not None
|
|
|
|
|
|
# Match one C comment.
# Note that we match both comment types, so things like // in a /*...*/
# comment are handled correctly.
# In a // comment, the backslash-newline alternative must come before the
# any-character alternative: alternatives are tried from left to right, so
# with the opposite order the backslash would be consumed on its own and
# the comment would stop at the newline instead of continuing past it.
_C_COMMENT_RE = re.compile(r'//(?:\\\n|[^\n])*|/\*.*?\*/', re.S)
# Match a maximal run of characters that are not newlines. Used to blank
# out the text of a comment while keeping its line breaks.
_NOT_NEWLINES_RE = re.compile(r'[^\n]+')
|
|
|
|
def read_logical_lines(filename: str) -> Iterator[Tuple[int, str]]:
    """Read logical lines from a file.

    Logical lines are one or more physical lines, with balanced parentheses.

    The file is read as UTF-8. C comments are removed before the lines are
    split, but the newlines inside comments are kept so that line numbers
    remain accurate.

    Yield pairs (line_number, line) where line_number is the 1-based number
    of the first physical line of the logical line, and line is the text of
    the logical line with its physical lines joined by newline characters.

    If the parentheses are still unbalanced at the end of the file, the
    last logical line extends to the end of the file.
    """
    with open(filename, encoding='utf-8') as inp:
        content = inp.read()
    # Strip comments, but keep newlines for line numbering
    content = _C_COMMENT_RE.sub(
        lambda m: _NOT_NEWLINES_RE.sub("", m.group(0)),
        content)
    lines = enumerate(content.splitlines(), 1)
    for line_number, line in lines:
        # Read a logical line, containing balanced parentheses.
        # We assume that parentheses are balanced (this should be ok
        # since comments have been stripped), otherwise there will be
        # a gigantic logical line at the end.
        paren_level = line.count('(') - line.count(')')
        while paren_level > 0:
            # Use a default value rather than letting StopIteration escape:
            # inside a generator, an escaping StopIteration is converted
            # to RuntimeError, which would abort the whole parse instead of
            # producing the final (unbalanced) logical line.
            following = next(lines, None)
            if following is None:
                break
            _, more = following
            paren_level += more.count('(') - more.count(')')
            line += '\n' + more
        yield line_number, line
|
|
|
|
# Match the declaration of a C function, as found at the start of a
# logical line. The named groups are:
# * qualifiers: any sequence of 'extern', 'inline' and 'static';
# * return_type: the return type, matched lazily so that the last word
#   before the opening parenthesis becomes the name;
# * name: the function name;
# * arguments: everything between the outermost parentheses.
_C_FUNCTION_DECLARATION_RE = re.compile(
    r'(?P<qualifiers>(?:(?:extern|inline|static)\b\s*)*)'
    r'(?P<return_type>\w[\w\s*]*?)\s*'
    r'\b(?P<name>\w+)'
    r'\s*\((?P<arguments>.*)\)\s*;',
    re.ASCII | re.DOTALL)
|
|
|
|
def read_function_declarations(functions: Dict[str, FunctionInfo],
                               filename: str) -> None:
    """Collect function declarations from a C header file.

    For each logical line of filename that matches the function declaration
    regex, store a FunctionInfo object in functions, keyed by the function
    name. Logical lines that do not match are ignored.
    """
    for line_number, logical_line in read_logical_lines(filename):
        found = _C_FUNCTION_DECLARATION_RE.match(logical_line)
        if found is None:
            continue
        name = found.group('name')
        argument_decls = found.group('arguments').split(',')
        # A parameter list consisting of just 'void' means no arguments.
        if len(argument_decls) == 1 and \
           FunctionInfo.VOID_RE.match(argument_decls[0]):
            argument_decls = []
        # Note: we replace any existing declaration for the same name.
        functions[name] = FunctionInfo(filename, line_number,
                                       found.group('qualifiers').split(),
                                       found.group('return_type'),
                                       name,
                                       argument_decls)
|