Skip to content

[libc++] Rewrite the transitive header checking machinery #110554

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion libcxx/test/libcxx/header_inclusions.gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from libcxx.header_information import lit_header_restrictions, public_headers, mandatory_inclusions

for header in public_headers:
header_guard = lambda h: f"_LIBCPP_{h.upper().replace('.', '_').replace('/', '_')}"
header_guard = lambda h: f"_LIBCPP_{str(h).upper().replace('.', '_').replace('/', '_')}"

# <cassert> has no header guards
if header == 'cassert':
Expand Down
22 changes: 6 additions & 16 deletions libcxx/test/libcxx/headers_in_modulemap.sh.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,15 @@
# RUN: %{python} %s %{libcxx-dir}/utils %{include-dir}
# RUN: %{python} %s %{libcxx-dir}/utils

import sys

sys.path.append(sys.argv[1])
from libcxx.header_information import all_headers, libcxx_include

import pathlib
import sys
from libcxx.header_information import is_modulemap_header, is_header

headers = list(pathlib.Path(sys.argv[2]).rglob("*"))
modulemap = open(f"{sys.argv[2]}/module.modulemap").read()
with open(libcxx_include / "module.modulemap") as f:
modulemap = f.read()

isHeaderMissing = False

for header in headers:
if not is_header(header):
continue

header = header.relative_to(pathlib.Path(sys.argv[2])).as_posix()

if not is_modulemap_header(header):
for header in all_headers:
if not header.is_in_modulemap():
continue

if not str(header) in modulemap:
Expand Down
12 changes: 6 additions & 6 deletions libcxx/test/libcxx/transitive_includes.gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@

all_traces = []
for header in sorted(public_headers):
if header.endswith(".h"): # Skip C compatibility or detail headers
if header.is_C_compatibility() or header.is_internal():
continue

normalized_header = re.sub("/", "_", header)
normalized_header = re.sub("/", "_", str(header))
print(
f"""\
// RUN: echo "#include <{header}>" | %{{cxx}} -xc++ - %{{flags}} %{{compile_flags}} --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.{normalized_header}.txt
Expand All @@ -55,17 +55,17 @@

print(
f"""\
// RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes_to_csv.py {' '.join(all_traces)} > %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv
// RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes/to_csv.py {' '.join(all_traces)} > %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv
"""
)

else:
for header in public_headers:
if header.endswith(".h"): # Skip C compatibility or detail headers
if header.is_C_compatibility() or header.is_internal():
continue

# Escape slashes for the awk command below
escaped_header = header.replace("/", "\\/")
escaped_header = str(header).replace("/", "\\/")

print(
f"""\
Expand All @@ -92,7 +92,7 @@

// RUN: mkdir %t
// RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.txt
// RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes_to_csv.py %t/trace-includes.txt > %t/actual_transitive_includes.csv
// RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes/to_csv.py %t/trace-includes.txt > %t/actual_transitive_includes.csv
// RUN: cat %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv | awk '/^{escaped_header} / {{ print }}' > %t/expected_transitive_includes.csv
// RUN: diff -w %t/expected_transitive_includes.csv %t/actual_transitive_includes.csv
#include <{header}>
Expand Down
120 changes: 120 additions & 0 deletions libcxx/test/libcxx/transitive_includes/to_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/usr/bin/env python
# ===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===----------------------------------------------------------------------===##

from typing import List, Tuple, Optional
import argparse
import io
import itertools
import os
import pathlib
import re
import sys

libcxx_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
sys.path.append(os.path.join(libcxx_root, "utils"))
from libcxx.header_information import Header

def parse_line(line: str) -> Tuple[int, str]:
    """
    Parse a single line of --trace-includes output.

    A valid line starts with one or more periods, followed by a single space
    and then the name of the file being included.

    Returns a tuple of the inclusion level (the number of leading periods) and
    the raw file name being included.

    Raises ValueError if the line does not have that shape.
    """
    match = re.match(r"(\.+) (.+)", line)
    if not match:
        # Use a builtin exception type: the name must resolve at raise time,
        # otherwise the user sees a NameError instead of this message.
        raise ValueError(f"Line {line} contains invalid data.")

    # The number of periods in front of the header name is the nesting level of
    # that header.
    return (len(match.group(1)), match.group(2))

def make_cxx_v1_relative(header: str) -> Optional[str]:
    """
    Strip everything up to and including the `c++/v<N>/` directory from a path.

    Returns the remainder of the path, or None when the path has no such
    directory component. Callers use the None result as the signal that the
    header is not a libc++ header.
    """
    # Clang may print Windows paths with backslash separators, and it prints
    # each backslash escaped as two consecutive backslashes. A separator is
    # therefore either one forward slash or two literal backslashes. The raw
    # string keeps the regex-level escaping free of Python-level escaping.
    separator = r"(?:/|\\\\)"
    pattern = r"^.*c\+\+" + separator + r"v[0-9]+" + separator + r"(.+)$"
    found = re.match(pattern, header)
    return found.group(1) if found else None

def parse_file(file: io.TextIOBase) -> List[Tuple[Header, Header]]:
    """
    Read --trace-includes output from `file` and return the transitive
    includes it describes.

    Each element of the returned list is an (includer, included) pair, where
    the includer is the most recent level-1 libc++ header seen in the trace.
    Lines that do not refer to a libc++ header are ignored.
    """
    pairs = []
    top_level = None
    for entry in file.readlines():
        (depth, raw_path) = parse_line(entry)
        libcxx_path = make_cxx_v1_relative(raw_path)

        # Skip anything that isn't a libc++ header.
        if libcxx_path is None:
            continue

        # Nested inclusion: attribute it to the current top-level includer.
        if depth != 1:
            assert top_level is not None
            pairs.append((top_level, Header(libcxx_path)))
            continue

        # A level-1 entry becomes the includer for the entries that follow.
        # There's usually exactly one, except if the compiler is passed a file
        # with `-include`.
        top_level = Header(libcxx_path)
    return pairs

def print_csv(includes: List[Tuple[Header, Header]]) -> None:
    """
    Write the transitive includes to standard output as space-delimited CSV,
    one `<includer> <included>` pair per line.

    Pairs are grouped by includer, and each included header is printed once
    per includer, in sorted order. Included headers that are not public, or
    that are C compatibility headers, are left out.
    """
    def includer_of(pair):
        return pair[0]

    # groupby() only merges adjacent items, so sort by the same key first.
    ordered = sorted(includes, key=includer_of)

    for (includer, group) in itertools.groupby(ordered, key=includer_of):
        unique_includees = {pair[1] for pair in group}
        for included in sorted(unique_includees):
            if not included.is_public() or included.is_C_compatibility():
                continue
            print(f"{includer} {included}")

def main(argv):
    """
    Command-line entry point.

    `argv` is the list of command-line arguments without the program name.
    Every input file named in it is parsed as --trace-includes output, and the
    combined transitive includes are printed by print_csv().
    """
    parser = argparse.ArgumentParser(
        description="""
Given a list of headers produced by --trace-includes, produce a list of libc++ headers in that output.

Note that -fshow-skipped-includes must also be passed to the compiler in order to get sufficient
information for this script to run.

The output of this script is provided in space-delimited CSV format where each line contains:

<header performing inclusion> <header being included>
""")
    parser.add_argument("inputs", type=argparse.FileType("r"), nargs='+', default=None,
                        help="One or more files containing the result of --trace-includes")
    args = parser.parse_args(argv)

    includes = []
    for file in args.inputs:
        # argparse.FileType hands back already-open files and never closes
        # them, so close each one as soon as it has been parsed.
        with file:
            includes.extend(parse_file(file))
    print_csv(includes)

# When run as a script, pass the command-line arguments (without the program
# name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])
147 changes: 0 additions & 147 deletions libcxx/test/libcxx/transitive_includes_to_csv.py

This file was deleted.

2 changes: 1 addition & 1 deletion libcxx/utils/generate_iwyu_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def main(argv: typing.List[str]):

mappings = [] # Pairs of (header, public_header)
for header in libcxx.header_information.all_headers:
public_headers = IWYU_mapping(header)
public_headers = IWYU_mapping(str(header))
if public_headers is not None:
mappings.extend((header, public) for public in public_headers)

Expand Down
14 changes: 3 additions & 11 deletions libcxx/utils/generate_libcxx_cppm_in.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,11 @@
import os.path
import sys

from libcxx.header_information import module_c_headers
from libcxx.header_information import module_headers
from libcxx.header_information import header_restrictions
from libcxx.header_information import headers_not_available
from libcxx.header_information import module_c_headers, module_headers, header_restrictions, headers_not_available, libcxx_root


def write_file(module):
libcxx_module_directory = os.path.join(
os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "modules"
)
with open(
os.path.join(libcxx_module_directory, f"{module}.cppm.in"), "w"
) as module_cpp_in:
with open(libcxx_root / "modules" / f"{module}.cppm.in", "w") as module_cpp_in:
module_cpp_in.write(
"""\
// -*- C++ -*-
Expand All @@ -45,7 +37,7 @@ def write_file(module):
// and the headers of Table 25: C++ headers for C library facilities [tab:headers.cpp.c]
"""
)
for header in module_headers if module == "std" else module_c_headers:
for header in sorted(module_headers if module == "std" else module_c_headers):
if header in header_restrictions:
module_cpp_in.write(
f"""\
Expand Down
Loading
Loading