|
| 1 | +//===--- Bracket.cpp - Analyze bracket structure --------------------------===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | +// |
| 9 | +// The basic phases of our bracket matching are: |
| 10 | +// |
| 11 | +// 1) A simple "greedy" match looks for well-nested subsequences. |
| 12 | +// |
| 13 | +// We can't fully trust the results of this, consider: |
| 14 | +// while (1) { // A |
| 15 | +// if (true) { // B |
| 16 | +// break; |
| 17 | +// } // C |
| 18 | +// Greedy matching will match B=C, when we should at least consider A=C. |
| 19 | +// However for the correct parts of the file, the greedy match gives the |
| 20 | +// right answer. It produces useful candidates for phase 2. |
| 21 | +// |
| 22 | +// simplePairBrackets handles this step. |
| 23 | +// |
| 24 | +// 2) Try to identify places where formatting indicates that the greedy match |
| 25 | +// was correct. This is similar to how a human would scan a large file. |
| 26 | +// |
| 27 | +// For example: |
| 28 | +// int foo() { // X |
| 29 | +// // indented |
| 30 | +// while (1) { |
| 31 | +// // valid code |
| 32 | +// } |
| 33 | +// return bar(42); |
| 34 | +// } // Y |
| 35 | +// We can "verify" that X..Y looks like a braced block, and the greedy match |
| 36 | +// tells us that substring is perfectly nested. |
| 37 | +// We trust the pairings of those brackets and don't examine them further. |
| 38 | +// However in the first example above, we do not trust B=C because the brace |
| 39 | +// indentation is suspect. |
| 40 | +// |
| 41 | +// FIXME: implement this step. |
| 42 | +// |
| 43 | +// 3) Run full best-match optimization on remaining brackets. |
| 44 | +// |
| 45 | +// Conceptually, this considers all possible matchings and optimizes cost: |
| 46 | +// - there is a cost for failing to match a bracket |
| 47 | +// - there is a variable cost for matching two brackets. |
| 48 | +// (For example if brace indentation doesn't match). |
| 49 | +// |
| 50 | +// In the first example we have three alternatives, and they are ranked: |
| 51 | +// 1) A=C, skip B |
| 52 | +// 2) B=C, skip A |
| 53 | +// 3) skip A, skip B, skip C |
| 54 | +// The cost for skipping a bracket is high, so option 3 is worst. |
| 55 | +// B=C costs more than A=C, because the indentation doesn't match. |
| 56 | +// |
| 57 | +// It would be correct to run this step alone, but it would be too slow. |
| 58 | +// The implementation is dynamic programming in N^3 space and N^2 time. |
| 59 | +// Having earlier steps filter out most brackets is key to performance. |
| 60 | +// |
| 61 | +// FIXME: implement this step. |
| 62 | +// |
| 63 | +//===----------------------------------------------------------------------===// |
| 64 | + |
| 65 | +#include "clang-pseudo/Bracket.h" |
| 66 | + |
| 67 | +namespace clang { |
| 68 | +namespace pseudo { |
| 69 | +namespace { |
| 70 | + |
| 71 | +struct Bracket { |
| 72 | + using Index = unsigned; |
| 73 | + constexpr static Index None = -1; |
| 74 | + |
| 75 | + enum BracketKind : char { Paren, Brace, Square } Kind; |
| 76 | + enum Direction : bool { Open, Close } Dir; |
| 77 | + unsigned Line; |
| 78 | + unsigned Indent; |
| 79 | + Token::Index Tok; |
| 80 | + Bracket::Index Pair = None; |
| 81 | +}; |
| 82 | + |
| 83 | +// Find brackets in the stream and convert to Bracket struct. |
| 84 | +std::vector<Bracket> findBrackets(const TokenStream &Stream) { |
| 85 | + std::vector<Bracket> Brackets; |
| 86 | + auto Add = [&](const pseudo::Token &Tok, Bracket::BracketKind K, |
| 87 | + Bracket::Direction D) { |
| 88 | + Brackets.push_back( |
| 89 | + {K, D, Tok.Line, Tok.Indent, Stream.index(Tok), Bracket::None}); |
| 90 | + }; |
| 91 | + for (const auto &Tok : Stream.tokens()) { |
| 92 | + switch (Tok.Kind) { |
| 93 | + case clang::tok::l_paren: |
| 94 | + Add(Tok, Bracket::Paren, Bracket::Open); |
| 95 | + break; |
| 96 | + case clang::tok::r_paren: |
| 97 | + Add(Tok, Bracket::Paren, Bracket::Close); |
| 98 | + break; |
| 99 | + case clang::tok::l_brace: |
| 100 | + Add(Tok, Bracket::Brace, Bracket::Open); |
| 101 | + break; |
| 102 | + case clang::tok::r_brace: |
| 103 | + Add(Tok, Bracket::Brace, Bracket::Close); |
| 104 | + break; |
| 105 | + case clang::tok::l_square: |
| 106 | + Add(Tok, Bracket::Square, Bracket::Open); |
| 107 | + break; |
| 108 | + case clang::tok::r_square: |
| 109 | + Add(Tok, Bracket::Square, Bracket::Close); |
| 110 | + break; |
| 111 | + default: |
| 112 | + break; |
| 113 | + } |
| 114 | + } |
| 115 | + return Brackets; |
| 116 | +} |
| 117 | + |
| 118 | +// Write the bracket pairings from Brackets back to Tokens. |
| 119 | +void applyPairings(ArrayRef<Bracket> Brackets, TokenStream &Tokens) { |
| 120 | + for (const auto &B : Brackets) |
| 121 | + Tokens.tokens()[B.Tok].Pair = |
| 122 | + (B.Pair == Bracket::None) ? 0 : (int32_t)Brackets[B.Pair].Tok - B.Tok; |
| 123 | +} |
| 124 | + |
| 125 | +// Find perfect pairings (ignoring whitespace) via greedy algorithm. |
| 126 | +// This means two brackets are paired if they match and the brackets between |
| 127 | +// them nest perfectly, with no skipped or crossed brackets. |
| 128 | +void simplePairBrackets(MutableArrayRef<Bracket> Brackets) { |
| 129 | + std::vector<unsigned> Stack; |
| 130 | + for (unsigned I = 0; I < Brackets.size(); ++I) { |
| 131 | + if (Brackets[I].Dir == Bracket::Open) { |
| 132 | + Stack.push_back(I); |
| 133 | + } else if (!Stack.empty() && |
| 134 | + Brackets[Stack.back()].Kind == Brackets[I].Kind) { |
| 135 | + Brackets[Stack.back()].Pair = I; |
| 136 | + Brackets[I].Pair = Stack.back(); |
| 137 | + Stack.pop_back(); |
| 138 | + } else { |
| 139 | + // Unpaired closer, no brackets on stack are part of a perfect sequence. |
| 140 | + Stack.clear(); |
| 141 | + } |
| 142 | + } |
| 143 | + // Any remaining brackets on the stack stay unpaired. |
| 144 | +} |
| 145 | + |
| 146 | +} // namespace |
| 147 | + |
| 148 | +void pairBrackets(TokenStream &Stream) { |
| 149 | + auto Brackets = findBrackets(Stream); |
| 150 | + simplePairBrackets(Brackets); |
| 151 | + applyPairings(Brackets, Stream); |
| 152 | +} |
| 153 | + |
| 154 | +} // namespace pseudo |
| 155 | +} // namespace clang |
0 commit comments