[clangd] Use ML Code completion ranking as default.

usx95 · usx95 · commit 438b5bb05a42 · 2021-03-02T10:05:37.000+01:00
This makes code completion use a Decision Forest based ranking algorithm to rank completion candidates. [Esitmated 6% accuracy boost]. This was previously hidden behind the flag --ranking-model=decision_forest. This patch makes it the default ranking algorithm. Note: this is a generic model, not specialized for any particular project. clangd does not collect or upload data to train code completion. Also treat Keywords separately as they are not recorded by the training set generator. Differential Revision: https://reviews.llvm.org/D96353
diff --git a/clang-tools-extra/clangd/CodeComplete.h b/clang-tools-extra/clangd/CodeComplete.h
@@ -133,7 +133,7 @@ struct CodeCompleteOptions {
   enum CodeCompletionRankingModel {
     Heuristics,
     DecisionForest,
-  } RankingModel = Heuristics;
+  } RankingModel = DecisionForest;
 
   /// Callback used to score a CompletionCandidate if DecisionForest ranking
   /// model is enabled.
diff --git a/clang-tools-extra/clangd/Quality.cpp b/clang-tools-extra/clangd/Quality.cpp
@@ -580,12 +580,16 @@ evaluateDecisionForest(const SymbolQualitySignals &Quality,
   // multiplciative boost (like NameMatch). This allows us to weigh the
   // prediciton score and NameMatch appropriately.
   Scores.ExcludingName = pow(Base, Evaluate(E));
-  // NeedsFixIts is not part of the DecisionForest as generating training
-  // data that needs fixits is not-feasible.
+  // Following cases are not part of the generated training dataset:
+  //  - Symbols with `NeedsFixIts`.
+  //  - Forbidden symbols.
+  //  - Keywords: Dataset contains only macros and decls.
   if (Relevance.NeedsFixIts)
     Scores.ExcludingName *= 0.5;
   if (Relevance.Forbidden)
     Scores.ExcludingName *= 0;
+  if (Quality.Category == SymbolQualitySignals::Keyword)
+    Scores.ExcludingName *= 4;
 
   // NameMatch should be a multiplier on total score to support rescoring.
   Scores.Total = Relevance.NameMatch * Scores.ExcludingName;
diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -647,13 +647,13 @@ TEST(CompletionTest, ScopedWithFilter) {
 }
 
 TEST(CompletionTest, ReferencesAffectRanking) {
-  auto Results = completions("int main() { abs^ }", {ns("absl"), func("absb")});
-  EXPECT_THAT(Results.Completions,
-              HasSubsequence(Named("absb"), Named("absl")));
-  Results = completions("int main() { abs^ }",
-                        {withReferences(10000, ns("absl")), func("absb")});
-  EXPECT_THAT(Results.Completions,
-              HasSubsequence(Named("absl"), Named("absb")));
+  EXPECT_THAT(completions("int main() { abs^ }", {func("absA"), func("absB")})
+                  .Completions,
+              HasSubsequence(Named("absA"), Named("absB")));
+  EXPECT_THAT(completions("int main() { abs^ }",
+                          {func("absA"), withReferences(1000, func("absB"))})
+                  .Completions,
+              HasSubsequence(Named("absB"), Named("absA")));
 }
 
 TEST(CompletionTest, ContextWords) {