Fix relative link detection across display parts

Gerrit0 · Gerrit0 · commit e9bd40b2f9e2 · 2024-06-22T13:03:01.000-06:00
This feels entirely too much like one of those pieces of code that is going to have to change another 20 times before it's actually correct, but this is better than it used to be... Resolves #2606
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,7 @@
 -   Added `@author` to the default list of recognized tags, #2603.
 -   Anchor links are no longer incorrectly checked for relative paths, #2604.
 -   Fixed an issue where line numbers reported in error messages could be incorrect, #2605.
+-   Fixed relative link detection for markdown links containing code in their label, #2606.
 
 ### Thanks!
 
diff --git a/src/lib/converter/comments/parser.ts b/src/lib/converter/comments/parser.ts
@@ -17,7 +17,7 @@ import type {
     TranslationProxy,
 } from "../../internationalization/internationalization";
 import { FileRegistry } from "../../models/FileRegistry";
-import { textContent } from "./textParser";
+import { textContent, TextParserReentryState } from "./textParser";
 
 interface LookaheadGenerator<T> {
     done(): boolean;
@@ -137,13 +137,15 @@ export function parseCommentString(
         },
     };
 
+    const reentry = new TextParserReentryState();
     const content: CommentDisplayPart[] = [];
     const lexer = makeLookaheadGenerator(tokens);
 
     let atNewLine = false;
     while (!lexer.done()) {
         let consume = true;
         const next = lexer.peek();
+        reentry.checkState(next);
 
         switch (next.kind) {
             case TokenSyntaxKind.TypeAnnotation:
@@ -162,6 +164,7 @@ export function parseCommentString(
                     content,
                     files,
                     atNewLine,
+                    reentry,
                 );
                 break;
 
@@ -576,9 +579,11 @@ function blockContent(
 ): CommentDisplayPart[] {
     const content: CommentDisplayPart[] = [];
     let atNewLine = true as boolean;
+    const reentry = new TextParserReentryState();
 
     loop: while (!lexer.done()) {
         const next = lexer.peek();
+        reentry.checkState(next);
         let consume = true;
 
         switch (next.kind) {
@@ -595,6 +600,7 @@ function blockContent(
                     /*out*/ content,
                     files,
                     atNewLine,
+                    reentry,
                 );
                 break;
 
diff --git a/src/lib/converter/comments/textParser.ts b/src/lib/converter/comments/textParser.ts
@@ -34,6 +34,34 @@ interface RelativeLink {
     target: number | undefined;
 }
 
+/**
+ * This is incredibly unfortunate. The comment lexer owns the responsibility
+ * for splitting up text into text/code, this is totally fine for HTML links
+ * but for markdown links, ``[`code`](./link)`` is valid, so we need to keep
+ * track of state across calls to {@link textContent}.
+ */
+export class TextParserReentryState {
+    withinLinkLabel = false;
+    private lastPartWasNewline = false;
+
+    checkState(token: Token) {
+        switch (token.kind) {
+            case TokenSyntaxKind.Code:
+                if (/\n\s*\n/.test(token.text)) {
+                    this.withinLinkLabel = false;
+                }
+                break;
+            case TokenSyntaxKind.NewLine:
+                if (this.lastPartWasNewline) {
+                    this.withinLinkLabel = false;
+                }
+                break;
+        }
+
+        this.lastPartWasNewline = token.kind === TokenSyntaxKind.NewLine;
+    }
+}
+
 /**
  * Look for relative links within a piece of text and add them to the {@link FileRegistry}
  * so that they can be correctly resolved during rendering.
@@ -46,6 +74,7 @@ export function textContent(
     outContent: CommentDisplayPart[],
     files: FileRegistry,
     atNewLine: boolean,
+    reentry: TextParserReentryState,
 ) {
     let lastPartEnd = 0;
     const data: TextParserData = {
@@ -86,7 +115,7 @@ export function textContent(
     }
 
     while (data.pos < token.text.length) {
-        const link = checkMarkdownLink(data);
+        const link = checkMarkdownLink(data, reentry);
         if (link) {
             addRef(link);
             continue;
@@ -123,37 +152,54 @@ export function textContent(
  * Reference: https://github.com/markdown-it/markdown-it/blob/14.1.0/lib/rules_inline/image.mjs
  *
  */
-function checkMarkdownLink(data: TextParserData): RelativeLink | undefined {
+function checkMarkdownLink(
+    data: TextParserData,
+    reentry: TextParserReentryState,
+): RelativeLink | undefined {
     const { token, sourcePath, files } = data;
 
-    if (token.text[data.pos] === "[") {
-        const labelEnd = findLabelEnd(token.text, data.pos + 1);
-        if (
-            labelEnd !== -1 &&
-            token.text[labelEnd] === "]" &&
-            token.text[labelEnd + 1] === "("
-        ) {
-            const link = MdHelpers.parseLinkDestination(
-                token.text,
-                labelEnd + 2,
-                token.text.length,
-            );
+    let searchStart: number;
+    if (reentry.withinLinkLabel) {
+        searchStart = data.pos;
+        reentry.withinLinkLabel = false;
+    } else if (token.text[data.pos] === "[") {
+        searchStart = data.pos + 1;
+    } else {
+        return;
+    }
 
-            if (link.ok) {
-                // Only make a relative-link display part if it's actually a relative link.
-                // Discard protocol:// links, unix style absolute paths, and windows style absolute paths.
-                if (isRelativeLink(link.str)) {
-                    return {
-                        pos: labelEnd + 2,
-                        end: link.pos,
-                        target: files.register(sourcePath, link.str),
-                    };
-                }
+    const labelEnd = findLabelEnd(token.text, searchStart);
+    if (labelEnd === -1) {
+        // This markdown link might be split across multiple display parts
+        // [ `text` ](link)
+        // ^^ text
+        //   ^^^^^^ code
+        //         ^^^^^^^^ text
+        reentry.withinLinkLabel = true;
+        return;
+    }
+
+    if (token.text[labelEnd] === "]" && token.text[labelEnd + 1] === "(") {
+        const link = MdHelpers.parseLinkDestination(
+            token.text,
+            labelEnd + 2,
+            token.text.length,
+        );
 
-                // This was a link, skip ahead to ensure we don't happen to parse
-                // something else as a link within the link.
-                data.pos = link.pos - 1;
+        if (link.ok) {
+            // Only make a relative-link display part if it's actually a relative link.
+            // Discard protocol:// links, unix style absolute paths, and windows style absolute paths.
+            if (isRelativeLink(link.str)) {
+                return {
+                    pos: labelEnd + 2,
+                    end: link.pos,
+                    target: files.register(sourcePath, link.str),
+                };
             }
+
+            // This was a link, skip ahead to ensure we don't happen to parse
+            // something else as a link within the link.
+            data.pos = link.pos - 1;
         }
     }
 }
@@ -265,6 +311,7 @@ function findLabelEnd(text: string, pos: number) {
         switch (text[pos]) {
             case "\n":
             case "]":
+            case "[":
                 return pos;
         }
         ++pos;
diff --git a/src/lib/models/FileRegistry.ts b/src/lib/models/FileRegistry.ts
@@ -8,7 +8,7 @@ import type { Reflection } from "./reflections";
 export class FileRegistry {
     protected nextId = 1;
 
-    // The combination of thest two make up the registry
+    // The combination of these two make up the registry
     protected mediaToReflection = new Map<number, Reflection>();
     protected mediaToPath = new Map<number, string>();
 
diff --git a/src/test/comments.test.ts b/src/test/comments.test.ts
@@ -1371,6 +1371,48 @@ describe("Comment Parser", () => {
         ] satisfies CommentDisplayPart[]);
     });
 
+    it("#2606 Recognizes markdown links which contain inline code in the label", () => {
+        const comment = getComment(`/**
+            * [\`text\`](./relative.md)
+            * [\`text\`
+            * more](./relative.md)
+            * [\`text\`
+            *
+            * more](./relative.md)
+            */`);
+
+        equal(comment.summary, [
+            // Simple case with code
+            { kind: "text", text: "[" },
+            { kind: "code", text: "`text`" },
+            { kind: "text", text: "](" },
+            { kind: "relative-link", text: "./relative.md", target: 1 },
+            // Labels can also include single newlines
+            { kind: "text", text: ")\n[" },
+            { kind: "code", text: "`text`" },
+            { kind: "text", text: "\nmore](" },
+            { kind: "relative-link", text: "./relative.md", target: 1 },
+            // But not double!
+            { kind: "text", text: ")\n[" },
+            { kind: "code", text: "`text`" },
+            { kind: "text", text: "\n\nmore](./relative.md)" },
+        ] satisfies CommentDisplayPart[]);
+    });
+
+    it("Recognizes markdown links which contain inline code in the label", () => {
+        const comment = getComment(`/**
+            * [\`text\`](./relative.md)
+            */`);
+
+        equal(comment.summary, [
+            { kind: "text", text: "[" },
+            { kind: "code", text: "`text`" },
+            { kind: "text", text: "](" },
+            { kind: "relative-link", text: "./relative.md", target: 1 },
+            { kind: "text", text: ")" },
+        ] satisfies CommentDisplayPart[]);
+    });
+
     it("Recognizes markdown reference definition blocks", () => {
         const comment = getComment(`/**
             * [1]: ./example.md