Skip to content

Commit 21bb6e2

Browse files
committed
feat: create llms.txt generator
1 parent 2b200f1 commit 21bb6e2

File tree

3 files changed

+125
-0
lines changed

3 files changed

+125
-0
lines changed

src/constants.mjs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,5 @@ export const DOC_NODE_VERSION = process.version;
66
// This is the Node.js CHANGELOG to be consumed to generate a list of all major Node.js versions
77
export const DOC_NODE_CHANGELOG_URL =
88
'https://raw.githubusercontent.com/nodejs/node/HEAD/CHANGELOG.md';
9+
10+
// This is the base URL of the latest Node.js docs, used to build absolute links to the API doc pages
export const DOC_API_LATEST_BASE_URL = 'https://nodejs.org/docs/latest';

src/generators/index.mjs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import addonVerify from './addon-verify/index.mjs';
1010
import apiLinks from './api-links/index.mjs';
1111
import oramaDb from './orama-db/index.mjs';
1212
import astJs from './ast-js/index.mjs';
13+
import llmsTxt from './llms-txt/index.mjs';
1314

1415
export const publicGenerators = {
1516
'json-simple': jsonSimple,
@@ -21,6 +22,7 @@ export const publicGenerators = {
2122
'addon-verify': addonVerify,
2223
'api-links': apiLinks,
2324
'orama-db': oramaDb,
25+
'llms-txt': llmsTxt,
2426
};
2527

2628
export const allGenerators = {

src/generators/llms-txt/index.mjs

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
'use strict';
2+
3+
import { writeFile } from 'node:fs/promises';
4+
import { join } from 'node:path';
5+
import { DOC_API_LATEST_BASE_URL } from '../../constants.mjs';
6+
7+
// API doc sources that must not get an auto-generated entry. The synopsis page
// is already linked by hand in the "Introduction" section of the generated file.
const IGNORE_LIST = ['doc/api/synopsis.md'];
8+
9+
/**
 * Collects the plain text held by an AST node and all of its descendants.
 *
 * `text` and `inlineCode` nodes contribute their `value`; any node carrying a
 * `children` array contributes the concatenation of its children's text; every
 * other node (or a missing node) contributes nothing.
 *
 * @param {import('mdast').Paragraph} node The AST node to extract text from
 * @returns {string} The extracted text content
 */
function extractTextContent(node) {
  if (!node) {
    return '';
  }

  switch (node.type) {
    case 'text':
    case 'inlineCode':
      // Leaf nodes: their literal value is the text
      return node.value;

    default:
      // Parent nodes: flatten the children depth-first, in document order
      return Array.isArray(node.children)
        ? node.children.map(child => extractTextContent(child)).join('')
        : '';
  }
}
30+
31+
/**
 * Flattens a paragraph node into the plain text of its children.
 *
 * @param {import('mdast').Paragraph} node The paragraph node to extract text from
 * @returns {string} The extracted text content
 * @throws {Error} If the node is not a paragraph
 */
function paragraphToString(node) {
  const isParagraph = node.type === 'paragraph';

  if (!isParagraph) {
    throw new Error('Node is not a paragraph');
  }

  const parts = node.children.map(child => extractTextContent(child));

  return parts.join('');
}
45+
46+
/**
 * Generates the llms.txt list item for a single API doc entry.
 *
 * Only entries whose heading has depth 1 and whose source file is not in
 * `IGNORE_LIST` produce a line; every other entry yields `null` so the caller
 * can filter it out.
 *
 * The line has the form `- [name](url): description`, where the description is
 * the text of the first paragraph found in the entry content, flattened onto a
 * single line. When no usable description exists (no paragraph at all, or a
 * paragraph that contains no extractable text) the `: description` suffix is
 * omitted instead of emitting a dangling colon.
 *
 * @param {ApiDocMetadataEntry} entry
 * @returns {string | null} The list item, or `null` when the entry is skipped
 */
function generateDocEntry(entry) {
  if (IGNORE_LIST.includes(entry.api_doc_source)) {
    return null;
  }

  if (entry.heading.depth !== 1) {
    return null;
  }

  // Remove the leading doc/ of the source path so it can be appended to the base URL
  const path = entry.api_doc_source.replace(/^doc\//, '');

  const entryLink = `[${entry.heading.data.name}](${DOC_API_LATEST_BASE_URL}/${path})`;

  const descriptionNode = entry.content.children.find(
    child => child.type === 'paragraph'
  );

  // A list item must stay on one line, so line breaks are folded into spaces;
  // trimming keeps whitespace-only paragraphs from counting as a description
  const description = descriptionNode
    ? paragraphToString(descriptionNode)
        .replace(/[\r\n]+/g, ' ')
        .trim()
    : '';

  if (!description) {
    console.warn(`No description found for entry: ${entry.api_doc_source}`);
    return `- ${entryLink}`;
  }

  return `- ${entryLink}: ${description}`;
}
82+
83+
/**
 * Generator that renders the API docs index as an `llms.txt` file.
 *
 * @typedef {Array<ApiDocMetadataEntry>} Input
 *
 * @type {GeneratorMetadata<Input, string>}
 */
export default {
  name: 'llms-txt',
  version: '0.1.0',
  description: 'Generates a llms.txt file of the API docs',
  dependsOn: 'ast',

  /**
   * Builds the llms.txt text: a fixed header followed by one list item per
   * documented API module. The text is written to `<output>/llms.txt` when an
   * output directory is given, and is returned in either case.
   *
   * @param {Input} input The API documentation metadata
   * @param {Partial<GeneratorOptions>} options Generator options
   * @returns {Promise<string>} The generated documentation text
   */
  async generate(input, options) {
    // Static preamble; the two introduction links are maintained by hand
    const lines = [
      '# Node.js Documentation',
      '> Node.js is an open-source, cross-platform JavaScript runtime environment that executes JavaScript code outside a web browser. Node.js uses an event-driven, non-blocking I/O model that makes it lightweight and efficient for building scalable network applications.',
      '## Introduction',
      `- [About this documentation](${DOC_API_LATEST_BASE_URL}/api/documentation.md)`,
      `- [Usage and example](${DOC_API_LATEST_BASE_URL}/api/synopsis.md)`,
      '## API Documentation',
    ];

    for (const entry of input) {
      const line = generateDocEntry(entry);

      // Skipped entries come back as null and are left out of the listing
      if (line) {
        lines.push(line);
      }
    }

    const resultText = lines.join('\n');

    if (options.output) {
      const target = join(options.output, 'llms.txt');

      await writeFile(target, resultText);
    }

    return resultText;
  },
};

0 commit comments

Comments
 (0)