Skip to content

feat: create llms.txt generator #254

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
May 3, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/constants.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ export const DOC_NODE_VERSION = process.version;
// This is the Node.js CHANGELOG to be consumed to generate a list of all major Node.js versions
export const DOC_NODE_CHANGELOG_URL =
'https://raw.githubusercontent.com/nodejs/node/HEAD/CHANGELOG.md';

// Base URL used to build absolute links to the latest published Node.js docs
// (for example, the links emitted by the llms-txt generator)
export const DOC_API_LATEST_BASE_URL = 'https://nodejs.org/docs/latest';
2 changes: 2 additions & 0 deletions src/generators/index.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import addonVerify from './addon-verify/index.mjs';
import apiLinks from './api-links/index.mjs';
import oramaDb from './orama-db/index.mjs';
import astJs from './ast-js/index.mjs';
import llmsTxt from './llms-txt/index.mjs';

export const publicGenerators = {
'json-simple': jsonSimple,
Expand All @@ -21,6 +22,7 @@ export const publicGenerators = {
'addon-verify': addonVerify,
'api-links': apiLinks,
'orama-db': oramaDb,
'llms-txt': llmsTxt,
};

export const allGenerators = {
Expand Down
121 changes: 121 additions & 0 deletions src/generators/llms-txt/index.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
'use strict';

import { writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { DOC_API_LATEST_BASE_URL } from '../../constants.mjs';

// Source files (matched against `entry.api_doc_source`) that must not get an
// auto-generated entry. The synopsis page is already linked by hand in the
// "Introduction" section of the generated llms.txt.
const IGNORE_LIST = ['doc/api/synopsis.md'];

/**
 * Recursively flattens an AST node into its plain-text content.
 *
 * `text` and `inlineCode` nodes contribute their `value`; any other node
 * contributes the concatenated text of its `children` (if it has an array of
 * them). Missing nodes and childless non-text nodes yield an empty string.
 *
 * @param {import('mdast').Nodes | null | undefined} node The AST node to flatten
 * @returns {string} The concatenated text found in the node's subtree
 */
function extractTextContent(node) {
  if (!node) {
    return '';
  }

  const { type, children } = node;

  switch (type) {
    case 'text':
    case 'inlineCode':
      // Leaf nodes carry their text directly
      return node.value;
    default:
      break;
  }

  if (!Array.isArray(children)) {
    return '';
  }

  // Collect the text of every child in order, then glue the pieces together
  const pieces = children.map(child => extractTextContent(child));

  return pieces.join('');
}

/**
 * Converts a paragraph node into a plain string by concatenating the text
 * content of each of its children.
 *
 * @param {import('mdast').Paragraph} node The paragraph node to stringify
 * @returns {string} The paragraph's text content
 * @throws {Error} If `node.type` is not `'paragraph'`
 */
function paragraphToString(node) {
  const isParagraph = node.type === 'paragraph';

  if (!isParagraph) {
    throw new Error('Node is not a paragraph');
  }

  const pieces = node.children.map(child => extractTextContent(child));

  return pieces.join('');
}

/**
 * Builds the llms.txt bullet line for a single API doc metadata entry.
 *
 * Only top-level headings (depth 1) of files that are not in `IGNORE_LIST`
 * produce a line; every other entry yields `null` so the caller can filter
 * it out. The line is a Markdown link to the doc file, followed by the first
 * paragraph of the entry (collapsed onto one line) when one exists.
 *
 * @param {ApiDocMetadataEntry} entry The metadata entry to render
 * @returns {string | null} The bullet line, or `null` if the entry is skipped
 */
function generateDocEntry(entry) {
  const { api_doc_source: source, heading } = entry;

  const isSkipped = IGNORE_LIST.includes(source) || heading.depth !== 1;

  if (isSkipped) {
    return null;
  }

  // Strip the leading "doc/" so the path is relative to the docs base URL
  const relativePath = source.replace(/^doc\//, '');
  const link = `[${heading.data.name}](${DOC_API_LATEST_BASE_URL}/${relativePath})`;

  // The first paragraph of the entry serves as its description
  const firstParagraph = entry.content.children.find(
    ({ type }) => type === 'paragraph'
  );

  if (!firstParagraph) {
    console.warn(`No description found for entry: ${entry.api_doc_source}`);
    return `- ${link}`;
  }

  // Each entry must stay on a single line, so fold line breaks into spaces
  const summary = paragraphToString(firstParagraph).replace(/[\r\n]+/g, ' ');

  return `- ${link}: ${summary}`;
}

/**
 * Generator that renders an `llms.txt` index of the API documentation: a fixed
 * header and introduction followed by one bullet line per top-level doc entry.
 *
 * @typedef {Array<ApiDocMetadataEntry>} Input
 *
 * @type {GeneratorMetadata<Input, string>}
 */
export default {
  name: 'llms-txt',
  version: '0.1.0',
  description: 'Generates a llms.txt file of the API docs',
  dependsOn: 'ast',

  /**
   * Builds the llms.txt contents and, when `options.output` is set, writes
   * them to `<options.output>/llms.txt`.
   *
   * @param {Input} input The API documentation metadata entries
   * @param {Partial<GeneratorOptions>} options Generator options
   * @returns {Promise<string>} The generated llms.txt text
   */
  async generate(input, options) {
    // Static preamble: title, summary blockquote and hand-written intro links
    const header = [
      '# Node.js Documentation',
      '> Node.js is an open-source, cross-platform JavaScript runtime environment that executes JavaScript code outside a web browser. Node.js uses an event-driven, non-blocking I/O model that makes it lightweight and efficient for building scalable network applications.',
      '## Introduction',
      `- [About this documentation](${DOC_API_LATEST_BASE_URL}/api/documentation.md)`,
      `- [Usage and example](${DOC_API_LATEST_BASE_URL}/api/synopsis.md)`,
      '## API Documentation',
    ];

    // Skipped entries come back as null and are dropped here
    const entryLines = input
      .map(entry => generateDocEntry(entry))
      .filter(line => Boolean(line));

    const resultText = [...header, ...entryLines].join('\n');

    const { output: outputDir } = options;

    if (outputDir) {
      await writeFile(join(outputDir, 'llms.txt'), resultText);
    }

    return resultText;
  },
};
Loading