Skip to content

feat: create llms.txt generator #254

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
May 3, 2025
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/constants.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ export const DOC_NODE_VERSION = process.version;
// URL of the raw Node.js CHANGELOG.md (nodejs/node at HEAD), consumed to
// generate a list of all major Node.js versions
export const DOC_NODE_CHANGELOG_URL =
'https://raw.githubusercontent.com/nodejs/node/HEAD/CHANGELOG.md';

// The base URL for the latest Node.js documentation.
// It is passed as the `base` argument of `new URL(path, base)`, so the
// trailing slash must stay: without it a relative path would replace the
// last segment (`latest`) instead of being appended below it.
export const LATEST_DOC_API_BASE_URL = 'https://nodejs.org/docs/latest/';
2 changes: 2 additions & 0 deletions src/generators/index.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import addonVerify from './addon-verify/index.mjs';
import apiLinks from './api-links/index.mjs';
import oramaDb from './orama-db/index.mjs';
import astJs from './ast-js/index.mjs';
import llmsTxt from './llms-txt/index.mjs';

export const publicGenerators = {
'json-simple': jsonSimple,
Expand All @@ -21,6 +22,7 @@ export const publicGenerators = {
'addon-verify': addonVerify,
'api-links': apiLinks,
'orama-db': oramaDb,
'llms-txt': llmsTxt,
};

export const allGenerators = {
Expand Down
6 changes: 6 additions & 0 deletions src/generators/llms-txt/constants.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Source paths (in the `doc/api/...` form) of files that are not API
// reference pages. They are linked manually in the "Introduction" section of
// the llms.txt file (see utils/getIntroLinks.mjs).
export const ENTRY_IGNORE_LIST = [
'doc/api/synopsis.md',
'doc/api/documentation.md',
];
64 changes: 64 additions & 0 deletions src/generators/llms-txt/index.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { readFile, writeFile } from 'node:fs/promises';
import { join } from 'node:path';

import { buildApiDocLink } from './utils/buildApiDocLink.mjs';
import { ENTRY_IGNORE_LIST } from './constants.mjs';
import { getIntroLinks } from './utils/getIntroLinks.mjs';

/**
 * This generator generates a llms.txt file to provide information to LLMs at
 * inference time
 *
 * @typedef {Array<ApiDocMetadataEntry>} Input
 *
 * @type {GeneratorMetadata<Input, string>}
 */
export default {
  name: 'llms-txt',

  version: '1.0.0',

  description:
    'Generates a llms.txt file to provide information to LLMs at inference time',

  dependsOn: 'ast',

  /**
   * Generates a llms.txt file.
   *
   * Reads `template.txt` next to this module, fills the `__INTRODUCTION__`
   * and `__API_DOCS__` placeholders and, when `output` is set, writes the
   * result to `<output>/llms.txt`.
   *
   * @param {Input} entries
   * @param {Partial<GeneratorOptions>} options
   * @returns {Promise<string>} The filled template
   */
  async generate(entries, { output }) {
    const template = await readFile(
      join(import.meta.dirname, 'template.txt'),
      'utf-8'
    );

    const introLinks = getIntroLinks().join('\n');

    const apiDocsLinks = entries
      // Keep only top-level headings, and drop the files that are already
      // linked by hand in the introduction. The ignore list holds
      // `doc/api/...` paths, which is the form found in `api_doc_source`.
      .filter(
        entry =>
          entry.heading.depth === 1 &&
          !ENTRY_IGNORE_LIST.includes(entry.api_doc_source)
      )
      .map(entry => `- ${buildApiDocLink(entry)}`)
      .join('\n');

    // Replacer functions are used on purpose: with a string replacement,
    // sequences such as `$&` or `$'` inside a description would be
    // interpreted as special replacement patterns.
    const filledTemplate = template
      .replace('__INTRODUCTION__', () => introLinks)
      .replace('__API_DOCS__', () => apiDocsLinks);

    if (output) {
      await writeFile(join(output, 'llms.txt'), filledTemplate);
    }

    return filledTemplate;
  },
};
9 changes: 9 additions & 0 deletions src/generators/llms-txt/template.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Node.js Documentation

> Node.js is an open-source, cross-platform JavaScript runtime environment that executes JavaScript code outside a web browser. Node.js uses an event-driven, non-blocking I/O model that makes it lightweight and efficient for building scalable network applications.

## Introduction
__INTRODUCTION__

## API Documentation
__API_DOCS__
33 changes: 33 additions & 0 deletions src/generators/llms-txt/utils/buildApiDocLink.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { LATEST_DOC_API_BASE_URL } from '../../../constants.mjs';
import { transformNodeToString } from '../../../utils/unist.mjs';

/**
 * Renders one API doc entry as a markdown link, followed by a one-line
 * description when the entry's content has a paragraph.
 *
 * @param {ApiDocMetadataEntry} entry
 * @returns {string}
 */
export const buildApiDocLink = entry => {
  const label = entry.heading.data.name;

  // Strip a leading `doc/` so the path resolves relative to the docs base URL
  const relativePath = entry.api_doc_source.replace(/^doc\//, '');
  const href = new URL(relativePath, LATEST_DOC_API_BASE_URL);
  const markdownLink = `[${label}](${href})`;

  // The first paragraph of the content, if any, serves as the description
  const firstParagraph = entry.content.children.find(
    ({ type }) => type === 'paragraph'
  );

  if (firstParagraph) {
    // Collapse each run of line breaks into a single space
    const summary = transformNodeToString(firstParagraph).replace(
      /[\r\n]+/g,
      ' '
    );

    return `${markdownLink}: ${summary}`;
  }

  return markdownLink;
};
18 changes: 18 additions & 0 deletions src/generators/llms-txt/utils/getIntroLinks.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { LATEST_DOC_API_BASE_URL } from '../../../constants.mjs';

/**
 * Generates a list of introduction links for the llms.txt file
 *
 * @returns {string[]} Markdown list items, one per introduction page
 */
export const getIntroLinks = () => {
  // The paths are deliberately relative (no leading slash): a path starting
  // with `/` would be resolved against the origin only and drop the
  // `docs/latest/` part of the base URL.
  const aboutDocUrl = new URL('api/documentation.md', LATEST_DOC_API_BASE_URL);
  const usageExamplesUrl = new URL('api/synopsis.md', LATEST_DOC_API_BASE_URL);

  return [
    `- [About this documentation](${aboutDocUrl})`,
    `- [Usage and examples](${usageExamplesUrl})`,
  ];
};
6 changes: 3 additions & 3 deletions src/types.d.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import type { Heading, Root } from '@types/mdast';
import type { Heading, Root } from 'mdast';
import type { Program } from 'acorn';
import type { SemVer } from 'semver';
import type { Data, Node, Parent, Position } from 'unist';

// Unist Node with typed Data, which allows better type inference.
// Declared as an intersection type alias because an interface cannot extend
// a bare type parameter such as `T`.
type NodeWithData<T extends Node, J extends Data> = T & {
  data: J;
};

declare global {
export interface StabilityIndexMetadataEntry {
Expand Down
34 changes: 20 additions & 14 deletions src/utils/unist.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,25 @@

import { pointEnd, pointStart } from 'unist-util-position';

/**
 * Extracts text content from a node recursively
 *
 * Inline code, strong and emphasis nodes are wrapped in their markdown
 * markers again; any other node contributes the text of its children, or its
 * own `value` when it has no children.
 *
 * @param {import('unist').Node} node The Node to be transformed into a string
 * @returns {string} The transformed Node as a string
 */
export const transformNodeToString = node => {
  switch (node.type) {
    case 'inlineCode':
      return `\`${node.value}\``;
    case 'strong':
      return `**${transformNodesToString(node.children)}**`;
    case 'emphasis':
      return `_${transformNodesToString(node.children)}_`;
    default:
      if (node.children) {
        return transformNodesToString(node.children);
      }

      // Leaf nodes without a `value` yield an empty string, so the function
      // always returns a string
      return node.value ?? '';
  }
};

/**
* This utility allows us to join children Nodes into one
* and transform them back to what their source would look like
Expand All @@ -10,20 +29,7 @@ import { pointEnd, pointStart } from 'unist-util-position';
* @returns {string} The parsed and joined nodes as a string
*/
export const transformNodesToString = nodes => {
  // Each node is rendered by `transformNodeToString`; the pieces are then
  // concatenated without any separator
  const mappedChildren = nodes.map(node => transformNodeToString(node));

  return mappedChildren.join('');
};
Expand Down
Loading