Skip to content

Preserve HTML entities in headers #2122

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
May 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion assets/css/content/general.css
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,10 @@
.content-inner .section-heading i {
font-size: var(--icon-size);
color: var(--mainLight);
margin-top: 0.1em;
top: -2px;
margin-left: calc(-1 * (var(--icon-size) + var(--icon-spacing)));
padding-right: var(--icon-spacing);
position: relative;
opacity: 0;
}

Expand Down
6 changes: 6 additions & 0 deletions formatters/html/dist/html-elixir-J3PIVQVA.css

Large diffs are not rendered by default.

6 changes: 0 additions & 6 deletions formatters/html/dist/html-elixir-M6JNNWMH.css

This file was deleted.

6 changes: 0 additions & 6 deletions formatters/html/dist/html-erlang-5OIFJN4X.css

This file was deleted.

6 changes: 6 additions & 0 deletions formatters/html/dist/html-erlang-ZK43ZOAC.css

Large diffs are not rendered by default.

264 changes: 163 additions & 101 deletions lib/ex_doc/doc_ast.ex
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import Kernel, except: [to_string: 1]

defmodule ExDoc.DocAST do
# General helpers for dealing with the documentation AST
# (which is the Markdown -> HTML AST).
Expand Down Expand Up @@ -31,35 +33,32 @@ defmodule ExDoc.DocAST do
@doc """
Transform AST into string.
"""
def to_string(ast, fun \\ fn _ast, string -> string end)
def to_string(binary) do
IO.iodata_to_binary(to_iodata(binary))
end

def to_string(binary, _fun) when is_binary(binary) do
defp to_iodata(binary) when is_binary(binary) do
ExDoc.Utils.h(binary)
end

def to_string(list, fun) when is_list(list) do
result = Enum.map_join(list, "", &to_string(&1, fun))
fun.(list, result)
defp to_iodata(list) when is_list(list) do
Enum.map(list, &to_iodata/1)
end

def to_string({:comment, _attrs, inner, _meta} = ast, fun) do
fun.(ast, "<!--#{inner}-->")
defp to_iodata({:comment, _attrs, inner, _meta}) do
["<!--", inner, "-->"]
end

def to_string({tag, attrs, _inner, _meta} = ast, fun) when tag in @void_elements do
result = "<#{tag}#{ast_attributes_to_string(attrs)}/>"
fun.(ast, result)
defp to_iodata({tag, attrs, _inner, _meta}) when tag in @void_elements do
"<#{tag}#{ast_attributes_to_string(attrs)}/>"
end

def to_string({tag, attrs, inner, %{verbatim: true}} = ast, fun) do
inner = Enum.join(inner, "")
result = "<#{tag}#{ast_attributes_to_string(attrs)}>" <> inner <> "</#{tag}>"
fun.(ast, result)
defp to_iodata({tag, attrs, inner, %{verbatim: true}}) do
["<#{tag}#{ast_attributes_to_string(attrs)}>", inner, "</#{tag}>"]
end

def to_string({tag, attrs, inner, _meta} = ast, fun) do
result = "<#{tag}#{ast_attributes_to_string(attrs)}>" <> to_string(inner, fun) <> "</#{tag}>"
fun.(ast, result)
defp to_iodata({tag, attrs, inner, _meta}) do
["<#{tag}#{ast_attributes_to_string(attrs)}>", to_iodata(inner), "</#{tag}>"]
end

defp ast_attributes_to_string(attrs) do
Expand All @@ -86,17 +85,17 @@ defmodule ExDoc.DocAST do
case content do
# if we already have <pre><code>...</code></pre>, carry on
[{:code, _, _}] ->
{:pre, attrs, parse_erl_ast(content), %{}}
{:pre, attrs, parse_erl_ast(List.wrap(content)), %{}}

# otherwise, turn <pre>...</pre> into <pre><code>...</code></pre>
_ ->
content = [{:code, [], parse_erl_ast(content), %{}}]
content = [{:code, [], parse_erl_ast(List.wrap(content)), %{}}]
{:pre, attrs, content, %{}}
end
end

defp parse_erl_ast({tag, attrs, content}) when is_atom(tag) do
{tag, attrs, parse_erl_ast(content), %{}}
{tag, attrs, parse_erl_ast(List.wrap(content)), %{}}
end

@doc """
Expand All @@ -110,6 +109,49 @@ defmodule ExDoc.DocAST do
def extract_title([{:h1, _attrs, inner, _meta} | ast]), do: {:ok, inner, ast}
def extract_title(_ast), do: :error

@doc """
Collects the headers that carry an anchor (an `:id` attribute).

Walks `ast` with `reduce_tags/3` and, for every tag listed in `headers`,
emits a `{tag, text, id}` tuple where `text` is the textual content of the
header. Headers whose id or text is empty are skipped. The tuples are
returned in the order in which the tags were visited.
"""
def extract_headers_with_ids(ast, headers) do
  collected =
    reduce_tags(ast, [], fn {tag, attrs, inner, _meta}, found ->
      if tag in headers do
        anchor = Keyword.get(attrs, :id, "")
        label = ExDoc.DocAST.text(inner)

        # Only headers with both a usable anchor and visible text are kept.
        if anchor != "" and label != "" do
          [{tag, label, anchor} | found]
        else
          found
        end
      else
        found
      end
    end)

  # Entries were prepended while traversing, so restore visit order.
  Enum.reverse(collected)
end

@doc """
Gives an `:id` attribute to every header that does not already have one.

Only tags listed in `headers` are considered. The id is derived from the
header text via `ExDoc.Utils.text_to_id/1`; when the same derived id shows
up again, a `-N` suffix is appended, where `N` is the number of earlier
headers that produced that id.

An optional `prefix` is URL encoded and prepended to every generated id.
"""
def add_ids_to_headers(doc_ast, headers, prefix \\ "") do
  encoded_prefix = URI.encode(prefix)

  {updated_ast, _counts} =
    map_reduce_tags(doc_ast, %{}, fn {tag, attrs, inner, meta} = node, counts ->
      if tag in headers and not Keyword.has_key?(attrs, :id) do
        base_id = ExDoc.Utils.text_to_id(text(inner))
        occurrences = Map.get(counts, base_id, 0)

        # The first occurrence keeps the bare id; later ones get a numeric suffix.
        unique_id =
          if occurrences >= 1 do
            "#{base_id}-#{occurrences}"
          else
            base_id
          end

        updated_node = {tag, [{:id, encoded_prefix <> unique_id} | attrs], inner, meta}
        {updated_node, Map.put(counts, base_id, occurrences + 1)}
      else
        {node, counts}
      end
    end)

  updated_ast
end

@doc """
Compute a synopsis from a document by looking at its first paragraph.
"""
Expand All @@ -132,41 +174,43 @@ defmodule ExDoc.DocAST do
@doc """
Remove ids from elements.
"""
def remove_ids({tag, attrs, inner, meta}),
do: {tag, Keyword.delete(attrs, :href), remove_ids(inner), meta}

def remove_ids(list) when is_list(list),
do: Enum.map(list, &remove_ids/1)

def remove_ids(other),
do: other
def remove_ids(ast) do
map_tags(ast, fn {tag, attrs, inner, meta} ->
{tag, Keyword.delete(attrs, :href), inner, meta}
end)
end

@doc """
Returns text content from the given AST.
"""
def text(ast) do
def text(ast, joiner \\ "") do
ast
|> do_text()
|> do_text(joiner)
|> IO.iodata_to_binary()
|> String.trim()
end

defp do_text(ast) when is_list(ast), do: Enum.map(ast, &do_text/1)
defp do_text(ast) when is_binary(ast), do: ast
defp do_text({_tag, _attr, ast, _meta}), do: text(ast)
defp do_text(ast, joiner) when is_list(ast),
do: Enum.map_intersperse(ast, joiner, &do_text(&1, joiner))

defp do_text(ast, _joiner) when is_binary(ast),
do: ast

defp do_text({_tag, _attr, ast, _meta}, joiner),
do: do_text(ast, joiner)

@doc """
Wraps a list of HTML nodes into `<section>` tags whenever `matcher` returns true.
Wraps a list of HTML nodes into `<section>` tags whenever a node's tag is listed in `headers`.
"""
def sectionize(list, matcher), do: sectionize(list, matcher, [])
def sectionize(list, headers), do: sectionize(list, headers, [])

defp sectionize(list, matcher, acc) do
case pivot(list, acc, matcher) do
defp sectionize(list, headers, acc) do
case pivot(list, acc, headers) do
{acc, {header_tag, header_attrs, _, _} = header, rest} ->
{inner, rest} = Enum.split_while(rest, &not_tag?(&1, header_tag))
class = String.trim_trailing("#{header_tag} #{header_attrs[:class]}")
section = {:section, [class: class], [header | sectionize(inner, matcher, [])], %{}}
sectionize(rest, matcher, [section | acc])
section = {:section, [class: class], [header | sectionize(inner, headers, [])], %{}}
sectionize(rest, headers, [section | acc])

acc ->
acc
Expand All @@ -176,53 +220,63 @@ defmodule ExDoc.DocAST do
defp not_tag?({tag, _, _, _}, tag), do: false
defp not_tag?(_, _tag), do: true

defp pivot([head | tail], acc, fun) do
case fun.(head) do
true -> {acc, head, tail}
false -> pivot(tail, [head | acc], fun)
defp pivot([{tag, _, _, _} = head | tail], acc, headers) do
if tag in headers do
{acc, head, tail}
else
pivot(tail, [head | acc], headers)
end
end

defp pivot([], acc, _fun), do: Enum.reverse(acc)
defp pivot([head | tail], acc, headers), do: pivot(tail, [head | acc], headers)
defp pivot([], acc, _headers), do: Enum.reverse(acc)

@doc """
Highlights a DocAST converted to string.
Highlights the code blocks in the AST.
"""
# TODO: Could this be done over the AST instead?
def highlight(html, language, opts \\ []) do
def highlight(ast, language, opts \\ []) do
highlight_info = language.highlight_info()

## Html cannot be parsed with regex, but we try our best...
Regex.replace(
~r/<pre(\s[^>]*)?><code(?:\s+class="([^"\s]*)")?>([^<]*)<\/code><\/pre>/,
html,
&highlight_code_block(&1, &2, &3, &4, highlight_info, opts)
)
end

defp highlight_code_block(full_block, pre_attr, lang, code, highlight_info, outer_opts) do
case pick_language_and_lexer(lang, highlight_info, code) do
{_language, nil, _opts} ->
full_block

{lang, lexer, opts} ->
try do
render_code(pre_attr, lang, lexer, opts, code, outer_opts)
rescue
exception ->
ExDoc.Utils.warn(
[
"crashed while highlighting #{lang} snippet:\n\n",
full_block,
"\n\n",
Exception.format_banner(:error, exception, __STACKTRACE__)
],
__STACKTRACE__
)

full_block
map_tags(ast, fn
{:pre, pre_attrs, [{:code, code_attrs, [code], code_meta}], pre_meta} = ast
when is_binary(code) ->
{lang, code_attrs} = Keyword.pop(code_attrs, :class, "")

case pick_language_and_lexer(lang, highlight_info, code) do
{_lang, nil, _lexer_opts} ->
ast

{lang, lexer, lexer_opts} ->
try do
Makeup.highlight_inner_html(code,
lexer: lexer,
lexer_options: lexer_opts,
formatter_options: opts
)
rescue
exception ->
ExDoc.Utils.warn(
[
"crashed while highlighting #{lang} snippet:\n\n",
ExDoc.DocAST.to_string(ast),
"\n\n",
Exception.format_banner(:error, exception, __STACKTRACE__)
],
__STACKTRACE__
)

ast
else
highlighted ->
code_attrs = [class: "makeup #{lang}", translate: "no"] ++ code_attrs
code_meta = Map.put(code_meta, :verbatim, true)
{:pre, pre_attrs, [{:code, code_attrs, [highlighted], code_meta}], pre_meta}
end
end
end

ast ->
ast
end)
end

defp pick_language_and_lexer("", _highlight_info, "$ " <> _) do
Expand All @@ -244,35 +298,43 @@ defmodule ExDoc.DocAST do
end
end

defp render_code(pre_attr, lang, lexer, lexer_opts, code, opts) do
highlight_tag = Keyword.get(opts, :highlight_tag, "span")
## Traversal helpers

highlighted =
code
|> unescape_html()
|> IO.iodata_to_binary()
|> Makeup.highlight_inner_html(
lexer: lexer,
lexer_options: lexer_opts,
formatter_options: [highlight_tag: highlight_tag]
)
@doc """
Maps the tags in the AST, first mapping children tags, then the tag itself.

`fun` receives each `{tag, attrs, inner, meta}` tuple once its children have
been mapped and must return the replacement node.

If `inner` is not a list (for example a bare string, as can happen with ASTs
produced from edoc), the children are left as they are and only the tag
itself is handed to `fun`, instead of raising while enumerating `inner`.
"""
def map_tags({tag, attrs, inner, meta}, fun) when is_list(inner),
  do: fun.({tag, attrs, Enum.map(inner, &map_tags(&1, fun)), meta})

# Non-list content cannot be traversed; treat the tag as a leaf.
def map_tags({tag, attrs, inner, meta}, fun),
  do: fun.({tag, attrs, inner, meta})
Comment on lines +306 to +307
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def map_tags({tag, attrs, inner, meta}, fun),
do: fun.({tag, attrs, Enum.map(inner, &map_tags(&1, fun)), meta})
def map_tags({tag, attrs, inner, meta}, fun) when is_list(inner),
do: fun.({tag, attrs, Enum.map(inner, &map_tags(&1, fun)), meta})
def map_tags({tag, attrs, inner, meta}, fun),
do: fun.({tag, attrs, inner, meta})

When edoc is used to generate the doc ast, inner can be a string without being wrapped in a list.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I basically wrote all of the traversal functions assuming that the contents are always a list. :( Any chance we could normalize edoc in the long term? Meanwhile, I will also improve the traversal functions here.


~s(<pre#{pre_attr}><code class="makeup #{lang}" translate="no">#{highlighted}</code></pre>)
end
def map_tags(list, fun) when is_list(list),
do: Enum.map(list, &map_tags(&1, fun))

entities = [{"&amp;", ?&}, {"&lt;", ?<}, {"&gt;", ?>}, {"&quot;", ?"}, {"&#39;", ?'}]
def map_tags(other, _fun),
do: other

for {encoded, decoded} <- entities do
defp unescape_html(unquote(encoded) <> rest) do
[unquote(decoded) | unescape_html(rest)]
end
end
@doc """
Reduces the tags in the AST, first reducing children tags, then the tag itself.

`fun` receives each `{tag, attrs, inner, meta}` tuple together with the
accumulator obtained after reducing its children, and must return the new
accumulator.

If `inner` is not a list (for example a bare string, as can happen with ASTs
produced from edoc), there are no children to reduce and `fun` is called
with the accumulator unchanged, instead of raising while enumerating `inner`.
"""
def reduce_tags({tag, attrs, inner, meta}, acc, fun) when is_list(inner),
  do: fun.({tag, attrs, inner, meta}, Enum.reduce(inner, acc, &reduce_tags(&1, &2, fun)))

# Non-list content cannot be traversed; treat the tag as a leaf.
def reduce_tags({tag, attrs, inner, meta}, acc, fun),
  do: fun.({tag, attrs, inner, meta}, acc)

defp unescape_html(<<c, rest::binary>>) do
[c | unescape_html(rest)]
end
def reduce_tags(list, acc, fun) when is_list(list),
do: Enum.reduce(list, acc, &reduce_tags(&1, &2, fun))

def reduce_tags(_other, acc, _fun),
do: acc

defp unescape_html(<<>>) do
[]
@doc """
Map-reduces the tags in the AST, first mapping children tags, then the tag itself.

`fun` receives each `{tag, attrs, inner, meta}` tuple (with its children
already mapped) and the current accumulator, and must return a
`{new_node, new_acc}` tuple. The function returns `{mapped_ast, final_acc}`.
Nodes that are neither tags nor lists are returned untouched.

If the `inner` of a tag is not a list (for example a bare string, as can
happen with ASTs produced from edoc), the children are left as they are and
only the tag itself is handed to `fun`, instead of raising while enumerating
`inner`.
"""
def map_reduce_tags({tag, attrs, inner, meta}, acc, fun) when is_list(inner) do
  {inner, acc} = Enum.map_reduce(inner, acc, &map_reduce_tags(&1, &2, fun))
  fun.({tag, attrs, inner, meta}, acc)
end

# Non-list content cannot be traversed; treat the tag as a leaf.
def map_reduce_tags({_tag, _attrs, _inner, _meta} = ast, acc, fun),
  do: fun.(ast, acc)

def map_reduce_tags(list, acc, fun) when is_list(list),
  do: Enum.map_reduce(list, acc, &map_reduce_tags(&1, &2, fun))

def map_reduce_tags(other, acc, _fun),
  do: {other, acc}
end
10 changes: 4 additions & 6 deletions lib/ex_doc/formatter/epub.ex
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,10 @@ defmodule ExDoc.Formatter.EPUB do

defp generate_extras(config) do
for {_title, extras} <- config.extras,
extra_config <- extras,
not is_map_key(extra_config, :url) do
%{id: id, title: title, title_content: title_content, content: content} = extra_config

output = "#{config.output}/OEBPS/#{id}.xhtml"
html = Templates.extra_template(config, title, title_content, content)
node <- extras,
not is_map_key(node, :url) and node.type != :cheatmd do
output = "#{config.output}/OEBPS/#{node.id}.xhtml"
html = Templates.extra_template(config, node)

if File.regular?(output) do
Utils.warn("file #{Path.relative_to_cwd(output)} already exists", [])
Expand Down
Loading