Skip to content

Commit 2f8a88c

Browse files
authored
Improve diffs on large documents (#196)
This change adds a number of fixups to the build script so that the generated diff .tex files typeset more reliably. It also enables `--math-markup=whole` to make it easier to compare changes in equations (and to avoid weird problems where the diff markers break the equation typesetting). Fixes a number of issues reported in #166.
1 parent d3e8ed1 commit 2f8a88c

File tree

4 files changed

+187
-251
lines changed

4 files changed

+187
-251
lines changed

build.sh

Lines changed: 51 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -490,21 +490,61 @@ do_md_fixups() {
490490
# TODO: Turn this into a Pandoc filter.
491491
sed -i.bak '0,/\\tableofcontents/s/^# \(.*\)/\\section*\{\U\1\}/g' "${input}"
492492
}
493-
do_tex_fixups() {
493+
494+
# latexdiff is pretty great, but it has some incompatibilities with our template, so we
495+
# unfortunately have to do a lot of massaging of the diff .tex file here.
496+
# In the future, we should explore whether latexdiff can be further configured, or whether
497+
# our custom extensions can be redesigned to avoid some of these problems.
498+
do_diff_tex_fixups() {
494499
local input=$1
495500
# latexdiff is appending its own generated preamble to our custom one
496501
# (in apparent contradiction of the documentation). Strip it out.
497502
sed -i.bak '/^% End Custom TCG/,/^%DIF END PREAMBLE EXTENSION/d' "${input}"
498503

499504
# latexdiff uses %DIF < and %DIF > to prefix changed lines in code environments
500-
# prefix these lines with + and -
501-
sed -i.bak 's/^%DIF < /%DIF <- /g' "${input}"
502-
sed -i.bak 's/^%DIF > /%DIF >+ /g' "${input}"
503-
504-
# latexdiff' \DIFaddbegin absorbs a space before it.
505-
# This is fairly common (e.g., in the case of an added sentence)
506-
# Preserve them by inserting a space after.
507-
sed -i.bak 's/ \\DIFaddbegin/ \\DIFaddbegin ~/g' "${input}"
505+
# Mark these lines with - and + respectively, and replace %DIF with DIFDIFDIFDIF (inside DIFverbatim) so that
506+
# we don't delete the verbatim diff markers when we delete comments below.
507+
sed -i.bak '/\\begin{DIFverbatim}/,/\\end{DIFverbatim}/s/^%DIF < /DIFDIFDIFDIF <- /g' "${input}"
508+
sed -i.bak '/\\begin{DIFverbatim}/,/\\end{DIFverbatim}/s/^%DIF > /DIFDIFDIFDIF >+ /g' "${input}"
509+
510+
# Remove all block begin and end markers after the beginning of the document. See latexdiff.tex for some discussion on this.
511+
# TL;DR: the begin and end markers get put into tricky places, and we don't need to do anything inside those commands.
512+
sed -i.bak '/^\\begin{document}/,$s/\\DIF\(add\|del\|mod\)\(begin\|end\)\(FL\|\) //g ' "${input}"
513+
514+
# latexdiff erroneously puts \DIFadd inside the second argument to \multicolumn.
515+
# Move it out.
516+
sed -i.bak 's/\\multicolumn{\([^{}]*\)}{\\DIFadd{\([^{}]*\|[^{}]*{[^{}]*}\)}}/\\multicolumn{\1}{\2}/g' "${input}"
517+
518+
# Delete all lines containing only comments.
519+
sed -i.bak '/^\s*%.*$/d' "${input}"
520+
521+
# Strip comments (everything after unescaped percent signs) inside of xltabular to make the below steps easier.
522+
sed -i.bak '/\\begin{xltabular}/,/\\end{xltabular}/s/\([^\\]\)%.*$/\1/g' "${input}"
523+
sed -i.bak 's/^%.*$//g' "${input}"
524+
525+
# Combine lines inside of the xltabular environment so that (non-empty) lines all end in \\ or \\*
526+
perl -ne 's/\n/ / if $s = /\\begin{xltabular}/ .. ($e = /\\end{xltabular}/)
527+
and $s > 1 and !$e and !/.*\\\\$/ and !/.*\\\\\*$/;
528+
print' < "${input}" > "${input}".bak && mv "${input}".bak "${input}"
529+
530+
# Put newlines after \endhead, \endfirsthead, \endfoot, and \endlastfoot
531+
sed -i.bak 's/\(\\end\(head\|firsthead\|foot\|lastfoot\)\)/\1\n/g' "${input}"
532+
533+
# latexdiff inserts its markers before \multicolumn sometimes.
534+
# The \multicolumn needs to be the first thing in the cell.
535+
# Swap the order of any \DIF stuff and \multicolumn invocation inside a cell.
536+
sed -i.bak 's/\(\\DIF[^&]*\)\(\\multicolumn{[^{}]*}\({[^{}]*}\|{[^{}]*{[^{}]*}}\)\)/\2\1/g' "${input}"
537+
538+
# latexdiff inserts its markers before \hline sometimes.
539+
# After the transformations above, \hline needs to be the first thing in a line of text.
540+
sed -i.bak 's/\(\s*\)\(.*\)\(\\hline \|\\hlineifmdframed \)\(.*\)/\1\3\2\4/g' "${input}"
541+
542+
# latexdiff inside of \texttt breaks. Prefer \ttfamily.
543+
sed -i.bak 's/\\texttt{/{\\ttfamily /g' "${input}"
544+
545+
# Delete all empty DIFadd/mod/del
546+
sed -i.bak 's/\\DIF\(add\|del\|mod\){}\(FL\|\)//g' "${input}"
547+
508548
}
509549

510550
if test "${DO_GITVERSION}" == "yes"; then
@@ -819,15 +859,15 @@ if [ -n "${DIFFPDF_OUTPUT}" -o -n "${DIFFTEX_OUTPUT}" ]; then
819859
do_latex "${BUILD_DIR}/${INPUT_FILE}" "${TEMP_DIFFBASE_TEX_FILE}" "${EXTRA_PANDOC_OPTIONS} -V keepstaleimages=true"
820860
echo "Running latexdiff... (this may take a while for complex changes)"
821861
start=$(date +%s)
822-
latexdiff-fast --preamble /resources/templates/latexdiff.tex --config /resources/templates/latexdiff.cfg --append-safecmd /resources/templates/latexdiff.safe --exclude-safecmd /resources/templates/latexdiff.unsafe "${TEMP_DIFFBASE_TEX_FILE}" "${TEMP_TEX_FILE}" > "${TEMP_DIFF_TEX_FILE}" 2>"${TEMP_LATEXDIFF_LOG}"
862+
latexdiff-fast --math-markup=whole --preamble /resources/templates/latexdiff.tex --config /resources/templates/latexdiff.cfg --append-safecmd /resources/templates/latexdiff.safe --exclude-safecmd /resources/templates/latexdiff.unsafe "${TEMP_DIFFBASE_TEX_FILE}" "${TEMP_TEX_FILE}" > "${TEMP_DIFF_TEX_FILE}" 2>"${TEMP_LATEXDIFF_LOG}"
823863
end=$(date +%s)
824864
echo "Elapsed time: $(($end-$start)) seconds"
825865
if [ $? -ne 0 ]; then
826866
FAILED=true
827867
>&2 cat "${TEMP_LATEXDIFF_LOG}"
828868
echo "latexdiff failed"
829869
else
830-
do_tex_fixups "${TEMP_DIFF_TEX_FILE}"
870+
do_diff_tex_fixups "${TEMP_DIFF_TEX_FILE}"
831871
if [ -n "${DIFFTEX_OUTPUT}" ]; then
832872
mkdir -p "$(dirname ${SOURCE_DIR}/${DIFFTEX_OUTPUT})"
833873
cp "${TEMP_DIFF_TEX_FILE}" "${SOURCE_DIR}/${DIFFTEX_OUTPUT}"

0 commit comments

Comments
 (0)