[lex] Replace \term with \placeholder or \defn as appropriate (#1067)

jensmaurer · tkoeppe · commit b8e0e09f33d8 · 2016-11-18T17:46:58.000Z
Partially addresses #329.
diff --git a/source/lex.tex b/source/lex.tex
@@ -20,14 +20,16 @@
 \indextext{pointer literal|see{literal, pointer}}
 \indextext{user-defined literal|see{literal, user-defined}}
 \indextext{file, source|see{source file}}
+\indextext{null character|see{character, null}}
+\indextext{null wide character|see{wide-character, null}}
 
 \rSec1[lex.separate]{Separate translation}
 
 \pnum
 \indextext{conventions!lexical|(}%
 \indextext{compilation!separate|(}%
 The text of the program is kept in units called
-\indextext{source file}\term{source files} in this International
+\defnx{source files}{source file} in this International
 Standard. A source file together with all the headers~(\ref{headers})
 and source files included~(\ref{cpp.include}) via the preprocessing
 directive \tcode{\#include}, less any source lines skipped by any of the
@@ -56,7 +58,6 @@
 occur, although in practice different phases might be folded together.}
 
 \begin{enumerate}
-\indextext{source file}%
 \indextext{character!source file}%
 \indextext{character set!basic source}%
 \item Physical source file characters are mapped, in an
@@ -174,8 +175,7 @@
 
 \pnum
 \indextext{character set|(}%
-\indextext{character set!basic source}%
-The \term{basic source character set} consists of 96 characters: the space character,
+The \defnx{basic source character set}{character set!basic source} consists of 96 characters: the space character,
 the control characters representing horizontal tab, vertical tab, form feed, and
 new-line, plus the following 91 graphical characters:\footnote{The glyphs for
 the members of the basic source character set are intended to
@@ -229,17 +229,18 @@
 \grammarterm{universal-character-name}.}
 
 \pnum
-The \term{basic execution character set} and the \term{basic
-execution wide-character set} shall each contain all the members of the
+The \defnx{basic execution character set}{character set!basic execution} and the
+\defnx{basic execution wide-character set}{wide-character set!basic execution}
+shall each contain all the members of the
 basic source character set, plus control characters representing alert,
-backspace, and carriage return, plus a \term{null character}
-(respectively, \term{null wide character}), whose value is 0.
+backspace, and carriage return, plus a \defnx{null character}{character!null}
+(respectively, \defnx{null wide character}{wide-character!null}), whose value is 0.
 For each basic execution character set, the values of the
 members shall be non-negative and distinct from one another. In both the
 source and execution basic character sets, the value of each character
 after \tcode{0} in the above list of decimal digits shall be one greater
-than the value of the previous. The \term{execution character set}
-and the \term{execution wide-character set} are
+than the value of the previous. The \defnx{execution character set}{character set!execution}
+and the \defnx{execution wide-character set}{wide-character set!execution} are
 \impldef{execution character set and execution wide-character set}
 supersets of the
 basic execution character set and the basic execution wide-character
@@ -930,26 +931,22 @@
 \pnum
 \indextext{literal!\idxcode{unsigned}}%
 \indextext{literal!\idxcode{long}}%
-\indextext{literal!integer}%
-\indextext{literal!binary}%
-\indextext{literal!octal}%
-\indextext{literal!decimal}%
-\indextext{literal!hexadecimal}%
 \indextext{literal!base~of integer}%
-An \term{integer literal} is a sequence of digits that has no period
+An \defnx{integer literal}{literal!integer} is a sequence of digits that has no period
 or exponent part, with optional separating single quotes that are ignored
 when determining its value. An integer literal may have a prefix that specifies
 its base and a suffix that specifies its type. The lexically first digit
 of the sequence of digits is the most significant.
-A \term{binary} integer literal (base two) begins with
+A \defnx{binary integer literal}{literal!binary} (base two) begins with
 \tcode{0b} or \tcode{0B} and consists of a sequence of binary digits.
-An \term{octal} integer
-literal (base eight) begins with the digit \tcode{0} and consists of a
+An \defnx{octal integer literal}{literal!octal}
+(base eight) begins with the digit \tcode{0} and consists of a
 sequence of octal digits.\footnote{The digits \tcode{8} and \tcode{9} are not octal digits. }
-A \term{decimal}
-integer literal (base ten) begins with a digit other than \tcode{0} and
+A \defnx{decimal integer literal}{literal!decimal}
+(base ten) begins with a digit other than \tcode{0} and
 consists of a sequence of decimal digits.
-A \term{hexadecimal} integer literal (base sixteen) begins with
+A \defnx{hexadecimal integer literal}{literal!hexadecimal}
+(base sixteen) begins with
 \tcode{0x} or \tcode{0X} and consists of a sequence of hexadecimal
 digits, which include the decimal digits and the letters \tcode{a}
 through \tcode{f} and \tcode{A} through \tcode{F} with decimal values
@@ -1358,10 +1355,8 @@
 The integer and fraction parts both consist of
 a sequence of decimal (base ten) digits if there is no prefix, or
 hexadecimal (base sixteen) digits if the prefix is \tcode{0x} or \tcode{0X}.
-\indextext{literal!decimal floating}%
-The literal is a \term{decimal floating literal} in the former case and
-\indextext{literal!hexadecimal floating}%
-a \term{hexadecimal floating literal} in the latter case.
+The literal is a \defnx{decimal floating literal}{literal!decimal floating} in the former case and
+a \defnx{hexadecimal floating literal}{literal!hexadecimal floating} in the latter case.
 Optional separating single quotes in
 a \grammarterm{digit-sequence} or \grammarterm{hexadecimal-digit-sequence}
 are ignored when determining its value.
@@ -1558,7 +1553,7 @@
 also referred to as narrow
 string literals. A narrow string literal has type
 \indextext{literal!string!type~of}%
-``array of \term{n} \tcode{const char}'', where \term{n} is the size of
+``array of \placeholder{n} \tcode{const char}'', where \placeholder{n} is the size of
 the string as defined below, and has static storage
 duration~(\ref{basic.stc}).
 
@@ -1573,7 +1568,7 @@
 \indextext{prefix!\idxcode{u}}%
 such as \tcode{u"asdf"}, is
 a \tcode{char16_t} string literal. A \tcode{char16_t} string literal has
-type ``array of \term{n} \tcode{const char16_t}'', where \term{n} is the
+type ``array of \placeholder{n} \tcode{const char16_t}'', where \placeholder{n} is the
 size of the string as defined below; it
 is initialized with the given characters. A single \grammarterm{c-char} may
 produce more than one \tcode{char16_t} character in the form of
@@ -1585,7 +1580,7 @@
 \indextext{prefix!\idxcode{U}}%
 such as \tcode{U"asdf"}, is
 a \tcode{char32_t} string literal. A \tcode{char32_t} string literal has
-type ``array of \term{n} \tcode{const char32_t}'', where \term{n} is the
+type ``array of \placeholder{n} \tcode{const char32_t}'', where \placeholder{n} is the
 size of the string as defined below; it
 is initialized with the given characters.
 
@@ -1598,8 +1593,8 @@
 \indextext{\idxcode{wchar_t}}%
 \indextext{literal!string!wide}%
 \indextext{prefix!\idxcode{L}}%
-A wide string literal has type ``array of \term{n} \tcode{const
-wchar_t}'', where \term{n} is the size of the string as defined below; it
+A wide string literal has type ``array of \placeholder{n} \tcode{const
+wchar_t}'', where \placeholder{n} is the size of the string as defined below; it
 is initialized with the given characters.
 
 \pnum
@@ -1654,13 +1649,12 @@
 \pnum
 \indextext{\idxcode{0}|seealso{zero,~null}}%
 \indextext{\idxcode{0}!string terminator}%
-\indextext{\idxcode{0}!null~character}%
+\indextext{\idxcode{0}!null~character|see{character, null}}%
 After any necessary concatenation, in translation phase
 7~(\ref{lex.phases}), \tcode{'\textbackslash 0'} is appended to every
 string literal so that programs that scan a string can find its end.
 
 \pnum
-\indextext{encoding!multibyte}%
 Escape sequences and \grammarterm{universal-character-name}{s} in non-raw string literals
 have the same meaning as in character literals~(\ref{lex.ccon}), except that
 the single quote \tcode{'} is representable either by itself or by the escape sequence
@@ -1670,7 +1664,7 @@
 \tcode{char16_t} string literal may yield a surrogate pair.
 \indextext{string!\idxcode{sizeof}}%
 In a narrow string literal, a \grammarterm{universal-character-name} may map to more
-than one \tcode{char} element due to \term{multibyte encoding}. The
+than one \tcode{char} element due to \defnx{multibyte encoding}{encoding!multibyte}. The
 size of a \tcode{char32_t} or wide string literal is the total number of
 escape sequences, \grammarterm{universal-character-name}{s}, and other characters, plus
 one for the terminating \tcode{U'\textbackslash 0'} or
@@ -1786,93 +1780,93 @@
 \pnum
 A \grammarterm{user-defined-literal} is treated as a call to a literal operator or
 literal operator template~(\ref{over.literal}). To determine the form of this call for a
-given \grammarterm{user-defined-literal} \term{L} with \grammarterm{ud-suffix} \term{X},
-the \grammarterm{literal-operator-id} whose literal suffix identifier is \term{X} is
-looked up in the context of \term{L} using the rules for unqualified name
-lookup~(\ref{basic.lookup.unqual}). Let \term{S} be the set of declarations found by
-this lookup. \term{S} shall not be empty.
+given \grammarterm{user-defined-literal} \placeholder{L} with \grammarterm{ud-suffix} \placeholder{X},
+the \grammarterm{literal-operator-id} whose literal suffix identifier is \placeholder{X} is
+looked up in the context of \placeholder{L} using the rules for unqualified name
+lookup~(\ref{basic.lookup.unqual}). Let \placeholder{S} be the set of declarations found by
+this lookup. \placeholder{S} shall not be empty.
 
 \pnum
-If \term{L} is a \grammarterm{user-defined-integer-literal}, let \term{n} be the literal
-without its \grammarterm{ud-suffix}. If \term{S} contains a literal operator with
-parameter type \tcode{unsigned long long}, the literal \term{L} is treated as a call of
+If \placeholder{L} is a \grammarterm{user-defined-integer-literal}, let \placeholder{n} be the literal
+without its \grammarterm{ud-suffix}. If \placeholder{S} contains a literal operator with
+parameter type \tcode{unsigned long long}, the literal \placeholder{L} is treated as a call of
 the form
 
 \begin{codeblock}
-operator "" @\term{X}@(@\term{n}@ULL)
+operator "" @\placeholder{X}@(@\placeholder{n}@ULL)
 \end{codeblock}
 
-Otherwise, \term{S} shall contain a raw literal operator or a literal operator
-template~(\ref{over.literal}) but not both. If \term{S} contains a raw literal operator,
-the literal \term{L} is treated as a call of the form
+Otherwise, \placeholder{S} shall contain a raw literal operator or a literal operator
+template~(\ref{over.literal}) but not both. If \placeholder{S} contains a raw literal operator,
+the literal \placeholder{L} is treated as a call of the form
 
 \begin{codeblock}
-operator "" @\term{X}@(@"\term{n}{"}@)
+operator "" @\placeholder{X}@(@"\placeholder{n}{"}@)
 \end{codeblock}
 
-Otherwise (\term{S} contains a literal operator template), \term{L} is treated as a call
+Otherwise (\placeholder{S} contains a literal operator template), \placeholder{L} is treated as a call
 of the form
 
 
 \begin{codeblock}
-operator "" @\term{X}@<'@$c_1$@', '@$c_2$@', ... '@$c_k$@'>()
+operator "" @\placeholder{X}@<'@$c_1$@', '@$c_2$@', ... '@$c_k$@'>()
 \end{codeblock}
 
-where \term{n} is the source character sequence $c_1c_2...c_k$. \begin{note} The sequence
+where \placeholder{n} is the source character sequence $c_1c_2...c_k$. \begin{note} The sequence
 $c_1c_2...c_k$ can only contain characters from the basic source character set.
 \end{note}
 
 \pnum
-If \term{L} is a \grammarterm{user-defined-floating-literal}, let \term{f} be the
-literal without its \grammarterm{ud-suffix}. If \term{S} contains a literal operator
-with parameter type \tcode{long double}, the literal \term{L} is treated as a call of
+If \placeholder{L} is a \grammarterm{user-defined-floating-literal}, let \placeholder{f} be the
+literal without its \grammarterm{ud-suffix}. If \placeholder{S} contains a literal operator
+with parameter type \tcode{long double}, the literal \placeholder{L} is treated as a call of
 the form
 
 \begin{codeblock}
-operator "" @\term{X}@(@\term{f}@L)
+operator "" @\placeholder{X}@(@\placeholder{f}@L)
 \end{codeblock}
 
-Otherwise, \term{S} shall contain a raw literal operator or a literal operator
-template~(\ref{over.literal}) but not both. If \term{S} contains a raw literal operator,
-the \term{literal} \term{L} is treated as a call of the form
+Otherwise, \placeholder{S} shall contain a raw literal operator or a literal operator
+template~(\ref{over.literal}) but not both. If \placeholder{S} contains a raw literal operator,
+the literal \placeholder{L} is treated as a call of the form
 
 \begin{codeblock}
-operator "" @\term{X}@(@"\term{f}{"}@)
+operator "" @\placeholder{X}@(@"\placeholder{f}{"}@)
 \end{codeblock}
 
-Otherwise (\term{S} contains a literal operator template), \term{L} is treated as a call
+Otherwise (\placeholder{S} contains a literal operator template), \placeholder{L} is treated as a call
 of the form
 
 \begin{codeblock}
-operator "" @\term{X}@<'@$c_1$@', '@$c_2$@', ... '@$c_k$@'>()
+operator "" @\placeholder{X}@<'@$c_1$@', '@$c_2$@', ... '@$c_k$@'>()
 \end{codeblock}
 
-where \term{f} is the source character sequence $c_1c_2...c_k$. \begin{note} The sequence
+where \placeholder{f} is the source character sequence $c_1c_2...c_k$. \begin{note} The sequence
 $c_1c_2...c_k$ can only contain characters from the basic source character set.
 \end{note}
 
 \pnum
-If \term{L} is a \grammarterm{user-defined-string-literal}, let \term{str} be the
-literal without its \grammarterm{ud-suffix} and let \term{len} be
+If \placeholder{L} is a \grammarterm{user-defined-string-literal}, let \placeholder{str} be the
+literal without its \grammarterm{ud-suffix} and let \placeholder{len} be
 the number of
-code units in \term{str} (i.e., its length excluding the terminating
+code units in \placeholder{str} (i.e., its length excluding the terminating
 null character).
- The literal \term{L} is treated as a call of the form
+ The literal \placeholder{L} is treated as a call of the form
 
 \begin{codeblock}
-operator "" @\term{X}@(@\term{str}{}@, @\term{len}{}@)
+operator "" @\placeholder{X}@(@\placeholder{str}{}@, @\placeholder{len}{}@)
 \end{codeblock}
 
 \pnum
-If \term{L} is a \grammarterm{user-defined-character-literal}, let \term{ch} be the
+If \placeholder{L} is a \grammarterm{user-defined-character-literal}, let \placeholder{ch} be the
 literal without its \grammarterm{ud-suffix}.
-\term{S} shall contain a literal operator~(\ref{over.literal}) whose only parameter has
-the type of \term{ch} and the
-literal \term{L} is treated as a call
+\placeholder{S} shall contain a literal operator~(\ref{over.literal}) whose only parameter has
+the type of \placeholder{ch} and the
+literal \placeholder{L} is treated as a call
 of the form
 
 \begin{codeblock}
-operator "" @\term{X}@(@\term{ch}{}@)
+operator "" @\placeholder{X}@(@\placeholder{ch}{}@)
 \end{codeblock}
 
 \pnum