Skip to content

Commit 7be9763

Browse files
committed
Fix heredoc implementation
Here-documents that begin in the middle of a line are replaced with string literals. That is useful for IRB, but it is not suitable for documentation. For example, def example puts [<<~TILDE, <<-HYPHEN, <<~'TILDE', <<-'HYPHEN', <<'AAA', <<"BBB", <<-`CCC`] a b c d TILDE 1 2 3 4 HYPHEN w x y z TILDE 1 2 3 4 HYPHEN aaa aa a AAA bbb bb b BBB echo 'a' echo 'b' echo 'c' CCC end MRI processes this code and produces the correct output. In the generated documentation, however, it is changed to the following: def example puts [<<~TILDE, " a b c d TILDE 1 2 3 4 ", <<~'TILDE', " w x y z TILDE 1 2 3 4 ", "aaa aa a ", "bbb bb b ", "echo 'a' echo 'b' echo 'c' "] end I think this is broken documentation. This commit fixes it.
1 parent 0c47b30 commit 7be9763

File tree

2 files changed

+66
-54
lines changed

2 files changed

+66
-54
lines changed

lib/rdoc/ruby_lex.rb

Lines changed: 64 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ def initialize(content, options)
106106
@rests = []
107107
@seek = 0
108108

109+
@heredoc_queue = []
110+
109111
@indent = 0
110112
@indent_stack = []
111113
@lex_state = :EXPR_BEG
@@ -462,21 +464,43 @@ def lex_init()
462464

463465
@OP.def_rule("\n") do |op, io|
464466
print "\\n\n" if RDoc::RubyLex.debug?
465-
case @lex_state
466-
when :EXPR_BEG, :EXPR_FNAME, :EXPR_DOT
467-
@continue = true
468-
else
469-
@continue = false
470-
@lex_state = :EXPR_BEG
471-
until (@indent_stack.empty? ||
472-
[TkLPAREN, TkLBRACK, TkLBRACE,
473-
TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
474-
@indent_stack.pop
467+
unless @heredoc_queue.empty?
468+
info = @heredoc_queue[0]
469+
if !info[:started] # "\n"
470+
info[:started] = true
471+
ungetc "\n"
472+
elsif info[:heredoc_end].nil? # heredoc body
473+
tk, heredoc_end = identify_here_document_body(info[:quoted], info[:lt], info[:indent])
474+
info[:heredoc_end] = heredoc_end
475+
ungetc "\n"
476+
else # heredoc end
477+
@heredoc_queue.shift
478+
@lex_state = :EXPR_BEG
479+
tk = Token(TkHEREDOCEND, info[:heredoc_end])
480+
if !@heredoc_queue.empty?
481+
@heredoc_queue[0][:started] = true
482+
ungetc "\n"
483+
end
475484
end
476485
end
477-
@current_readed = @readed
478-
@here_readed.clear
479-
Token(TkNL)
486+
unless tk
487+
case @lex_state
488+
when :EXPR_BEG, :EXPR_FNAME, :EXPR_DOT
489+
@continue = true
490+
else
491+
@continue = false
492+
@lex_state = :EXPR_BEG
493+
until (@indent_stack.empty? ||
494+
[TkLPAREN, TkLBRACK, TkLBRACE,
495+
TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
496+
@indent_stack.pop
497+
end
498+
end
499+
@current_readed = @readed
500+
@here_readed.clear
501+
tk = Token(TkNL)
502+
end
503+
tk
480504
end
481505

482506
@OP.def_rules("*", "**",
@@ -506,8 +530,8 @@ def lex_init()
506530
if @lex_state != :EXPR_END && @lex_state != :EXPR_CLASS &&
507531
(@lex_state != :EXPR_ARG || @space_seen)
508532
c = peek(0)
509-
if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-")
510-
tk = identify_here_document
533+
if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-" || c == "~")
534+
tk = identify_here_document(op)
511535
end
512536
end
513537
unless tk
@@ -985,77 +1009,63 @@ def identify_identifier
9851009
end
9861010
end
9871011

988-
def identify_here_document
1012+
def identify_here_document(op)
9891013
ch = getc
1014+
start_token = op
9901015
# if lt = PERCENT_LTYPE[ch]
991-
if ch == "-"
1016+
if ch == "-" or ch == "~"
1017+
start_token.concat ch
9921018
ch = getc
9931019
indent = true
9941020
end
9951021
if /['"`]/ =~ ch
1022+
start_token.concat ch
9961023
user_quote = lt = ch
9971024
quoted = ""
9981025
while (c = getc) && c != lt
9991026
quoted.concat c
10001027
end
1028+
start_token.concat quoted
1029+
start_token.concat lt
10011030
else
10021031
user_quote = nil
10031032
lt = '"'
10041033
quoted = ch.dup
10051034
while (c = getc) && c =~ /\w/
10061035
quoted.concat c
10071036
end
1037+
start_token.concat quoted
10081038
ungetc
10091039
end
10101040

1011-
ltback, @ltype = @ltype, lt
1012-
reserve = []
1013-
while ch = getc
1014-
reserve.push ch
1015-
if ch == "\\"
1016-
reserve.push ch = getc
1017-
elsif ch == "\n"
1018-
break
1019-
end
1020-
end
1021-
1022-
output_heredoc = reserve.join =~ /\A\r?\n\z/
1041+
@heredoc_queue << {
1042+
quoted: quoted,
1043+
lt: lt,
1044+
indent: indent,
1045+
started: false
1046+
}
1047+
@lex_state = :EXPR_BEG
1048+
Token(RDoc::RubyLex::TkHEREDOCBEG, start_token)
1049+
end
10231050

1024-
if output_heredoc then
1025-
doc = '<<'
1026-
doc << '-' if indent
1027-
doc << "#{user_quote}#{quoted}#{user_quote}\n"
1028-
else
1029-
doc = '"'
1030-
end
1051+
def identify_here_document_body(quoted, lt, indent)
1052+
ltback, @ltype = @ltype, lt
10311053

1032-
@current_readed = @readed
1054+
doc = ""
1055+
heredoc_end = nil
10331056
while l = gets
10341057
l = l.sub(/(:?\r)?\n\z/, "\n")
10351058
if (indent ? l.strip : l.chomp) == quoted
1059+
heredoc_end = l
10361060
break
10371061
end
10381062
doc << l
10391063
end
1064+
raise Error, "Missing terminating #{quoted} for string" unless heredoc_end
10401065

1041-
if output_heredoc then
1042-
raise Error, "Missing terminating #{quoted} for string" unless l
1043-
1044-
doc << l.chomp
1045-
else
1046-
doc << '"'
1047-
end
1048-
1049-
@current_readed = @here_readed
1050-
@here_readed.concat reserve
1051-
while ch = reserve.pop
1052-
ungetc ch
1053-
end
1054-
1055-
token_class = output_heredoc ? RDoc::RubyLex::TkHEREDOC : Ltype2Token[lt]
10561066
@ltype = ltback
1057-
@lex_state = :EXPR_END
1058-
Token(token_class, doc)
1067+
@lex_state = :EXPR_BEG
1068+
[Token(RDoc::RubyLex::TkHEREDOC, doc), heredoc_end]
10591069
end
10601070

10611071
def identify_quotation

lib/rdoc/ruby_token.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,8 @@ def Token(token, value = nil)
329329
[:TkCVAR, TkId],
330330
[:TkIVAR, TkId],
331331
[:TkCONSTANT, TkId],
332+
[:TkHEREDOCBEG, TkId],
333+
[:TkHEREDOCEND, TkId],
332334

333335
[:TkINTEGER, TkVal],
334336
[:TkFLOAT, TkVal],

0 commit comments

Comments
 (0)