Skip to content

Commit b8fdc60

Browse files
authored
Merge pull request #479 from aycabta/fix-heredoc
Fix heredoc
2 parents 68bd8f5 + b0a0554 commit b8fdc60

File tree

3 files changed

+93
-70
lines changed

3 files changed

+93
-70
lines changed

lib/rdoc/ruby_lex.rb

Lines changed: 64 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,8 @@ def initialize(content, options)
106106
@rests = []
107107
@seek = 0
108108

109+
@heredoc_queue = []
110+
109111
@indent = 0
110112
@indent_stack = []
111113
@lex_state = :EXPR_BEG
@@ -464,21 +466,43 @@ def lex_init()
464466

465467
@OP.def_rule("\n") do |op, io|
466468
print "\\n\n" if RDoc::RubyLex.debug?
467-
case @lex_state
468-
when :EXPR_BEG, :EXPR_FNAME, :EXPR_DOT
469-
@continue = true
470-
else
471-
@continue = false
472-
@lex_state = :EXPR_BEG
473-
until (@indent_stack.empty? ||
474-
[TkLPAREN, TkLBRACK, TkLBRACE,
475-
TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
476-
@indent_stack.pop
469+
unless @heredoc_queue.empty?
470+
info = @heredoc_queue[0]
471+
if !info[:started] # "\n"
472+
info[:started] = true
473+
ungetc "\n"
474+
elsif info[:heredoc_end].nil? # heredoc body
475+
tk, heredoc_end = identify_here_document_body(info[:quoted], info[:lt], info[:indent])
476+
info[:heredoc_end] = heredoc_end
477+
ungetc "\n"
478+
else # heredoc end
479+
@heredoc_queue.shift
480+
@lex_state = :EXPR_BEG
481+
tk = Token(TkHEREDOCEND, info[:heredoc_end])
482+
if !@heredoc_queue.empty?
483+
@heredoc_queue[0][:started] = true
484+
ungetc "\n"
485+
end
477486
end
478487
end
479-
@current_readed = @readed
480-
@here_readed.clear
481-
Token(TkNL)
488+
unless tk
489+
case @lex_state
490+
when :EXPR_BEG, :EXPR_FNAME, :EXPR_DOT
491+
@continue = true
492+
else
493+
@continue = false
494+
@lex_state = :EXPR_BEG
495+
until (@indent_stack.empty? ||
496+
[TkLPAREN, TkLBRACK, TkLBRACE,
497+
TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
498+
@indent_stack.pop
499+
end
500+
end
501+
@current_readed = @readed
502+
@here_readed.clear
503+
tk = Token(TkNL)
504+
end
505+
tk
482506
end
483507

484508
@OP.def_rules("=") do
@@ -533,8 +557,8 @@ def lex_init()
533557
if @lex_state != :EXPR_END && @lex_state != :EXPR_CLASS &&
534558
(@lex_state != :EXPR_ARG || @space_seen)
535559
c = peek(0)
536-
if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-")
537-
tk = identify_here_document
560+
if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-" || c == "~")
561+
tk = identify_here_document(op)
538562
end
539563
end
540564
unless tk
@@ -1073,77 +1097,63 @@ def identify_identifier
10731097
end
10741098
end
10751099

1076-
def identify_here_document
1100+
def identify_here_document(op)
10771101
ch = getc
1102+
start_token = op
10781103
# if lt = PERCENT_LTYPE[ch]
1079-
if ch == "-"
1104+
if ch == "-" or ch == "~"
1105+
start_token.concat ch
10801106
ch = getc
10811107
indent = true
10821108
end
10831109
if /['"`]/ =~ ch
1110+
start_token.concat ch
10841111
user_quote = lt = ch
10851112
quoted = ""
10861113
while (c = getc) && c != lt
10871114
quoted.concat c
10881115
end
1116+
start_token.concat quoted
1117+
start_token.concat lt
10891118
else
10901119
user_quote = nil
10911120
lt = '"'
10921121
quoted = ch.dup
10931122
while (c = getc) && c =~ /\w/
10941123
quoted.concat c
10951124
end
1125+
start_token.concat quoted
10961126
ungetc
10971127
end
10981128

1099-
ltback, @ltype = @ltype, lt
1100-
reserve = []
1101-
while ch = getc
1102-
reserve.push ch
1103-
if ch == "\\"
1104-
reserve.push ch = getc
1105-
elsif ch == "\n"
1106-
break
1107-
end
1108-
end
1109-
1110-
output_heredoc = reserve.join =~ /\A\r?\n\z/
1129+
@heredoc_queue << {
1130+
quoted: quoted,
1131+
lt: lt,
1132+
indent: indent,
1133+
started: false
1134+
}
1135+
@lex_state = :EXPR_BEG
1136+
Token(RDoc::RubyLex::TkHEREDOCBEG, start_token)
1137+
end
11111138

1112-
if output_heredoc then
1113-
doc = '<<'
1114-
doc << '-' if indent
1115-
doc << "#{user_quote}#{quoted}#{user_quote}\n"
1116-
else
1117-
doc = '"'
1118-
end
1139+
def identify_here_document_body(quoted, lt, indent)
1140+
ltback, @ltype = @ltype, lt
11191141

1120-
@current_readed = @readed
1142+
doc = ""
1143+
heredoc_end = nil
11211144
while l = gets
11221145
l = l.sub(/(:?\r)?\n\z/, "\n")
11231146
if (indent ? l.strip : l.chomp) == quoted
1147+
heredoc_end = l
11241148
break
11251149
end
11261150
doc << l
11271151
end
1152+
raise Error, "Missing terminating #{quoted} for string" unless heredoc_end
11281153

1129-
if output_heredoc then
1130-
raise Error, "Missing terminating #{quoted} for string" unless l
1131-
1132-
doc << l.chomp
1133-
else
1134-
doc << '"'
1135-
end
1136-
1137-
@current_readed = @here_readed
1138-
@here_readed.concat reserve
1139-
while ch = reserve.pop
1140-
ungetc ch
1141-
end
1142-
1143-
token_class = output_heredoc ? RDoc::RubyLex::TkHEREDOC : Ltype2Token[lt]
11441154
@ltype = ltback
1145-
@lex_state = :EXPR_END
1146-
Token(token_class, doc)
1155+
@lex_state = :EXPR_BEG
1156+
[Token(RDoc::RubyLex::TkHEREDOC, doc), heredoc_end]
11471157
end
11481158

11491159
def identify_quotation

lib/rdoc/ruby_token.rb

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -329,6 +329,8 @@ def Token(token, value = nil)
329329
[:TkCVAR, TkId],
330330
[:TkIVAR, TkId],
331331
[:TkCONSTANT, TkId],
332+
[:TkHEREDOCBEG, TkId],
333+
[:TkHEREDOCEND, TkId],
332334

333335
[:TkINTEGER, TkVal],
334336
[:TkFLOAT, TkVal],

test/test_rdoc_ruby_lex.rb

Lines changed: 27 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -89,9 +89,11 @@ def x
8989
@TK::TkIDENTIFIER.new( 4, 1, 4, 'x'),
9090
@TK::TkNL .new( 5, 1, 5, "\n"),
9191
@TK::TkSPACE .new( 6, 2, 0, ' '),
92-
@TK::TkHEREDOC .new( 8, 2, 2,
93-
%Q{<<E\nLine 1\nLine 2\nE}),
94-
@TK::TkNL .new(27, 5, 28, "\n"),
92+
93+
@TK::TkHEREDOCBEG.new( 8, 2, 2, '<<E'),
94+
@TK::TkNL .new(11, 2, 6, "\n"),
95+
@TK::TkHEREDOC .new(11, 2, 6, "Line 1\nLine 2\n"),
96+
@TK::TkHEREDOCEND.new(27, 5, 26, "E\n"),
9597
@TK::TkEND .new(28, 6, 0, 'end'),
9698
@TK::TkNL .new(31, 6, 28, "\n"),
9799
]
@@ -162,10 +164,12 @@ def test_class_tokenize_heredoc_CR_NL
162164
@TK::TkSPACE .new( 6, 1, 6, ' '),
163165
@TK::TkASSIGN .new( 7, 1, 7, '='),
164166
@TK::TkSPACE .new( 8, 1, 8, ' '),
165-
@TK::TkHEREDOC .new( 9, 1, 9,
166-
%Q{<<-STRING\nLine 1\nLine 2\n STRING}),
167-
@TK::TkSPACE .new(44, 4, 45, "\r"),
168-
@TK::TkNL .new(45, 4, 46, "\n"),
167+
@TK::TkHEREDOCBEG.new( 9, 1, 9, '<<-STRING'),
168+
@TK::TkSPACE .new(18, 1, 18, "\r"),
169+
@TK::TkNL .new(19, 1, 19, "\n"),
170+
@TK::TkHEREDOC .new(19, 1, 19,
171+
%Q{Line 1\nLine 2\n}),
172+
@TK::TkHEREDOCEND.new(45, 4, 36, " STRING\n"),
169173
]
170174

171175
assert_equal expected, tokens
@@ -184,10 +188,12 @@ def test_class_tokenize_heredoc_call
184188
@TK::TkSPACE .new( 6, 1, 6, ' '),
185189
@TK::TkASSIGN .new( 7, 1, 7, '='),
186190
@TK::TkSPACE .new( 8, 1, 8, ' '),
187-
@TK::TkSTRING .new( 9, 1, 9, %Q{"Line 1\nLine 2\n"}),
188-
@TK::TkDOT .new(41, 4, 42, '.'),
189-
@TK::TkIDENTIFIER.new(42, 4, 43, 'chomp'),
190-
@TK::TkNL .new(47, 4, 48, "\n"),
191+
@TK::TkHEREDOCBEG.new( 9, 1, 9, '<<-STRING'),
192+
@TK::TkDOT .new(18, 1, 18, '.'),
193+
@TK::TkIDENTIFIER.new(19, 1, 19, 'chomp'),
194+
@TK::TkNL .new(24, 1, 24, "\n"),
195+
@TK::TkHEREDOC .new(24, 1, 24, "Line 1\nLine 2\n"),
196+
@TK::TkHEREDOCEND.new(47, 4, 39, " STRING\n"),
191197
]
192198

193199
assert_equal expected, tokens
@@ -206,9 +212,12 @@ def test_class_tokenize_heredoc_indent
206212
@TK::TkSPACE .new( 6, 1, 6, ' '),
207213
@TK::TkASSIGN .new( 7, 1, 7, '='),
208214
@TK::TkSPACE .new( 8, 1, 8, ' '),
209-
@TK::TkHEREDOC .new( 9, 1, 9,
210-
%Q{<<-STRING\nLine 1\nLine 2\n STRING}),
211-
@TK::TkNL .new(41, 4, 42, "\n"),
215+
216+
217+
@TK::TkHEREDOCBEG.new( 9, 1, 9, '<<-STRING'),
218+
@TK::TkNL .new(18, 1, 18, "\n"),
219+
@TK::TkHEREDOC .new(18, 1, 18, "Line 1\nLine 2\n"),
220+
@TK::TkHEREDOCEND.new(41, 4, 33, " STRING\n")
212221
]
213222

214223
assert_equal expected, tokens
@@ -238,8 +247,10 @@ def test_class_tokenize_heredoc_percent_N
238247
@TK::TkSPACE .new( 1, 1, 1, ' '),
239248
@TK::TkIDENTIFIER.new( 2, 1, 2, 'b'),
240249
@TK::TkSPACE .new( 3, 1, 3, ' '),
241-
@TK::TkHEREDOC .new( 4, 1, 4, %Q{<<-U\n%N\nU}),
242-
@TK::TkNL .new(13, 3, 14, "\n"),
250+
@TK::TkHEREDOCBEG.new( 4, 1, 4, '<<-U'),
251+
@TK::TkNL .new( 8, 1, 8, "\n"),
252+
@TK::TkHEREDOC .new( 8, 1, 8, "%N\n"),
253+
@TK::TkHEREDOCEND.new(13, 3, 12, "U\n")
243254
]
244255

245256
assert_equal expected, tokens

0 commit comments

Comments
 (0)