From 9a5de72253fab586c86e115780a02c506788fd86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Tue, 10 Mar 2026 14:44:22 +0100 Subject: [PATCH 1/6] Add `allow_escapes` to `DelimiterState.percent_literal` --- src/compiler/crystal/syntax/lexer.cr | 6 +++--- src/compiler/crystal/syntax/token.cr | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 881b7576c712..cec05b8206b7 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -315,7 +315,7 @@ module Crystal start_char = next_char next_char :SYMBOL_ARRAY_START @token.raw = "%i#{start_char}" if @wants_raw - @token.delimiter_state = Token::DelimiterState.new(:symbol_array, start_char, closing_char(start_char)) + @token.delimiter_state = Token::DelimiterState.new(:symbol_array, start_char, closing_char(start_char), allow_escapes: false) else @token.type = :OP_PERCENT end @@ -357,7 +357,7 @@ module Crystal start_char = next_char next_char :STRING_ARRAY_START @token.raw = "%w#{start_char}" if @wants_raw - @token.delimiter_state = Token::DelimiterState.new(:string_array, start_char, closing_char(start_char)) + @token.delimiter_state = Token::DelimiterState.new(:string_array, start_char, closing_char(start_char), allow_escapes: false) else @token.type = :OP_PERCENT end @@ -1800,7 +1800,7 @@ module Crystal next_char elsif char == 'w' && peek_next_char.in?('(', '<', '[', '{', '|') next_char - delimiter_state = Token::DelimiterState.percent_literal(:string_array, current_char, closing_char) + delimiter_state = Token::DelimiterState.percent_literal(:string_array, current_char, closing_char, allow_escapes: false) next_char elsif char == 'x' && peek_next_char.in?('(', '<', '[', '{', '|') next_char diff --git a/src/compiler/crystal/syntax/token.cr b/src/compiler/crystal/syntax/token.cr index b067c434ae78..e789fbb5d8f2 100644 --- a/src/compiler/crystal/syntax/token.cr +++ 
b/src/compiler/crystal/syntax/token.cr @@ -330,9 +330,9 @@ module Crystal # Creates a DelimiterState for percent literals in macros. # For symmetric delimiters (||), uses open_count = 0 (no nesting). # For paired delimiters (()), uses open_count = 1 (enables nesting). - def self.percent_literal(kind : DelimiterKind, nest, the_end) + def self.percent_literal(kind : DelimiterKind, nest, the_end, *, allow_escapes : Bool = true) open_count = nest == the_end ? 0 : 1 - new kind, nest, the_end, open_count, 0, true + new kind, nest, the_end, open_count, 0, allow_escapes end def with_open_count_delta(delta) From c11bb313434fd01874c2fa26fbede9a3c2db89fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Tue, 10 Mar 2026 15:23:43 +0100 Subject: [PATCH 2/6] Standardize parsing of symbol and string array literals --- .../compiler/lexer/lexer_string_array_spec.cr | 30 +++-- spec/compiler/parser/parser_spec.cr | 12 +- src/compiler/crystal/syntax/lexer.cr | 109 ++++++------------ src/compiler/crystal/syntax/parser.cr | 52 +++++++-- src/compiler/crystal/syntax/token.cr | 4 + src/compiler/crystal/tools/formatter.cr | 61 +++++----- src/crystal/syntax_highlighter.cr | 3 +- 7 files changed, 145 insertions(+), 126 deletions(-) diff --git a/spec/compiler/lexer/lexer_string_array_spec.cr b/spec/compiler/lexer/lexer_string_array_spec.cr index cd4ed40b0f15..17d6a68f5d43 100644 --- a/spec/compiler/lexer/lexer_string_array_spec.cr +++ b/spec/compiler/lexer/lexer_string_array_spec.cr @@ -8,19 +8,23 @@ private def it_should_be_valid_string_array_lexer(lexer) token = lexer.next_token token.type.should eq(t :STRING_ARRAY_START) - token = lexer.next_string_array_token + token = lexer.next_string_token(token.delimiter_state) token.type.should eq(t :STRING) token.value.should eq("one") - token = lexer.next_string_array_token + token = lexer.next_string_token(token.delimiter_state) + token.type.should eq(t :SPACE) + token.value.should eq(" ") + + token = 
lexer.next_string_token(token.delimiter_state) token.type.should eq(t :STRING) token.value.should eq("two") - token = lexer.next_string_array_token + token = lexer.next_string_token(token.delimiter_state) token.type.should eq(t :STRING_ARRAY_END) end -describe "Lexer string array" do +describe "Lexer %w string array" do it "lexes simple string array" do lexer = Lexer.new("%w(one two)") @@ -33,25 +37,29 @@ describe "Lexer string array" do token = lexer.next_token token.type.should eq(t :STRING_ARRAY_START) - token = lexer.next_string_array_token + token = lexer.next_string_token(token.delimiter_state) token.type.should eq(t :STRING) token.value.should eq("one") - token = lexer.next_string_array_token + token = lexer.next_string_token(token.delimiter_state) + token.type.should eq(t :SPACE) + token.value.should eq(" \n ") + + token = lexer.next_string_token(token.delimiter_state) token.type.should eq(t :STRING) token.value.should eq("two") - token = lexer.next_string_array_token + token = lexer.next_string_token(token.delimiter_state) token.type.should eq(t :STRING_ARRAY_END) end it "lexes string array with new line gives correct column for next token" do lexer = Lexer.new("%w(one \n two).") - lexer.next_token - lexer.next_string_array_token - lexer.next_string_array_token - lexer.next_string_array_token + token = lexer.next_token + lexer.next_string_token(token.delimiter_state) + lexer.next_string_token(token.delimiter_state) + lexer.next_string_token(token.delimiter_state) token = lexer.next_token token.line_number.should eq(2) diff --git a/spec/compiler/parser/parser_spec.cr b/spec/compiler/parser/parser_spec.cr index 752f70e72e73..65bdfc0ebd7e 100644 --- a/spec/compiler/parser/parser_spec.cr +++ b/spec/compiler/parser/parser_spec.cr @@ -3032,8 +3032,8 @@ module Crystal "/" => regex("a\\\\ b"), "%x[" => command("a\\ b"), "`" => command("a\\ b"), - "%w[" => string_array("a\\ b".string), - "%i[" => symbol_array("a\\ b".symbol), + "%w[" => string_array("a\\".string, 
"b".string), + "%i[" => symbol_array("a\\".symbol, "b".symbol), ":\"" => "a\\ b".symbol, } it_parses_literal "\\\\a", { @@ -3045,8 +3045,8 @@ module Crystal "/" => regex("\\\\a"), "%x[" => command("\\a"), "`" => command("\\a"), - "%w[" => string_array("\\\\a".string), - "%i[" => symbol_array("\\\\a".symbol), + "%w[" => string_array("\\a".string), + "%i[" => symbol_array("\\a".symbol), ":\"" => "\\a".symbol, } it_parses_literal "\\", { @@ -3071,8 +3071,8 @@ module Crystal "/" => regex("\\\\"), "%x[" => command("\\"), "`" => command("\\"), - "%w[" => "Unterminated string array literal", # FIXME: #12277 - "%i[" => "Unterminated symbol array literal", # FIXME: #12277 + "%w[" => string_array("\\".string), + "%i[" => symbol_array("\\".symbol), ":\"" => "\\".symbol, } it_parses_literal "\\\\\\", { diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index cec05b8206b7..8d4783acf33b 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1444,13 +1444,13 @@ module Crystal end end - case current_char + case char = current_char when '\0' raise_unterminated_quoted delimiter_state when string_end next_char if string_open_count == 0 - @token.type = :DELIMITER_END + @token.type = delimiter_state.kind.array? ? Token::Kind::STRING_ARRAY_END : Token::Kind::DELIMITER_END else @token.type = :STRING @token.value = string_end.to_s @@ -1545,6 +1545,16 @@ module Crystal next_char end end + elsif delimiter_state.kind.array? + case char = next_char + when '\\', .ascii_whitespace?, string_end, string_nest + string_token_escape_value char.to_s + else + next_char + @token.type = :STRING + @token.value = string_range(start) + @token.invalid_escape = true + end else @token.type = :STRING @token.value = current_char.to_s @@ -1570,17 +1580,34 @@ module Crystal @token.line_number = @line_number @token.column_number = @column_number - if delimiter_state.kind.heredoc? + case delimiter_state.kind + when .heredoc? 
unless check_heredoc_end delimiter_state next_string_token_noescape delimiter_state @token.value = string_range(start) end + when .array? + @token.type = :SPACE else @token.type = :STRING @token.value = is_slash_r ? "\r\n" : "\n" end else - next_string_token_noescape delimiter_state + if delimiter_state.kind.array? + if char.ascii_whitespace? + while char.ascii_whitespace? + handle_slash_r_slash_n_or_slash_n + incr_line_number if char == '\n' + char = next_char + end + + @token.type = :SPACE + else + next_string_array_token_noescape delimiter_state + end + else + next_string_token_noescape delimiter_state + end @token.value = string_range(start) end @@ -1645,7 +1672,9 @@ module Crystal when .regex? then "Unterminated regular expression" when .heredoc? "Unterminated heredoc: can't find \"#{delimiter_state.end}\" anywhere before the end of file" - when .string? then "Unterminated string literal" + when .string? then "Unterminated string literal" + when .string_array? then "Unterminated string array literal" + when .symbol_array? then "Unterminated symbol array literal" else ::raise "unreachable" end @@ -2307,77 +2336,15 @@ module Crystal set_token_raw_from_start(start) end - def next_string_array_token - while true - if current_char == '\n' - next_char - incr_line_number 1 - elsif current_char.ascii_whitespace? 
- next_char - else - break - end - end - - reset_token - - if current_char == @token.delimiter_state.end - @token.raw = current_char.to_s if @wants_raw - next_char :STRING_ARRAY_END - return @token - end - - start = current_pos - sub_start = start - value = String::Builder.new - - escaped = false - while true - case current_char - when Char::ZERO - break # raise is handled by parser - when @token.delimiter_state.end - unless escaped - # For symmetric delimiters (like ||), don't use nesting logic - if @token.delimiter_state.nest == @token.delimiter_state.end || @token.delimiter_state.open_count == 0 - break - else - @token.delimiter_state = @token.delimiter_state.with_open_count_delta(-1) - end - end - when @token.delimiter_state.nest - unless @token.delimiter_state.nest == @token.delimiter_state.end || escaped - @token.delimiter_state = @token.delimiter_state.with_open_count_delta(+1) - end - when .ascii_whitespace? - break unless escaped - else - if escaped - value << '\\' - end - end - - escaped = current_char == '\\' - if escaped - value.write @reader.string.to_slice[sub_start, current_pos - sub_start] - sub_start = current_pos + 1 - end + def next_string_array_token_noescape(delimiter_state) + string_end = delimiter_state.end + string_nest = delimiter_state.nest + while !current_char.in?(string_end, string_nest, '\0', '\\', '#') && !current_char.ascii_whitespace? 
next_char end - if start == current_pos - @token.type = :EOF - return @token - end - - value.write @reader.string.to_slice[sub_start, current_pos - sub_start] - @token.type = :STRING - @token.value = value.to_s - set_token_raw_from_start(start) - - @token end def consume_loc_pragma diff --git a/src/compiler/crystal/syntax/parser.cr b/src/compiler/crystal/syntax/parser.cr index de874ea0bd0e..d82a6b25df9d 100644 --- a/src/compiler/crystal/syntax/parser.cr +++ b/src/compiler/crystal/syntax/parser.cr @@ -2406,32 +2406,62 @@ module Crystal end def parse_string_array - parse_string_or_symbol_array StringLiteral, "String" + strings, end_location = parse_string_or_symbol_array_strings do |pieces| + combine_pieces(pieces, @token.delimiter_state) + end + + ArrayLiteral.new(strings, Path.global("String")).at_end(end_location) end def parse_symbol_array - parse_string_or_symbol_array SymbolLiteral, "Symbol" + strings, end_location = parse_string_or_symbol_array_strings do |pieces| + string = combine_stringliteral_pieces(pieces, @token.delimiter_state) + SymbolLiteral.new(string) + end + + ArrayLiteral.new(strings, Path.global("Symbol")).at_end(end_location) end - def parse_string_or_symbol_array(klass, elements_type) + def parse_string_or_symbol_array_strings(&) strings = [] of ASTNode + + while !@token.type.string_array_end? + if element = parse_percent_array_element { |pieces| yield pieces } + strings << element + end + end + + check :STRING_ARRAY_END + + end_location = @token.location + + next_token + + {strings, end_location} + end + + def parse_percent_array_element(&) + pieces = [] of Piece + start_location = nil end_location = nil + delimiter_state = @token.delimiter_state while true - next_string_array_token + end_location = token_end_location + next_string_token(delimiter_state) + start_location ||= @token.location + delimiter_state = @token.delimiter_state case @token.type when .string? 
- strings << klass.new(@token.value.to_s).at(@token.location).at_end(token_end_location) - when .string_array_end? - end_location = token_end_location - next_token - break + pieces << Piece.new(@token.value.to_s, @token.line_number) else - raise "Unterminated #{elements_type.downcase} array literal" + break end end - ArrayLiteral.new(strings, Path.global(elements_type)).at_end(end_location) + return if pieces.empty? + + (yield pieces).at(start_location).at_end(end_location) end def parse_empty_array_literal diff --git a/src/compiler/crystal/syntax/token.cr b/src/compiler/crystal/syntax/token.cr index e789fbb5d8f2..92fe6a394801 100644 --- a/src/compiler/crystal/syntax/token.cr +++ b/src/compiler/crystal/syntax/token.cr @@ -299,6 +299,10 @@ module Crystal SYMBOL_ARRAY COMMAND HEREDOC + + def array? + string_array? || symbol_array? + end end record DelimiterState, diff --git a/src/compiler/crystal/tools/formatter.cr b/src/compiler/crystal/tools/formatter.cr index 4bb840dc9022..555c17deb675 100644 --- a/src/compiler/crystal/tools/formatter.cr +++ b/src/compiler/crystal/tools/formatter.cr @@ -775,34 +775,47 @@ module Crystal write "[]" next_token when .string_array_start?, .symbol_array_start? - first = true write @token.raw - count = 0 - while true - has_space_newline = space_newline? - if has_space_newline + + next_string_token + + node.elements.each_with_index do |elem, index| + found_space = false + found_newline = false + while @token.type.space? + found_newline ||= @token.raw.includes?("\n") + found_space = true + next_string_token + end + if found_newline write_line - if count == node.elements.size - write_indent - else - write_indent(@indent + 2) - end + write_indent(@indent + 2) + elsif found_space && index != 0 + write " " end - next_string_array_token - case @token.type - when .string? - write " " unless first || has_space_newline - write @token.raw - first = false - when .string_array_end? 
- write @token.raw - next_token - break + + case elem + when StringLiteral + visit_string_body(elem) + when SymbolLiteral + visit_string_body(elem) else - raise "Bug: unexpected token #{@token.type}" + raise "Bug: unexpected element in string array: #{elem.class}" end - count += 1 end + + # skip trailing space + while @token.type.space? + if @token.raw.includes?("\n") && node.elements.present? + write_line + end + next_string_token + end + + check :STRING_ARRAY_END + write @token.raw + next_token + return false else name = node.name.not_nil! @@ -4580,10 +4593,6 @@ module Crystal @token end - def next_string_array_token - @token = @lexer.next_string_array_token - end - def next_macro_token current_line_number = @lexer.line_number diff --git a/src/crystal/syntax_highlighter.cr b/src/crystal/syntax_highlighter.cr index 255031d33f5c..49f5066fa0ad 100644 --- a/src/crystal/syntax_highlighter.cr +++ b/src/crystal/syntax_highlighter.cr @@ -242,7 +242,8 @@ abstract class Crystal::SyntaxHighlighter render :STRING_ARRAY_START, token.raw while true consume_space_or_newline(lexer) - token = lexer.next_string_array_token + delimiter_state = token.delimiter_state + token = lexer.next_string_token(delimiter_state) case token.type when .string? 
render :STRING_ARRAY_TOKEN, token.raw From b6b46d6bfc837e76152ef98a1271b0c389f54044 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Tue, 17 Mar 2026 13:01:28 +0100 Subject: [PATCH 3/6] Fix write_indent --- spec/compiler/formatter/formatter_spec.cr | 1 + src/compiler/crystal/tools/formatter.cr | 1 + 2 files changed, 2 insertions(+) diff --git a/spec/compiler/formatter/formatter_spec.cr b/spec/compiler/formatter/formatter_spec.cr index 248b87e0b3ca..fc8d511d31fe 100644 --- a/spec/compiler/formatter/formatter_spec.cr +++ b/spec/compiler/formatter/formatter_spec.cr @@ -1815,6 +1815,7 @@ describe Crystal::Formatter do assert_format "1 #=> 2", "1 # => 2" assert_format "1 #=>2", "1 # => 2" assert_format "foo(\n [\n 1,\n 2,\n ],\n [\n 3,\n 4,\n ]\n)" + assert_format "begin\n %w(\n one two\n three four\n )\nend" assert_format "%w(\n one two\n three four\n)" assert_format "a = %w(\n one two\n three four\n)" assert_format "foo &.bar do\n 1 + 2\nend" diff --git a/src/compiler/crystal/tools/formatter.cr b/src/compiler/crystal/tools/formatter.cr index 555c17deb675..0deecb2526fb 100644 --- a/src/compiler/crystal/tools/formatter.cr +++ b/src/compiler/crystal/tools/formatter.cr @@ -808,6 +808,7 @@ module Crystal while @token.type.space? if @token.raw.includes?("\n") && node.elements.present? 
write_line + write_indent end next_string_token end From edfc185c2d0f1e717c1e305da605c5efa01cbab7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Tue, 17 Mar 2026 15:55:19 +0100 Subject: [PATCH 4/6] Fix line counting in percent array literal space tokens --- spec/compiler/formatter/formatter_spec.cr | 2 + src/compiler/crystal/tools/formatter.cr | 54 ++++++++--------------- 2 files changed, 20 insertions(+), 36 deletions(-) diff --git a/spec/compiler/formatter/formatter_spec.cr b/spec/compiler/formatter/formatter_spec.cr index fc8d511d31fe..37f017b4c249 100644 --- a/spec/compiler/formatter/formatter_spec.cr +++ b/spec/compiler/formatter/formatter_spec.cr @@ -1161,6 +1161,8 @@ describe Crystal::Formatter do assert_format "%w{one( two( three)}", "%w{one( two( three)}" assert_format "%i{one( two( three)}", "%i{one( two( three)}" + assert_format "%w(\n\n)\n# ```\n# 1\n# ```\n", "%w()\n# ```\n# 1\n# ```" + assert_format "/foo/" assert_format "/foo/imx" assert_format "/foo \#{ bar }/", "/foo \#{bar}/" diff --git a/src/compiler/crystal/tools/formatter.cr b/src/compiler/crystal/tools/formatter.cr index 0deecb2526fb..d7ccd549fb40 100644 --- a/src/compiler/crystal/tools/formatter.cr +++ b/src/compiler/crystal/tools/formatter.cr @@ -747,26 +747,6 @@ module Crystal false end - def space_newline? - pos, line, col = @lexer.current_pos, @lexer.line_number, @lexer.column_number - while true - char = @lexer.current_char - case char - when ' ', '\t' - @lexer.next_char - when '\n' - @lexer.current_pos = pos - return true - else - break - end - end - @lexer.current_pos = pos - @lexer.line_number = line - @lexer.column_number = col - false - end - def visit(node : ArrayLiteral) case @token.type when .op_lsquare? @@ -777,20 +757,13 @@ module Crystal when .string_array_start?, .symbol_array_start? 
write @token.raw - next_string_token + @lexer.next_string_token(@token.delimiter_state) node.elements.each_with_index do |elem, index| - found_space = false - found_newline = false - while @token.type.space? - found_newline ||= @token.raw.includes?("\n") - found_space = true - next_string_token - end - if found_newline + if skip_space_in_percent_array_literal write_line write_indent(@indent + 2) - elsif found_space && index != 0 + elsif index != 0 write " " end @@ -805,12 +778,9 @@ module Crystal end # skip trailing space - while @token.type.space? - if @token.raw.includes?("\n") && node.elements.present? - write_line - write_indent - end - next_string_token + if skip_space_in_percent_array_literal && node.elements.present? + write_line + write_indent(@indent) end check :STRING_ARRAY_END @@ -833,6 +803,18 @@ module Crystal false end + def skip_space_in_percent_array_literal + return false unless @token.type.space? + + found_newline = false + while @token.type.space? + found_newline ||= @token.raw.includes?("\n") + @lexer.next_string_token(@token.delimiter_state) + end + + found_newline + end + def visit(node : TupleLiteral) format_literal_elements node.elements, :OP_LCURLY, :OP_RCURLY false From 6651c21b58f142304febb6725f5ede67f3ab02ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Tue, 17 Mar 2026 16:46:15 +0100 Subject: [PATCH 5/6] Fix array elements consisting of multiple tokens --- spec/compiler/formatter/formatter_spec.cr | 1 + src/compiler/crystal/tools/formatter.cr | 10 +++------- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/spec/compiler/formatter/formatter_spec.cr b/spec/compiler/formatter/formatter_spec.cr index 37f017b4c249..7991f7774960 100644 --- a/spec/compiler/formatter/formatter_spec.cr +++ b/spec/compiler/formatter/formatter_spec.cr @@ -1162,6 +1162,7 @@ describe Crystal::Formatter do assert_format "%i{one( two( three)}", "%i{one( two( three)}" assert_format "%w(\n\n)\n# ```\n# 1\n# ```\n", "%w()\n# ```\n# 
1\n# ```" + assert_format "%w(a\\ b)" assert_format "/foo/" assert_format "/foo/imx" diff --git a/src/compiler/crystal/tools/formatter.cr b/src/compiler/crystal/tools/formatter.cr index d7ccd549fb40..25b667fa9275 100644 --- a/src/compiler/crystal/tools/formatter.cr +++ b/src/compiler/crystal/tools/formatter.cr @@ -767,13 +767,9 @@ module Crystal write " " end - case elem - when StringLiteral - visit_string_body(elem) - when SymbolLiteral - visit_string_body(elem) - else - raise "Bug: unexpected element in string array: #{elem.class}" + while @token.type.string? + write_sanitized_string_body(@token.delimiter_state.allow_escapes) + @lexer.next_string_token(@token.delimiter_state) end end From d4018b34de81298c032e87e4067b092372edef8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Thu, 19 Mar 2026 09:47:10 +0100 Subject: [PATCH 6/6] Apply suggestion from @ysbaddaden Co-authored-by: Julien Portalier --- src/compiler/crystal/tools/formatter.cr | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/compiler/crystal/tools/formatter.cr b/src/compiler/crystal/tools/formatter.cr index 25b667fa9275..517b1dfebec2 100644 --- a/src/compiler/crystal/tools/formatter.cr +++ b/src/compiler/crystal/tools/formatter.cr @@ -800,8 +800,6 @@ module Crystal end def skip_space_in_percent_array_literal - return false unless @token.type.space? - found_newline = false while @token.type.space? found_newline ||= @token.raw.includes?("\n")