diff --git a/spec/std/io/io_spec.cr b/spec/std/io/io_spec.cr
index dfe944e98000..6c96c54d5b07 100644
--- a/spec/std/io/io_spec.cr
+++ b/spec/std/io/io_spec.cr
@@ -796,6 +796,19 @@ describe IO do
         io.encoding.should eq("UTF-8")
       end
 
+      it "handles long lines correctly with invalid: :skip" do
+        # Using both ASCII characters and a 26-byte emoji sequence to
+        # ensure we hit as many byte boundaries inside the multi-byte sequence
+        # as we can to get sufficient confidence in this test.
+        text = "test string 👩🏾‍🤝‍👨🏻" * 10240
+        io = IO::Memory.new
+        io.set_encoding "UTF-8", invalid: :skip
+        io << text
+
+        io.bytesize.should eq text.bytesize
+        io.to_slice.should eq text.to_slice
+      end
+
       it "does skips when converting to UTF-8" do
         io = SimpleIOMemory.new(Base64.decode_string("ey8qx+Tl8fwg7+Dw4Ozl8vD7IOLo5+jy4CovfQ=="))
         io.set_encoding("UTF-8", invalid: :skip)
diff --git a/src/crystal/iconv.cr b/src/crystal/iconv.cr
index 59fd9519c983..4a45b3876a5a 100644
--- a/src/crystal/iconv.cr
+++ b/src/crystal/iconv.cr
@@ -58,10 +58,18 @@ struct Crystal::Iconv
   def convert(inbuf : UInt8**, inbytesleft : LibC::SizeT*, outbuf : UInt8**, outbytesleft : LibC::SizeT*)
     {% if flag?(:freebsd) || flag?(:dragonfly) %}
       if @skip_invalid
         return LibC.__iconv(@iconv, inbuf, inbytesleft, outbuf, outbytesleft, LibC::ICONV_F_HIDE_INVALID, out invalids)
       end
     {% end %}
-    {{ USE_LIBICONV ? LibIconv : LibC }}.iconv(@iconv, inbuf, inbytesleft, outbuf, outbytesleft)
+
+    err = {{ USE_LIBICONV ? LibIconv : LibC }}.iconv(@iconv, inbuf, inbytesleft, outbuf, outbytesleft)
+    # E2BIG (output buffer full) and EINVAL (incomplete input sequence) are
+    # left to the caller, which can drain the output or supply more input.
+    # Every other failure is treated as invalid input right here.
+    if err == Crystal::Iconv::ERROR && Errno.value != Errno::E2BIG && Errno.value != Errno::EINVAL
+      handle_invalid(inbuf, inbytesleft)
+    end
+    err
   end
 
   def handle_invalid(inbuf, inbytesleft)
diff --git a/src/io/encoding.cr b/src/io/encoding.cr
index b741ea07e675..4cf57f413ef8 100644
--- a/src/io/encoding.cr
+++ b/src/io/encoding.cr
@@ -39,11 +39,13 @@ class IO
       while inbytesleft > 0
         outbuf_ptr = outbuf.to_unsafe
         outbytesleft = LibC::SizeT.new(outbuf.size)
         err = @iconv.convert(pointerof(inbuf_ptr), pointerof(inbytesleft), pointerof(outbuf_ptr), pointerof(outbytesleft))
-        if err == Crystal::Iconv::ERROR
+        # Inspect errno right after the conversion: io.write below may run
+        # system calls that overwrite it.
+        if err == Crystal::Iconv::ERROR && Errno.value == Errno::EINVAL
           @iconv.handle_invalid(pointerof(inbuf_ptr), pointerof(inbytesleft))
         end
         io.write(outbuf.to_slice[0, outbuf.size - outbytesleft])
       end
     end
 
@@ -97,11 +99,7 @@ class IO
 
         # Check for errors
         if result == Crystal::Iconv::ERROR
-          case Errno.value
-          when Errno::EILSEQ
-            # For an illegal sequence we just skip one byte and we'll continue next
-            @iconv.handle_invalid(pointerof(@in_buffer), pointerof(@in_buffer_left))
-          when Errno::EINVAL
+          if Errno.value == Errno::EINVAL
             # EINVAL means "An incomplete multibyte sequence has been encountered in the input."
             old_in_buffer_left = @in_buffer_left
 
diff --git a/src/string.cr b/src/string.cr
index 940ba15f6602..7a527a8c2ec4 100644
--- a/src/string.cr
+++ b/src/string.cr
@@ -1823,7 +1823,7 @@ class String
         outbuf_ptr = outbuf.to_unsafe
         outbytesleft = LibC::SizeT.new(outbuf.size)
         err = iconv.convert(pointerof(inbuf_ptr), pointerof(inbytesleft), pointerof(outbuf_ptr), pointerof(outbytesleft))
-        if err == Crystal::Iconv::ERROR
+        if err == Crystal::Iconv::ERROR && Errno.value == Errno::EINVAL
           iconv.handle_invalid(pointerof(inbuf_ptr), pointerof(inbytesleft))
         end
         io.write(outbuf.to_slice[0, outbuf.size - outbytesleft])
@@ -1832,7 +1832,7 @@ class String
       outbuf_ptr = outbuf.to_unsafe
       outbytesleft = LibC::SizeT.new(outbuf.size)
       err = iconv.convert(Pointer(UInt8*).null, Pointer(LibC::SizeT).null, pointerof(outbuf_ptr), pointerof(outbytesleft))
-      if err == Crystal::Iconv::ERROR
+      if err == Crystal::Iconv::ERROR && Errno.value == Errno::EINVAL
         iconv.handle_invalid(pointerof(inbuf_ptr), pointerof(inbytesleft))
       end
       io.write(outbuf.to_slice[0, outbuf.size - outbytesleft])