Skip to content

Commit

Permalink
adds more specific specs for cesu-8 encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
moofkit authored and eregon committed Oct 22, 2020
1 parent d3074c4 commit 03a3b81
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 3 deletions.
10 changes: 10 additions & 0 deletions core/encoding/list_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@
Encoding.list.select {|e| e.dummy?}.should_not == []
end

# UTF-8 must always be present among the encodings reported by Encoding.list.
it 'includes UTF-8 encoding' do
  encodings = Encoding.list
  encodings.include?(Encoding::UTF_8).should be_true
end

ruby_version_is "2.7" do
it 'includes CESU-8 encoding' do
Encoding.list.include?(Encoding::CESU_8).should be_true
end
end

# TODO: Find example that illustrates this
it "updates the list when #find is used to load a new encoding"
end
13 changes: 13 additions & 0 deletions core/integer/chr_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -240,4 +240,17 @@
-> { integer.chr(encoding_name) }.should raise_error(RangeError)
end
end

ruby_version_is "2.7" do
it 'returns a String encoding self interpreted as a codepoint in the CESU-8 encoding' do
# see more details here https://en.wikipedia.org/wiki/CESU-8
# code points from U+0000 to U+FFFF is encoded in the same way as in UTF-8
0x0045.chr(Encoding::CESU_8).bytes.should == 0x0045.chr(Encoding::UTF_8).bytes

# code points in range from U+10000 to U+10FFFF is CESU-8 data containing a 6-byte surrogate pair,
# which decodes to a 4-byte UTF-8 string
0x10400.chr(Encoding::CESU_8).bytes.should != 0x10400.chr(Encoding::UTF_8).bytes
0x10400.chr(Encoding::CESU_8).bytes.to_a.should == [0xED, 0xA0, 0x81, 0xED, 0xB0, 0x80]
end
end
end
3 changes: 0 additions & 3 deletions core/string/valid_encoding_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,6 @@
str.force_encoding('MacJapanese').valid_encoding?.should be_false
str.force_encoding('UTF-7').valid_encoding?.should be_true
str.force_encoding('UTF8-MAC').valid_encoding?.should be_true
ruby_version_is "2.7" do
str.force_encoding('CESU-8').valid_encoding?.should be_true
end
end

it "returns false if self is valid in one encoding, but invalid in the one it's tagged with" do
Expand Down

0 comments on commit 03a3b81

Please sign in to comment.