lib/characteristics/byte.rb in characteristics-0.2.0 vs lib/characteristics/byte.rb in characteristics-0.3.0

- old
+ new

@@ -1,31 +1,44 @@ class ByteCharacteristics < Characteristics - HAS_C1 = /^(ISO-8859-)/ + HAS_C1 = /^(ISO-8859-|TIS-620)/ UNASSIGNED = { - 0x81 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/, - 0x83 => /^(Windows-(1250|1257))/, - 0x88 => /^(Windows-(1250|1253|1257))/, - 0x8A => /^(Windows-(1253|1255|1257|1258))/, - 0x8C => /^(Windows-(1253|1255|1257))/, - 0x8D => /^(Windows-(1252|1253|1254|1255|1258))/, - 0x8E => /^(Windows-(1253|1254|1255|1258))/, - 0x8F => /^(Windows-(1252|1253|1254|1255|1258))/, + 0x80 => /^(IBM869)/, + 0x81 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258)|IBM869|Windows-874)/, + 0x82 => /^(IBM869|Windows-874)/, + 0x83 => /^(Windows-(1250|1257)|IBM869|Windows-874)/, + 0x84 => /^(IBM869|Windows-874)/, + 0x85 => /^(IBM869)/, + 0x86 => /^(Windows-874)/, + 0x87 => /^(IBM869|Windows-874)/, + 0x88 => /^(Windows-(1250|1253|1257)|Windows-874)/, + 0x89 => /^(Windows-874)/, + 0x8A => /^(Windows-(1253|1255|1257|1258)|Windows-874)/, + 0x8B => /^(Windows-874)/, + 0x8C => /^(Windows-(1253|1255|1257)|Windows-874)/, + 0x8D => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/, + 0x8E => /^(Windows-(1253|1254|1255|1258)|Windows-874)/, + 0x8F => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/, - 0x90 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/, - 0x98 => /^(Windows-(1250|1251|1253|1257))/, - 0x9A => /^(Windows-(1253|1255|1257|1258))/, - 0x9C => /^(Windows-(1253|1255|1257))/, - 0x9D => /^(Windows-(1252|1253|1254|1255|1258))/, - 0x9E => /^(Windows-(1253|1254|1255|1258))/, - 0x9F => /^(Windows-(1253|1255|1257))/, + 0x90 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258)|macThai|Windows-874)/, + 0x93 => /^(IBM869)/, + 0x94 => /^(IBM869)/, + 0x98 => /^(Windows-(1250|1251|1253|1257)|Windows-874)/, + 0x99 => /^(Windows-874)/, + 0x9A => /^(Windows-(1253|1255|1257|1258)|Windows-874)/, + 0x9B => /^(IBM864|Windows-874)/, + 0x9C => /^(Windows-(1253|1255|1257)|IBM864|Windows-874)/, + 0x9D => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/, + 0x9E => /^(Windows-(1253|1254|1255|1258)|Windows-874)/, + 0x9F => /^(Windows-(1253|1255|1257)|IBM864|macThai|Windows-874)/, + 0xA0 => /^(TIS-620)/, 0xA1 => /^(ISO-8859-(6|8)|Windows-(1257))/, 0xA2 => /^(ISO-8859-(6))/, 0xA3 => /^(ISO-8859-(6))/, 0xA5 => /^(ISO-8859-(3|6)|Windows-(1257))/, - 0xA6 => /^(ISO-8859-(6))/, + 0xA6 => /^(ISO-8859-(6)|IBM864)/, 0xA7 => /^(ISO-8859-(6))/, 0xA8 => /^(ISO-8859-(6))/, 0xA9 => /^(ISO-8859-(6))/, 0xAA => /^(ISO-8859-(6)|Windows-(1253))/, 0xAB => /^(ISO-8859-(6))/, @@ -68,38 +81,40 @@ 0xD0 => /^(ISO-8859-(3|8))/, 0xD1 => /^(ISO-8859-(8))/, 0xD2 => /^(ISO-8859-(7|8)|Windows-(1253))/, 0xD3 => /^(ISO-8859-(8))/, 0xD4 => /^(ISO-8859-(8))/, - 0xD5 => /^(ISO-8859-(8))/, + 0xD5 => /^(ISO-8859-(8)|IBM857)/, # IBM857: Ruby does not support euro sign 0xD6 => /^(ISO-8859-(8))/, 0xD7 => /^(ISO-8859-(8))/, 0xD8 => /^(ISO-8859-(8))/, 0xD9 => /^(ISO-8859-(8)|Windows-(1255))/, 0xDA => /^(ISO-8859-(8)|Windows-(1255))/, - 0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255))/, - 0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255))/, - 0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255))/, - 0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255))/, + 0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/, + 0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/, + 0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/, + 0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/, 0xDF => /^(ISO-8859-(6)|Windows-(1255))/, 0xE3 => /^(ISO-8859-(3))/, + 0xE7 => /^(IBM857)/, - 0xF0 => /^(ISO-8859-(3))/, + 0xF0 => /^(ISO-8859-(3))/, # mac: Treating F0 as always assigned + 0xF2 => /^(IBM857)/, 0xF3 => /^(ISO-8859-(6))/, 0xF4 => /^(ISO-8859-(6))/, - 0xF5 => /^(ISO-8859-(6))/, + 0xF5 => /^(ISO-8859-(6)|macTurkish)/, 0xF6 => /^(ISO-8859-(6))/, 0xF7 => /^(ISO-8859-(6))/, 0xF8 => /^(ISO-8859-(6))/, 0xF9 => /^(ISO-8859-(6))/, 0xFA => /^(ISO-8859-(6))/, 0xFB => /^(ISO-8859-(6|8)|Windows-(1255))/, - 0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255))/, - 0xFD => /^(ISO-8859-(6|11))/, - 0xFE => /^(ISO-8859-(6|11))/, - 0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255))/, + 0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255)|macThai|TIS-620|Windows-874)/, + 0xFD => /^(ISO-8859-(6|11)|macThai|TIS-620|Windows-874)/, + 0xFE => /^(ISO-8859-(6|11)|macThai|TIS-620|Windows-874)/, + 0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255)|IBM864|macGreek|macThai|TIS-620|Windows-874)/, # macGreek: Ruby does not know of soft hyphen at FF }.freeze BLANKS = [ 0x9, 0x20, @@ -111,34 +126,49 @@ 0xC, 0xD, ].freeze EXTRA_BLANKS = { - 0xA0 => /^(ISO-8859-|Windows-125)/, + 0xA0 => /^(ISO-8859-|Windows-125|macThai|Windows-874)/, + 0xA1 => /^IBM864/, + 0xAD => /^(ISO-8859-(?!11)|Windows-125)/, + 0x9A => /^KOI8-/, 0x9D => /^Windows-(1256)/, - 0x9F => /^Windows-(1256)/, + 0x9E => /^Windows-(1256)/, + 0xCA => /^mac(?!Thai)/, + 0xDB => /^macThai/, + 0xDC => /^macThai/, + 0xF0 => /^(IBM(?!437|737|86)|IBM869|CP)/, + 0xFF => /^(IBM(?!864)|CP)/, # |macGreek, but is unnasigned in Ruby }.freeze + FORMATS = { + 0xFD => /^(ISO-8859-8|Windows-(1255|1256))/, + 0xFE => /^(ISO-8859-8|Windows-(1255|1256))/, + }.freeze + def initialize(char) super @ord = char.ord end def encoding_has_c0? - # !!(HAS_C0 =~ @encoding_name) true end def encoding_has_delete? - # !!(HAS_C0 =~ @encoding_name) true end def encoding_has_c1? !!(HAS_C1 =~ @encoding_name) end + def unicode? + false + end + def assigned? control? || UNASSIGNED[@ord] !~ @encoding_name end def control? @@ -159,7 +189,11 @@ def blank? BLANKS.include?(@ord) || SEPARATORS.include?(@ord) || EXTRA_BLANKS[@ord] =~ @encoding_name + end + + def format? + FORMATS[@ord] =~ @encoding_name end end \ No newline at end of file