require "setfu/version"
require "prime"

# BitSet stores a set of non-negative integers (or character ordinals) as the
# bits of one arbitrary-precision Integer: member +n+ is present when bit +n+
# of @bits is set.
#
# @entropy is the width of the "universe" used by the complement operator (~);
# #add! keeps it at least (highest member + 1) and the public writer only ever
# grows it.  @mode selects the comparison performed by #=== (see #set_case).
class BitSet
  attr_reader :entropy

  # Widens the entropy; a smaller value is ignored.
  def entropy=(ent) # entropy only gets bigger, just like the universe!
    @entropy = ent unless @entropy > ent
  end

  # @return [BitSet] pre-computed set of the ASCII uppercase letters
  def self.uppercase_chars
    set = BitSet.new
    set.set_bits!(2475880041677272402379145216)
    set.entropy = 91
    set
  end

  # @return [BitSet] pre-computed set of the ASCII lowercase letters
  def self.lowercase_chars
    set = BitSet.new
    set.set_bits!(10633823807823001954701781295154855936)
    set.entropy = 123
    set
  end

  # @return [BitSet] pre-computed set of the ASCII letters, both cases
  def self.letter_chars
    set = BitSet.new
    set.set_bits!(10633823810298881996379053697534001152)
    set.entropy = 123
    set
  end

  # @return [BitSet] pre-computed set of the ASCII digits
  def self.digit_chars
    set = BitSet.new
    set.set_bits!(287948901175001088)
    set.entropy = 58
    set
  end

  # @return [BitSet] pre-computed set equal to [0..47, 58..64, 91..96, 123..126]
  def self.parse_chars
    set = BitSet.new
    set.set_bits!(159507359650170349735020301117175103487)
    set.entropy = 127
    set
  end

  # Overwrites mode, entropy and bits of this set with those of +ent+
  # (anything that responds to #to_bset).
  # @return [BitSet] self
  def replace(ent)
    ent = ent.to_bset
    @mode = ent.mode
    @entropy = ent.entropy
    @bits = ent.to_i
    self
  end

  # The add_*_chars! methods merge a predefined character class into this set
  # and return self; the non-bang forms return a new set instead.

  def add_parse_chars!
    add! BitSet.parse_chars
    self
  end

  def add_parse_chars
    BitSet.parse_chars | self
  end

  def add_digit_chars!
    add! BitSet.digit_chars
    self
  end

  def add_digit_chars
    BitSet.digit_chars | self
  end

  def add_uppercase_chars!
    add! BitSet.uppercase_chars
    self
  end

  def add_uppercase_chars
    BitSet.uppercase_chars | self
  end

  def add_lowercase_chars!
    add! BitSet.lowercase_chars
    self
  end

  def add_lowercase_chars
    BitSet.lowercase_chars | self
  end

  def add_letter_chars
    BitSet.letter_chars | self
  end

  def add_letter_chars!
    add! BitSet.letter_chars
    self
  end

  # 01100010011111111100010101001101 ... test
  # 01100010011111111100010101001101 ... left shift
  # 001000000011111111000000000001000 ... and with left shift
  #
  # 01100010011111111100010101001101 ... test
  # 00110001001111111110001010100110 ... right shift
  # 00100000001111111100000000000100 ... and with right shift
  #
  # 001000000011111111000000000001000 ... AND with left shift
  # 00100000001111111100000000000100 ... AND with right shift
  # 001100000011111111100000000001100 ... OR the two ands
  # 01100010011111111100010101001101 ... test original
  # 01100000011111111100000000001100 ... BINGO ... original with the OR gives us the runs only
  # 00000010000000000000010101000001 ... BINGO ... original XOR with OR gives us the singles!

  # @return [BitSet] the members that touch at least one neighbouring member
  def runs
    rtn = BitSet.new
    rtn.set_bits!(neighbour_bits & @bits)
    rtn.entropy = entropy
    rtn
  end

  # @return [BitSet] the members that have no neighbouring member
  def singles
    rtn = BitSet.new
    rtn.set_bits!(neighbour_bits ^ @bits)
    rtn.entropy = entropy
    rtn
  end

  # @return [Integer] length of the longest run of consecutive members (0 when empty)
  def max_run
    return 0 if @bits == 0
    mx = 1
    bits = @bits
    loop do
      # every shift-and-mask pass shortens each run by one element
      bits <<= 1
      bits &= @bits
      break if bits == 0
      mx += 1
    end
    mx
  end

  # @return [Boolean] true when the set holds a run of at least +cnt+ consecutive members
  def runs?(cnt=2)
    bits = @bits
    (cnt - 1).times do
      bits <<= 1
      bits &= @bits
      return false if bits == 0
    end
    true
  end

  # @return [BitSet] self (lets BitSet be used wherever #to_bset is expected)
  def to_bset
    self
  end

  # @return [Symbol, nil] comparison mode used by #===
  def mode
    @mode
  end

  # Builds a set from any mix of arguments accepted by #add!.
  def initialize(*data)
    @bits = 0
    @entropy = 0
    @mode = nil # explicit, so #mode never reads an unset ivar
    add!(data)
    self
  end

  # Empties the set and resets its entropy.
  # @return [BitSet] self
  def zap!
    @bits = 0
    @entropy = 0
    self
  end

  # Adds +n+ to every member (left shift).
  # @raise [RuntimeError] when +n+ is negative
  # @return [BitSet] self
  def inc!(n=1)
    raise "illegal negative parameter in #inc" if n < 0
    @entropy += n
    @bits <<= n
    self
  end

  # Subtracts +n+ from every member (right shift); members below +n+ are dropped.
  # @raise [RuntimeError] when +n+ is negative
  # @return [BitSet] self
  def dec!(n=1)
    raise "illegal negative parameter in #dec" if n < 0
    @bits >>= n
    self
  end

  # Non-destructive #inc!.
  def inc(n=1)
    dup.inc!(n)
  end

  # Non-destructive #dec!.
  def dec(n=1)
    dup.dec!(n)
  end

  # Resets the entropy to the tightest value: the bit length of the set.
  # @return [Integer] the new entropy
  def recalculate_entropy!
    @entropy = @bits.bit_length
  end

  # this only works on integer ... String, Array, Range does not implement: &, |, ^
  def coerce(other)
    [self, other] # does not seem to get called ...
  end

  # @return [BitSet] independent copy (bits, entropy and mode)
  def dup
    rtn = BitSet.new
    rtn.replace(self)
    rtn
  end

  # Symmetric difference.  The result's entropy covers both operands.
  def ^(item)
    rtn = dup
    if item.class == BitSet
      rtn.set_bits!(rtn.to_i ^ item.to_i)
      rtn.entropy = item.entropy
    else
      rtn = BitSet.new(item)
      rtn.set_bits!(@bits ^ rtn.to_i)
      rtn.entropy = entropy # result may hold members of self above item's range
    end
    rtn
  end

  # Union.  The receiver is left untouched; the result's entropy covers both operands.
  def |(item)
    rtn = dup
    if item.class == BitSet
      rtn.set_bits!(rtn.to_i | item.to_i)
      rtn.entropy = item.entropy
    else
      rtn = BitSet.new(item)
      rtn.set_bits!(@bits | rtn.to_i)
      rtn.entropy = entropy
    end
    rtn
  end

  # Intersection.
  def &(item)
    rtn = dup
    if item.class == BitSet
      rtn.set_bits!(rtn.to_i & item.to_i)
    else
      rtn = BitSet.new(item)
      rtn.set_bits!(@bits & rtn.to_i)
    end
    rtn
  end

  # Difference: members of self that are not in +item+.
  def -(item)
    rtn = BitSet.new
    rtn.entropy = entropy
    other = item.class == BitSet ? item : BitSet.new(item)
    rtn.entropy = other.entropy
    mine = to_i
    rtn.set_bits!((mine & other.to_i) ^ mine)
    rtn
  end

  # Intersection test.
  # @return [Boolean] true when self and +item+ share at least one member
  def **(item)
    set_item = BitSet.new(item)
    !(self & set_item).empty?
  end

  # comparison operators:

  # @return [Boolean] true when +item+ (converted to a BitSet) has the same members
  def ==(item)
    other = item.class == BitSet ? item : BitSet.new(item)
    other.to_i == to_i
  end

  # @return [Boolean] negation of #==
  def !=(item)
    other = item.class == BitSet ? item : BitSet.new(item)
    other.to_i != to_i
  end

  # Selects the comparison performed by #===.
  # @param mode [Symbol] :mode_equal, :mode_intersection, :mode_sub,
  #   :mode_proper, :mode_super or :mode_superproper
  # @return [BitSet] self
  def set_case(mode=:mode_equal)
    @mode = mode
    self
  end

  # Case equality.  self is the `when` operand, +item+ the `case` operand.
  # The mode of +item+ wins over the mode of self; anything unrecognised
  # compares with #==.
  # Note: coerce does not work in this context ...
  def ===(item)
    md = item.to_bset.mode || @mode
    case md
    when :mode_intersection then item ** self
    when :mode_sub          then item <= self
    when :mode_proper       then item < self
    when :mode_super        then self <= item
    when :mode_superproper  then self < item
    else self == item
    end
  end

  # @return [Boolean] true when self is a subset of +item+
  def <=(item)
    si = BitSet.new item
    si.include?(self)
  end

  # @return [Boolean] true when self is a proper subset of +item+
  def <(item)
    si = BitSet.new item
    return false if si == self # not a 'proper' subset
    si.include?(self)
  end

  # Adds members in place.
  # @param items [BitSet, Range, String, #each, Integer] a set, a range of
  #   integers/characters, a string (each character's ordinal), a nested
  #   collection, or a single non-negative integer
  # @raise [RuntimeError] when a negative number is given
  # @return [BitSet] self
  def add!(items)
    if items.class == BitSet
      @bits |= items.to_i
      self.entropy = items.entropy # was a dead local assignment
    elsif items.class == Range
      f = items.first.ord
      l = items.last.ord
      f, l = l, f if f > l
      t = (l - f) + 1
      t = (1 << t) - 1 # t consecutive one-bits ...
      @bits |= t << f  # ... positioned at the start of the range
      self.entropy = l + 1
    elsif items.respond_to? :each_char
      items.each_char do |item|
        @bits |= 1 << item.ord
        self.entropy = item.ord + 1
      end
    elsif items.respond_to? :each
      items.each do |item|
        add! item
      end
    else # assume number
      raise "negative numbers are not allowed" if items < 0
      @bits |= 1 << items
      self.entropy = items + 1
    end
    self
  end

  # Non-destructive #add!.
  def add(items)
    dup.add!(items)
  end

  # @return [Boolean] true when the set has no members
  def empty?
    @bits == 0
  end

  # @return [Boolean] true when every element of +items+ is a member.
  #   nil, empty collections and an empty receiver always give false.
  def include?(items)
    return false if items.nil?  # sets never include nil
    return false if @bits == 0  # empty sets include nothing including other empty sets
    if items.class == BitSet
      tmp = @bits & items.to_i
      return false if tmp == 0
      return tmp == items.to_i
    elsif items.class == Range
      f = items.first.ord
      l = items.last.ord
      f, l = l, f if f > l
      t = (l - f) + 1
      t = (1 << t) - 1
      t = t << f
      return (@bits & t) == t
    elsif items.respond_to? :each_char
      return false if items.empty?
      items.each_char do |item|
        t = 1 << item.ord
        return false if 0 == (t & @bits)
      end
    elsif items.respond_to? :each
      return false if items.empty?
      items.each do |item|
        return false if false == include?(item)
      end
    else # assume number
      t = 1 << items.abs
      return false if 0 == (t & @bits)
    end
    true
  end

  # Iterates the members from highest to lowest.
  # @param prms range-like arguments (anything responding to #first) restrict
  #   the iteration to that range; any other argument is the integer-mode
  #   flag (falsy yields characters instead of integers)
  # @return [Enumerator] when no block is given
  # @return [BitSet] self when a block is given
  def reverse_each(*prms)
    int_mode = true
    set = dup
    prms.each do |prm|
      if prm.respond_to? :first
        set &= prm
      else
        int_mode = prm
      end
    end
    ary = set.to_a(int_mode)
    return ary.reverse_each unless block_given?
    ary.reverse_each { |elm| yield elm }
    self
  end

  # Iterates the members from lowest to highest.
  # @param prms range-like arguments (anything responding to #first) restrict
  #   the iteration to that range; any other argument is the integer-mode
  #   flag (falsy yields characters instead of integers)
  # @return [Enumerator] when no block is given
  # @return [BitSet] self when a block is given
  def each(*prms)
    # The enumerator re-enters this method with a block, so both call styles
    # share one loop and yield the same values.
    return to_enum(:each, *prms) unless block_given?
    bits = @bits
    pos = 0
    stop = nil
    chr_mode = false
    prms.each do |prm|
      if prm.respond_to? :first
        a = prm.first.ord
        b = prm.last.ord
        a, b = b, a if a > b
        pos = a
        bits >>= pos
        stop = b
      else
        chr_mode = !prm
      end
    end
    while bits > 0
      yield(chr_mode ? pos.chr(Encoding::UTF_8) : pos) if (bits & 1) == 1
      pos += 1
      break if stop && pos > stop
      bits >>= 1
    end
    self
  end
  alias_method :each_member, :each

  # @return [Integer] the raw bit pattern
  def to_i
    @bits
  end

  # @return [Integer] number of members
  def count
    cnt = 0
    each_member { |_member| cnt += 1 }
    cnt
  end

  # @return [String] the members, in ascending order, as UTF-8 characters
  def to_s
    rtn = ""
    each_member { |ch| rtn << ch.chr(Encoding::UTF_8) }
    rtn
  end

  # @param int [Boolean] true for integers, false for UTF-8 characters
  # @return [Array] the members in ascending order
  def to_a(int = true)
    rtn = []
    each_member do |num|
      rtn.push int ? num : num.chr(Encoding::UTF_8)
    end
    rtn
  end

  # Like #to_a, but collapses long runs of consecutive members into Ranges.
  # @param int [Boolean] true for integers, false for UTF-8 characters
  # @param th [Integer] run-length threshold for collapsing (must be >= 2)
  # @raise [RuntimeError] when +th+ is below 2
  # @return [Array] members and ranges in ascending order
  def to_ra(int=true, th=3)
    raise "Threshold too low" unless th >= 2
    rtn = []
    cnt = 0       # consecutive steps seen in the current run
    last = -100   # sentinel that can never be (member - 1)
    flag = false  # true once the current run is long enough to collapse
    th -= 1
    each_member do |num|
      rtn.push int ? num : num.chr(Encoding::UTF_8)
      if (last + 1) == num
        cnt += 1
        flag = cnt >= th
      elsif flag
        # run just ended: fold its elements (pushed before +num+) into a range
        save = rtn.pop
        tmp = rtn.pop(cnt + 1)
        rtn.push((tmp.first)..(tmp.last))
        rtn.push save
        cnt = 0
        last = -100
        flag = false
      else
        cnt = 0
      end
      last = num
    end
    if flag # convert final range
      tmp = rtn.pop(cnt + 1)
      rtn.push((tmp.first)..(tmp.last))
    end
    rtn
  end

  # Replaces the raw bit pattern (entropy and mode are left untouched).
  # @raise [RuntimeError] when +bits+ is negative
  # @return [Integer] the stored bit pattern
  def set_bits!(bits)
    raise "negative numbers are not allowed" if bits.to_i < 0
    @bits = bits.to_i
  end

  # Complement within the universe 0...entropy.
  def ~@()
    rtn = dup
    mask = (1 << @entropy) - 1
    rtn.set_bits!(mask ^ @bits)
    rtn
  end

  # new behavior ... single element returns true/false
  # multi element ... returns subset
  #
  # A single Integer or one-character String is a membership test (and, as a
  # side effect, widens the entropy to cover that index); anything else
  # returns the intersection with self.
  # @raise [RuntimeError] when a negative index is given
  def [](*pset)
    idx = nil
    if pset.count == 1 # check for single instance inst[5], inst['t']
      single = pset.first
      if single.kind_of? Integer
        idx = single
      elsif single.kind_of?(String) && single.length == 1
        idx = single.ord
      end
    end
    unless idx.nil?
      raise "Negative indexes are illegal for BitSet" if idx < 0
      self.entropy = idx + 1
      return (@bits & (1 << idx)) > 0
    end
    pset.to_bset & self
  end

  # set[members] = truthy adds the members, set[members] = falsy removes them.
  def []=(*pset, value) # pset goes in the box, value after '='
    pset = pset.to_bset
    state = value ? true : false
    if state
      replace pset | self # add elements to set
    else
      replace self - pset # remove elements from set
    end
    state # this gets ignored, but to be safe, do what the previous version did
  end

  # Picks up to +elm_count+ random members.
  # @param format [Symbol] :array, :array_chars, :string or :set
  # @raise [RuntimeError] when +elm_count+ is below 1
  def rand(elm_count, format = :set)
    raise "rand minimum count too low" if elm_count < 1
    ary = to_a
    ary.shuffle!
    ary = ary[0..(elm_count - 1)]
    case format
    when :array
      ary
    when :array_chars
      ary.map { |elm| elm.chr(Encoding::UTF_8) }
    when :string
      ary.map { |elm| elm.chr(Encoding::UTF_8) }.join ""
    else # :set
      ary.to_bset
    end
  end

  private

  # Bits that have a set neighbour on either side, OR-ed together (see the
  # worked bit diagram above #runs).
  def neighbour_bits
    ((@bits << 1) & @bits) | ((@bits >> 1) & @bits)
  end
end # end BitSet

# The mixins below give core collection types set operators by converting
# both operands to BitSet first.

module SetFuMixinBinaryAndOperator
  def &(item)
    BitSet.new(self) & BitSet.new(item)
  end
end

module SetFuMixinBinaryOrOperator
  def |(item)
    BitSet.new(self) | BitSet.new(item)
  end
end

module SetFuMixinBinaryXorOperator
  def ^(item)
    BitSet.new(self) ^ BitSet.new(item)
  end
end

module SetFuMixinBinaryIntersectionOperator
  def **(item) # intersection test
    BitSet.new(self) ** BitSet.new(item)
  end
end

module SetFuMixinToSetMethod
  def to_bset
    BitSet.new(self)
  end
end

module SetFuMixinTrippleEqualsOperator
  alias_method :old_triple_equal4Set, :===

  # Only takes over when the `case` operand is a BitSet; otherwise the
  # previous === is used.
  def ===(item)
    return old_triple_equal4Set(item) unless item.class == BitSet
    to_bset === item
  end
end

module SetFuMixinBinarySubtractOperator
  def -(item)
    BitSet.new(self) - BitSet.new(item)
  end
end

module SetFuMixinBinarySubsetOperator
  def <=(item)
    BitSet.new(self) <= BitSet.new(item)
  end
end

module SetFuMixinBinaryProperOperator
  def <(item)
    BitSet.new(self) < BitSet.new(item)
  end
end

class String
  alias_method :old_string_lt4set, :<
  alias_method :old_string_lte4set, :<=
  include SetFuMixinBinaryAndOperator
  include SetFuMixinBinaryOrOperator
  include SetFuMixinBinaryXorOperator
  include SetFuMixinBinaryIntersectionOperator
  include SetFuMixinToSetMethod
  include SetFuMixinTrippleEqualsOperator
  include SetFuMixinBinarySubtractOperator

  # String <= String keeps the built-in ordering; anything else is a subset test.
  def <=(item)
    return old_string_lte4set(item) if item.class == String
    BitSet.new(self) <= BitSet.new(item)
  end

  # String < String keeps the built-in ordering; anything else is a proper-subset test.
  def <(item)
    return old_string_lt4set(item) if item.class == String
    BitSet.new(self) < BitSet.new(item)
  end
end

class Array
  alias_method :old_subtract_method4set, :-
  alias_method :old_and_method4set, :&
  alias_method :old_or_method4set, :|
  include SetFuMixinBinaryXorOperator
  include SetFuMixinBinaryIntersectionOperator
  include SetFuMixinToSetMethod
  include SetFuMixinTrippleEqualsOperator
  include SetFuMixinBinarySubsetOperator
  include SetFuMixinBinaryProperOperator

  # Array-with-Array keeps the built-in behaviour; anything else goes through BitSet.

  def -(item)
    return old_subtract_method4set(item) if item.class == Array
    BitSet.new(self) - BitSet.new(item)
  end

  def &(item)
    return old_and_method4set(item) if item.class == Array
    BitSet.new(self) & BitSet.new(item)
  end

  def |(item)
    return old_or_method4set(item) if item.class == Array
    BitSet.new(self) | BitSet.new(item)
  end
end

class Range
  include SetFuMixinBinaryAndOperator
  include SetFuMixinBinaryOrOperator
  include SetFuMixinBinaryXorOperator
  include SetFuMixinBinaryIntersectionOperator
  include SetFuMixinToSetMethod
  include SetFuMixinTrippleEqualsOperator
  include SetFuMixinBinarySubtractOperator
  include SetFuMixinBinarySubsetOperator
  include SetFuMixinBinaryProperOperator
end

# for older rubies ...
# Integer#bit_length fallback: slow but works ... >2.1.0 supports this natively and is much faster
# no need to improve as 1.9x is no longer supported
class Integer
  unless 5.respond_to? :bit_length
    # @return [Integer] number of bits needed to represent the receiver
    def bit_length
      pos = 0
      n = self
      loop do
        break if n == 0
        break if n == -1
        n >>= 1
        pos += 1
      end
      pos
    end
  end
end

class Integer
  # @return [Integer, nil] position of the lowest set bit, nil for zero
  def lsb_bit_position
    return nil if zero?
    pos = 0
    n = self
    # skip whole zero words, then zero bytes, before probing single bits
    while (n & 0xffffffffffffffff) == 0
      n >>= 64
      pos += 64
    end
    while (n & 0xff) == 0
      n >>= 8
      pos += 8
    end
    mask = 1
    loop do
      break if mask == (n & mask)
      mask <<= 1
      pos += 1
    end
    pos
  end

  # @return [Integer, nil] position of the highest set bit, nil when bit_length is zero
  def msb_bit_position
    t = bit_length
    t.zero? ? nil : t - 1
  end
end

=begin
#### too slow see first, last for improved performance
class BitSet
  def min
    return nil if empty?
    range = (self.entropy)..(0)
    while((range.first - range.last) >= 2) do
      mid = ((range.first - range.last) >> 1) + range.last
      top = (range.first)..(mid)
      bot = (mid)..(range.last)
      range = self ** bot ? bot : top
    end
    #byebug
    return range.last if (self[range.last])
    range.first
  end
  def max #look from left
    #byebug
    return nil if empty?
    range = (self.entropy)..(0)
    while((range.first - range.last) >= 2) do
      mid = ((range.first - range.last) >> 1) + range.last
      top = (range.first)..(mid)
      bot = (mid)..(range.last)
      range = self ** top ? top : bot
    end
    #byebug
    return range.first if (self[range.first])
    range.last
  end
end
=end

class BitSet
  # Sets the entropy to a power of two derived from the bit length of the set.
  def entropy_2n! # set entropy to the next greater power of 2**n
    @entropy = 1 << @bits.bit_length.bit_length
  end
end

class BitSet
  # @param p [Boolean] true for the integer, false for the UTF-8 character
  # @return [Integer, String, nil] lowest member, nil when the set is empty
  def first(p=true)
    rtn = @bits.lsb_bit_position
    return rtn if p || rtn.nil? # an empty set has no character to build
    rtn.chr(Encoding::UTF_8)
  end

  # @param p [Boolean] true for the integer, false for the UTF-8 character
  # @return [Integer, String, nil] highest member, nil when the set is empty
  def last(p=true)
    rtn = @bits.msb_bit_position
    return rtn if p || rtn.nil?
    rtn.chr(Encoding::UTF_8)
  end

  # Removes and returns the lowest member (see #first for +p+).
  def first!(p=true)
    rtn = @bits.lsb_bit_position
    return nil if rtn.nil?
    replace(self ^ [rtn])
    p ? rtn : rtn.chr(Encoding::UTF_8)
  end

  # Removes and returns the highest member (see #last for +p+).
  def last!(p=true)
    rtn = @bits.msb_bit_position
    return nil if rtn.nil?
    replace(self ^ [rtn])
    p ? rtn : rtn.chr(Encoding::UTF_8)
  end
end

class BitSet
  alias_method :min, :first
  alias_method :max, :last
end

class BitSet
  # 256-bit seed masks selecting the even / odd bit positions.
  @@even_bits = 0x5555555555555555555555555555555555555555555555555555555555555555
  @@odd_bits = 0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
  @@shifts = 256

  # @return [BitSet] copy holding only the odd members
  def odd
    odd_mask, _even_mask = parity_masks
    rtn = dup
    rtn.set_bits!(odd_mask & @bits)
    rtn
  end

  # @return [BitSet] copy holding only the even members
  def even
    _odd_mask, even_mask = parity_masks
    rtn = dup
    rtn.set_bits!(even_mask & @bits)
    rtn
  end

  # Keeps only the odd members in self.
  # @return [BitSet] the even members that were removed
  def odd!
    odd_mask, even_mask = parity_masks
    rtn = dup
    rtn.set_bits!(even_mask & @bits)
    set_bits!(odd_mask & @bits)
    rtn
  end

  # Keeps only the even members in self.
  # @return [BitSet] the odd members that were removed
  def even!
    odd_mask, even_mask = parity_masks
    rtn = dup
    rtn.set_bits!(odd_mask & @bits)
    set_bits!(even_mask & @bits)
    rtn
  end

  # @return [Boolean] true when the set is non-empty and every member is odd
  def odd?
    return false if empty?
    odd_mask, _even_mask = parity_masks
    (odd_mask & @bits) == @bits
  end

  # @return [Boolean] true when the set is non-empty and every member is even
  def even?
    return false if empty?
    _odd_mask, even_mask = parity_masks
    (even_mask & @bits) == @bits
  end

  private

  # Doubles the seed masks until they are at least as wide as @bits.
  # @return [Array(Integer, Integer)] [odd_mask, even_mask]
  def parity_masks
    odd_mask = @@odd_bits
    even_mask = @@even_bits
    shift = @@shifts
    # even_mask is the smaller of the two, so once it covers @bits both do
    while @bits > even_mask
      odd_mask |= odd_mask << shift
      even_mask |= even_mask << shift
      shift <<= 1
    end
    [odd_mask, even_mask]
  end
end

class BitSet
  # Adds the first +n+ prime numbers in place.
  def add_primes!(n=100)
    add!(Prime.take(n))
  end

  # Non-destructive #add_primes!.
  def add_primes(n=100)
    add(Prime.take(n))
  end
end

class BitSet
  # (Re)installs the built-in table mapping a code point to its opposite-case
  # code point.
  def self.default_utf_case_pairs
    @@TRANS_HASH = {
      192=>224, 193=>225, 194=>226, 195=>227, 196=>228, 197=>229, 198=>230, 199=>231,
      200=>232, 201=>233, 202=>234, 203=>235, 204=>236, 205=>237, 206=>238, 207=>239,
      208=>240, 209=>241, 210=>242, 211=>243, 212=>244, 213=>245, 214=>246, 216=>248,
      217=>249, 218=>250, 219=>251, 220=>252, 221=>253, 222=>254, 224=>192, 225=>193,
      226=>194, 227=>195, 228=>196, 229=>197, 230=>198, 231=>199, 232=>200, 233=>201,
      234=>202, 235=>203, 236=>204, 237=>205, 238=>206, 239=>207, 240=>208, 241=>209,
      242=>210, 243=>211, 244=>212, 245=>213, 246=>214, 248=>216, 249=>217, 250=>218,
      251=>219, 252=>220, 253=>221, 254=>222, 255=>376, 256=>257, 257=>256, 258=>259,
      259=>258, 260=>261, 261=>260, 262=>263, 263=>262, 264=>265, 265=>264, 266=>267,
      267=>266, 268=>269, 269=>268, 270=>271, 271=>270, 272=>273, 273=>272, 274=>275,
      275=>274, 276=>277, 277=>276, 278=>279, 279=>278, 280=>281, 281=>280, 282=>283,
      283=>282, 284=>285, 285=>284, 286=>287, 287=>286, 288=>289, 289=>288, 290=>291,
      291=>290, 292=>293, 293=>292, 294=>295, 295=>294, 296=>297, 297=>296, 298=>299,
      299=>298, 300=>301, 301=>300, 302=>303, 303=>302, 306=>307, 307=>306, 308=>309,
      309=>308, 310=>311, 311=>310, 313=>314, 314=>313, 315=>316, 316=>315, 317=>318,
      318=>317, 319=>320, 320=>319, 321=>322, 322=>321, 323=>324, 324=>323, 325=>326,
      326=>325, 327=>328, 328=>327, 330=>331, 331=>330, 332=>333, 333=>332, 334=>335,
      335=>334, 336=>337, 337=>336, 338=>339, 339=>338, 340=>341, 341=>340, 342=>343,
      343=>342, 344=>345, 345=>344, 346=>347, 347=>346, 348=>349, 349=>348, 350=>351,
      351=>350, 352=>353, 353=>352, 354=>355, 355=>354, 356=>357, 357=>356, 358=>359,
      359=>358, 360=>361, 361=>360, 362=>363, 363=>362, 364=>365, 365=>364, 366=>367,
      367=>366, 368=>369, 369=>368, 370=>371, 371=>370, 372=>373, 373=>372, 374=>375,
      375=>374, 376=>255, 377=>378, 378=>377, 379=>380, 380=>379, 381=>382, 382=>381,
      384=>579, 385=>595, 386=>387, 387=>386, 388=>389, 389=>388, 390=>596, 391=>392,
      392=>391, 393=>598, 394=>599, 395=>396, 396=>395, 398=>477, 399=>601, 400=>603,
      401=>402, 402=>401, 403=>608, 404=>611, 405=>502, 406=>617, 407=>616, 408=>409,
      409=>408, 410=>573, 412=>623, 413=>626, 414=>544, 415=>629, 416=>417, 417=>416,
      418=>419, 419=>418, 420=>421, 421=>420, 423=>424, 424=>423, 425=>643, 428=>429,
      429=>428, 430=>648, 431=>432, 432=>431, 433=>650, 434=>651, 435=>436, 436=>435,
      437=>438, 438=>437, 439=>658, 440=>441, 441=>440, 444=>445, 445=>444, 447=>503,
      452=>454, 454=>452, 455=>457, 457=>455, 458=>460, 460=>458, 461=>462, 462=>461,
      463=>464, 464=>463, 465=>466, 466=>465, 467=>468, 468=>467, 469=>470, 470=>469,
      471=>472, 472=>471, 473=>474, 474=>473, 475=>476, 476=>475, 477=>398, 478=>479,
      479=>478, 480=>481, 481=>480, 482=>483, 483=>482, 484=>485, 485=>484, 486=>487,
      487=>486, 488=>489, 489=>488, 490=>491, 491=>490, 492=>493, 493=>492, 494=>495,
      495=>494, 497=>499, 499=>497, 500=>501, 501=>500, 502=>405, 503=>447, 504=>505,
      505=>504, 506=>507, 507=>506, 508=>509, 509=>508, 510=>511, 511=>510, 512=>513,
      513=>512, 514=>515, 515=>514, 516=>517, 517=>516, 518=>519, 519=>518, 520=>521,
      521=>520, 522=>523, 523=>522, 524=>525, 525=>524, 526=>527, 527=>526, 528=>529,
      529=>528, 530=>531, 531=>530, 532=>533, 533=>532, 534=>535, 535=>534, 536=>537,
      537=>536, 538=>539, 539=>538, 540=>541, 541=>540, 542=>543, 543=>542, 544=>414,
      546=>547, 547=>546, 548=>549, 549=>548, 550=>551, 551=>550, 552=>553, 553=>552,
      554=>555, 555=>554, 556=>557, 557=>556, 558=>559, 559=>558, 560=>561, 561=>560,
      562=>563, 563=>562, 570=>11365, 571=>572, 572=>571, 573=>410, 574=>11366,
      575=>11390, 576=>11391, 577=>578, 578=>577, 579=>384, 580=>649, 581=>652,
      582=>583, 583=>582, 584=>585, 585=>584, 586=>587, 587=>586, 588=>589, 589=>588,
      590=>591, 591=>590, 592=>11375, 593=>11373, 594=>11376, 595=>385, 596=>390,
      598=>393, 599=>394, 601=>399, 603=>400, 608=>403, 611=>404, 616=>407, 617=>406,
      623=>412, 626=>413, 629=>415, 643=>425, 648=>430, 649=>580, 650=>433, 651=>434,
      652=>581, 658=>439, 913=>945, 914=>946, 915=>947, 916=>948, 917=>949, 918=>950,
      919=>951, 920=>952, 921=>953, 922=>954, 923=>955, 924=>956, 925=>957, 926=>958,
      927=>959, 928=>960, 929=>961, 931=>963, 932=>964, 933=>965, 934=>966, 935=>967,
      936=>968, 937=>969, 945=>913, 946=>914, 947=>915, 948=>916, 949=>917, 950=>918,
      951=>919, 952=>920, 953=>921, 954=>922, 955=>923, 956=>924, 957=>925, 958=>926,
      959=>927, 960=>928, 961=>929, 962=>931, 963=>931, 964=>932, 965=>933, 966=>934,
      967=>935, 968=>936, 969=>937, 11365=>570, 11366=>574, 11373=>593, 11375=>592,
      11376=>594, 11390=>575, 11391=>576, 64257=>64258, 64258=>64257
    }
  end
  default_utf_case_pairs # initialize with above

  # Empties the case-pair table.
  def self.zap_utf_case_pairs
    @@TRANS_HASH = {}
  end

  # Removes entries from the case-pair table.
  # @param obj [String, #each] characters (a String) or integer code points
  def self.rm_utf_case_pairs(obj)
    if obj.respond_to? :each_char
      obj.each_char do |ch|
        @@TRANS_HASH.delete ch.ord # was `str.ord`: NameError on first use
      end
    else
      obj.each do |elm|
        @@TRANS_HASH.delete elm
      end
    end
  end

  # Adds one-way mappings from a string of consecutive (from, to) character pairs.
  # @raise [RuntimeError] when the string has an odd number of characters
  def self.add_utf_case_pairs(str)
    return if str.empty?
    raise "add_utf_case_pairs requires even number of characters" unless str.length.even?
    0.step(str.length - 2, 2) do |x|
      @@TRANS_HASH[str[x].ord] = str[x + 1].ord
    end
  end

  # @param char [Boolean] false returns a copy of the integer table; true
  #   returns a character-keyed hash holding every pair in both directions
  # @return [Hash]
  def self.get_utf_case_pairs(char=true)
    return @@TRANS_HASH.dup unless char
    rtn = {}
    @@TRANS_HASH.each_pair do |key, value|
      rtn[key.chr(Encoding::UTF_8)] = value.chr(Encoding::UTF_8)
      rtn[value.chr(Encoding::UTF_8)] = key.chr(Encoding::UTF_8)
    end
    rtn
  end

  # For every ASCII letter in the set, adds the same letter in the other case.
  # @return [BitSet] self
  def add_opposing_case!
    aa = BitSet.lowercase_chars
    bb = BitSet.uppercase_chars
    ka = aa & self # subset lower case
    kb = bb & self # subset upper case
    @bits |= ka.to_i >> 32 # 'a'.ord - 'A'.ord == 32
    @bits |= kb.to_i << 32
    self.entropy = 32 + kb.recalculate_entropy! if self.entropy <= 123
    self
  end

  # Non-destructive #add_opposing_case!.
  def add_opposing_case
    dup.add_opposing_case!
  end

  # #add_opposing_case! plus the partner of every member listed in the
  # case-pair table.
  # @return [BitSet] self
  def add_opposing_utf_case!
    add_opposing_case!
    return self if @@TRANS_HASH.empty? # table may have been zapped
    keys = @@TRANS_HASH.keys
    # #each iterates a snapshot of the bits, so adding members here is safe
    each_member(keys.min..keys.max) do |elm|
      partner = @@TRANS_HASH[elm]
      next if partner.nil?
      @bits |= 1 << partner
      self.entropy = partner + 1 # the partner may lie above every current member
    end
    self
  end

  # Non-destructive #add_opposing_utf_case!.
  def add_opposing_utf_case
    dup.add_opposing_utf_case!
  end
end

class BitSet
  # Splits the members, in ascending order, into a lower and an upper half;
  # with an odd count the upper half gets the extra member.
  # @return [Array(BitSet, BitSet)] [lower, upper]
  def split
    ary = to_a
    lh = ary.length >> 1
    # first/drop stay correct when lh == 0; a 0..(lh-1) slice would turn into
    # 0..-1 and put a lone member into both halves.
    [ary.first(lh).to_bset, ary.drop(lh).to_bset]
  end

  # Keeps the lower half in self.
  # @return [BitSet] the upper half
  def split!
    both = split
    rtn = both.last
    replace both.first
    rtn
  end
end

# now the stuff that slows down the editor ... so last goes here
class BitSet
  UTF_UPPER_CASE_CHARS = "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝÞðĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮIJĴĶĹĻĽĿŁŃŅŇŊŌŎŐŒŔŖŘŚŜŞŠŢŤŦŨŪŬŮŰŲŴŶŸŹŻŽƁƂƄƆƇƉƊƋƎƏƐƑƓƔƖƗƘƜƝƟƠƢƤƦƧƩƬƮƯƱƲƳƵƷƸƼǍǏǑǓǕǗǙǛǞǠǢǤǦǨǪǬǮǴǶǷǸǺǼǾȀȂȄȆȈȊȌȎȐȒȔȖȘȚȜȞȠȢȤȦȨȪȬȮȰȲȺȻȽȾɁɃɄɅɆɈɊɌɎΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩⱭⱯⱰⱾⱿfl"
  UTF_LOWER_CASE_CHARS = "Ðàáâãäåçèéêëìíîïñòóôõöøùúûüýþÿāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįĵķĺļľŀłńņňŋōŏőŕŗřśŝşšţťŧũūŭůűųŵŷźżžƀƃƅƈƌƒƕƙƚƞơƥƨƭưƴƶƹƽƿǎǐǒǔǖǘǚǜǝǟǡǥǧǩǫǭǯǵǹǻǿȁȃȅȇȉȋȍȏȑȓȕȗșțȝȟȥȧȩȫȭȯȱȳȼȿɀɂɇɉɋɍɏɐɑɒɓɔɖɗəɛɠɣɨɩɯɲɵʀʃʈʉʊʋʌʒαβγδεζηθικλμνξοπρςστυφχψωⱥⱦfi"
  UTF_LOWER_CASE_DUAL_CHARS = "ȸȹijnjdždzʣʥæǣǽƣœœɶʦljʪʫijȣ"
  UTF_UPPER_CASE_DUAL_CHARS = "NJDŽDZÆǢǼƢƢŒLJIJIJȢ"
  UTF_MIXED_CASE_DUAL_CHARS = "LjNjDžDz"

  # Maps the target symbol accepted by .modify_utf_sets to the pair of class
  # variables ([chars, set]) it updates.
  UTF_SET_TARGETS = {
    :mix        => [:@@UTF_MIXED_CASE_DUAL_CHARS, :@@UTF_MIXED_CASE_DUAL_SET],
    :upper      => [:@@UTF_UPPER_CASE_CHARS, :@@UTF_UPPER_CASE_SET],
    :lower      => [:@@UTF_LOWER_CASE_CHARS, :@@UTF_LOWER_CASE_SET],
    :dual_upper => [:@@UTF_UPPER_CASE_DUAL_CHARS, :@@UTF_UPPER_CASE_DUAL_SET],
    :dual_lower => [:@@UTF_LOWER_CASE_DUAL_CHARS, :@@UTF_LOWER_CASE_DUAL_SET]
  }.freeze

  # (Re)installs the built-in UTF character classes, as strings and as sets.
  def self.default_utf_sets
    @@UTF_UPPER_CASE_CHARS = UTF_UPPER_CASE_CHARS
    @@UTF_LOWER_CASE_CHARS = UTF_LOWER_CASE_CHARS
    @@UTF_LOWER_CASE_DUAL_CHARS = UTF_LOWER_CASE_DUAL_CHARS
    @@UTF_UPPER_CASE_DUAL_CHARS = UTF_UPPER_CASE_DUAL_CHARS
    @@UTF_MIXED_CASE_DUAL_CHARS = UTF_MIXED_CASE_DUAL_CHARS
    @@UTF_UPPER_CASE_SET = UTF_UPPER_CASE_CHARS.to_bset
    @@UTF_LOWER_CASE_SET = UTF_LOWER_CASE_CHARS.to_bset
    @@UTF_LOWER_CASE_DUAL_SET = UTF_LOWER_CASE_DUAL_CHARS.to_bset
    @@UTF_UPPER_CASE_DUAL_SET = UTF_UPPER_CASE_DUAL_CHARS.to_bset
    @@UTF_MIXED_CASE_DUAL_SET = UTF_MIXED_CASE_DUAL_CHARS.to_bset
  end
  default_utf_sets

  # Empties every UTF character class.
  def self.zap_utf_sets
    @@UTF_UPPER_CASE_CHARS = ""
    @@UTF_LOWER_CASE_CHARS = ""
    @@UTF_LOWER_CASE_DUAL_CHARS = ""
    @@UTF_UPPER_CASE_DUAL_CHARS = ""
    @@UTF_MIXED_CASE_DUAL_CHARS = ""
    @@UTF_UPPER_CASE_SET = "".to_bset
    @@UTF_LOWER_CASE_SET = "".to_bset
    @@UTF_LOWER_CASE_DUAL_SET = "".to_bset
    @@UTF_UPPER_CASE_DUAL_SET = "".to_bset
    @@UTF_MIXED_CASE_DUAL_SET = "".to_bset
  end

  # Adds characters to, or removes characters from, one UTF character class.
  #
  # Arguments may come in any order:
  # * a String or BitSet - the characters to add or remove
  # * :add (default) or :rm - the operation
  # * :mix, :upper, :lower, :dual_upper or :dual_lower - the class to modify
  #
  # NOTE(review): the previous implementation compared the source against the
  # *name* of the class variable and updated it through eval, so in practice
  # it never changed anything.  The overlap guard is kept for :rm only
  # (nothing to remove means false); :add always applies, so classes emptied
  # by .zap_utf_sets can be refilled.  Confirm this matches the intent.
  #
  # @return [Boolean] true when the class was updated
  def self.modify_utf_sets(*prms)
    flag_add = true
    target = nil
    source = nil
    prms.each do |prm|
      if prm.kind_of?(String) || prm.kind_of?(BitSet)
        source = prm
      elsif prm == :rm
        flag_add = false
      elsif prm == :add
        flag_add = true
      elsif prm.kind_of? Symbol
        target = UTF_SET_TARGETS[prm] || target # unknown symbols are ignored
      end
    end
    return false if target.nil?
    return false if source.nil?
    chars_var, set_var = target
    current = class_variable_get(set_var)
    if flag_add
      updated = current | source
    else
      return false unless current ** source
      updated = current - source
    end
    class_variable_set(set_var, updated)
    class_variable_set(chars_var, updated.to_s) # keep the string form in sync
    true
  end

  # The accessors below return copies, so callers cannot mutate the shared sets.

  def self.uppercase_utf_chars
    @@UTF_UPPER_CASE_SET.dup
  end

  def self.lowercase_utf_chars
    @@UTF_LOWER_CASE_SET.dup
  end

  def self.mixcase_utf_chars
    @@UTF_MIXED_CASE_DUAL_SET.dup
  end

  def self.dual_uppercase_utf_chars
    @@UTF_UPPER_CASE_DUAL_SET.dup
  end

  def self.dual_lowercase_utf_chars
    @@UTF_LOWER_CASE_DUAL_SET.dup
  end
end

=begin
# BAD IDEA ... nil is usually an error
class NilClass # treat nil as empty set
  def to_set
    return Set.new
  end
end
=end