lib/rchardet/utf8prober.rb in edouard-rchardet-1.3.3 vs lib/rchardet/utf8prober.rb in edouard-rchardet-1.3.4.0

- old
+ new

@@ -12,16 +12,16 @@ # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. -# +# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. -# +# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA # 02110-1301 USA ######################### END LICENSE BLOCK ######################### @@ -40,47 +40,48 @@ super() @_mCodingSM.reset() @_mNumOfMBChar = 0 end - def get_charset_name - return "utf-8" + def charset_name + return "UTF-8" end def feed(aBuf) - for c in aBuf.split('') - codingState = @_mCodingSM.next_state(c) - if codingState == EError - @_mState = ENotMe - break - elsif codingState == EItsMe - @_mState = EFoundIt - break - elsif codingState == EStart - if @_mCodingSM.get_current_charlen() >= 2 - @_mNumOfMBChar += 1 - end - end + aBuf.each_byte do |b| + c = b.chr + codingState = @_mCodingSM.next_state(c) + if codingState == EError + @_mState = ENotMe + break + elsif codingState == EItsMe + @_mState = EFoundIt + break + elsif codingState == EStart + if @_mCodingSM.current_charlen >= 2 + @_mNumOfMBChar += 1 + end + end end - if get_state == EDetecting - if get_confidence > SHORTCUT_THRESHOLD - @_mState = EFoundIt - end + if state == EDetecting + if confidence > SHORTCUT_THRESHOLD + @_mState = EFoundIt + end end - return get_state + return state end - def get_confidence + def confidence unlike = 0.99 if @_mNumOfMBChar < 6 - for i in (0...@_mNumOfMBChar) - unlike = unlike * ONE_CHAR_PROB - end - return 1.0 - unlike - else - return unlike + (0...@_mNumOfMBChar).each do + unlike *= ONE_CHAR_PROB + end + return 1.0 - unlike + else + return unlike end end end end