Sha256: 5665c42ba1795e91487b6bd6c3e192c2624ff7cfd123a5750fa3afb76e565425
Contents?: true
Size: 1.86 KB
Versions: 7
Compression:
Stored size: 1.86 KB
Contents
/*
    libcharguess - Guess the encoding/charset of a string
    Copyright (C) 2003  Stephane Corbe <noubi@users.sourceforge.net>

    Based on Mozilla sources

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include "UTF8Prober.h"

// Puts the prober back into its initial condition: the coding state machine
// is reset, the multi-byte character counter is cleared and the probing
// state becomes eDetecting.
void nsUTF8Prober::Reset(void)
{
  mState = eDetecting;
  mNumOfMBChar = 0;
  mCodingSM->Reset();
}

// Feeds aLen bytes from aBuf, one at a time, through the coding state machine
// and returns the resulting probing state.
//
//  - eError from the state machine sets the prober to eNotMe and stops.
//  - eItsMe from the state machine sets the prober to eFoundIt and stops.
//  - eStart with a current character length of at least 2 counts one more
//    multi-byte character.
//
// When the whole buffer was consumed and the prober is still eDetecting, it
// is promoted to eFoundIt if the confidence exceeds SHORTCUT_THRESHOLD.
nsProbingState nsUTF8Prober::HandleData(const char* aBuf, PRUint32 aLen)
{
  for (PRUint32 pos = 0; pos < aLen; pos++)
  {
    const nsSMState smState = mCodingSM->NextState(aBuf[pos]);

    if (smState == eError)
    {
      // Verdict reached: the remaining input is not examined.
      mState = eNotMe;
      return mState;
    }

    if (smState == eItsMe)
    {
      // Verdict reached: the remaining input is not examined.
      mState = eFoundIt;
      return mState;
    }

    // The machine reports eStart again (presumably a complete character was
    // just consumed); only characters of two or more bytes are counted.
    if (smState == eStart && mCodingSM->GetCurrentCharLen() >= 2)
      mNumOfMBChar++;
  }

  // GetConfidence() is only consulted while the prober is still undecided.
  if (mState == eDetecting && GetConfidence() > SHORTCUT_THRESHOLD)
    mState = eFoundIt;

  return mState;
}

#define ONE_CHAR_PROB (float)0.50

// Returns the confidence derived from the number of multi-byte characters
// counted so far.
//
// With fewer than 6 such characters the result is
//   1.0 - 0.99 * ONE_CHAR_PROB^mNumOfMBChar
// and with 6 or more it is a flat 0.99.
float nsUTF8Prober::GetConfidence(void)
{
  if (!(mNumOfMBChar < 6))
    return static_cast<float>(0.99);

  float doubt = static_cast<float>(0.99);
  for (PRUint32 seen = 0; seen < mNumOfMBChar; seen++)
    doubt *= ONE_CHAR_PROB;

  return static_cast<float>(1.0) - doubt;
}
Version data entries
7 entries across 7 versions & 1 rubygems