Sha256: 5665c42ba1795e91487b6bd6c3e192c2624ff7cfd123a5750fa3afb76e565425

Contents?: true

Size: 1.86 KB

Versions: 7

Compression:

Stored size: 1.86 KB

Contents

/*
	libcharguess	-	Guess the encoding/charset of a string
    Copyright (C) 2003  Stephane Corbe <noubi@users.sourceforge.net>
	Based on Mozilla sources

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include "UTF8Prober.h"

void  nsUTF8Prober::Reset(void)
{
  mCodingSM->Reset(); 
  mNumOfMBChar = 0;
  mState = eDetecting;
}

nsProbingState nsUTF8Prober::HandleData(const char* aBuf, PRUint32 aLen)
{
  nsSMState codingState;

  for (PRUint32 i = 0; i < aLen; i++)
  {
    codingState = mCodingSM->NextState(aBuf[i]);
    if (codingState == eError)
    {
      mState = eNotMe;
      break;
    }
    if (codingState == eItsMe)
    {
      mState = eFoundIt;
      break;
    }
    if (codingState == eStart)
    {
      if (mCodingSM->GetCurrentCharLen() >= 2)
        mNumOfMBChar++;
    }
  }

  if (mState == eDetecting)
    if (GetConfidence() > SHORTCUT_THRESHOLD)
      mState = eFoundIt;
  return mState;
}

#define ONE_CHAR_PROB   (float)0.50

float nsUTF8Prober::GetConfidence(void)
{
  float unlike = (float)0.99;

  if (mNumOfMBChar < 6)
  {
    for (PRUint32 i = 0; i < mNumOfMBChar; i++)
      unlike *= ONE_CHAR_PROB;
    return (float)1.0 - unlike;
  }
  else
    return (float)0.99;
}

Version data entries

7 entries across 7 versions & 1 rubygems

Version Path
charguess-1.3.20111021164500 ext/libcharguess/cpp/UTF8Prober.cpp
charguess-1.3.20110226181011 ext/libcharguess/cpp/UTF8Prober.cpp
charguess-1.3.20110215234539 ext/libcharguess/cpp/UTF8Prober.cpp
charguess-1.3 ext/libcharguess/cpp/UTF8Prober.cpp
charguess-1.2 ext/libcharguess/cpp/UTF8Prober.cpp
charguess-1.1 ext/libcharguess/cpp/UTF8Prober.cpp
charguess-1.0 ext/libcharguess/cpp/UTF8Prober.cpp