// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_ #define ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_ #include #include #include "encodings/compact_lang_det/win/cld_scopedptr.h" #if (WINVER < 0x0600) // Copied from winnls.h, we're not using the latest SDK yet. typedef enum _NORM_FORM { NormalizationOther = 0, NormalizationC = 0x1, NormalizationD = 0x2, NormalizationKC = 0x5, NormalizationKD = 0x6 } NORM_FORM; #endif // Gives you back a normalized version of the input text. Normalization is // performed to the specified form. // Instance lifetime should be within the lifetime span of the 'text'. class NormalizedUnicodeText { public: // Creates an empty instance of NormalizedUnicodeText. NormalizedUnicodeText(); // Creates a fully initialized instance of NormalizedUnicodeText. // [in] normalization_form - normalization rule set (see MSDN for details). // [in] text - zero-terminated UTF-16 encoded string. // Returns 0 in case of success, Win32 error code in case of failure. // In case of failure, get() returns the original text. DWORD Normalize(NORM_FORM normalization_form, const WCHAR* text); // Returns pointer to the normalized text. const WCHAR* get() const { return normalized_text_; } private: // Normalizes 'text' by the 'normalization_form' rules. // [in] normalization_form - normalization rule set (see MSDN for details). // [in] text - zero-terminated UTF-16 encoded string. // [out] error_code - Win32 error code. const WCHAR* TryToNormalizeText(NORM_FORM normalization_form, const WCHAR* text, DWORD *error_code); // Pointer to the normalized text. const WCHAR* normalized_text_; // When the source text is already normalized by the requested normalization // form, text_ is not used and normalized_text_ just points to the source // text. When the source text requres normalization, text_ contains normalized // version of the source text and normalized_text_ points to this buffer. // Since CLD requires NormalizationC form and the overwhelming majority of all // texts in the Internet is already normalized to this form, it's expected // that this class will not introduce any runtime memory overhead. scoped_array text_; DISALLOW_COPY_AND_ASSIGN(NormalizedUnicodeText); }; #endif // ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_