/** * \file modp_xml.h * \brief Experimental XML/HTML decoder * * This is mostly experimental. */ /* *
 * High Performance XML Decoder (for now)
 *
 * Copyright © 2012-2016  Nick Galbreath
 * All rights reserved.
 *
 * Released under MIT license. See LICENSE fro details.
 *
 * https://github.com/client9/stringencoders
 *
 * 
*/ #ifndef COM_MODP_STRINGENCODERS_XML #define COM_MODP_STRINGENCODERS_XML #include "modp_stdint.h" #ifdef __cplusplus #define BEGIN_C extern "C" { #define END_C } #else #define BEGIN_C #define END_C #endif BEGIN_C /** * \brief Validates a unicode code point is valid for HTML (undefined * or non-white-space control char) * * \param[in] val a unicode char expressed as a uint32_t * \return 0 if invalid, else returns passes back the input value. * * See http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#character-references for more details * * This is only exposed for testing. It is not designed for public use. */ int modp_xml_validate_unicode(int val); /** * \brief converts a unicode char expressed as uint32_t into a UTF-8 byte sequence. * \param[out] dest assumed to have at least 4 chars available in buffer. * \param[in] uval A unicode character expressed as a uint32_t type * \return 0 if input value is invalid or not a unicode character, else * returns number of bytes written to dest. * * This is only exposed for testing. It is not designed for public use. */ size_t modp_xml_unicode_char_to_utf8(char* dest, int uval); /** * \brief parse a hex encoded entity between "&#x" and ";" * \param[in] s a buffer pointing at the first char after "&$x" * \param[in] len the length of string between "&#x" and ";" * \return -1 if invalid, otherwise the unicode character value * * This is only exposed for testing. It is not designed for public use. */ int modp_xml_parse_hex_entity(const char* s, size_t len); /** * \brief parse a numerical decimal XML entity, eg. &x39; * * \param[in] s the buffer pointing to first char after '&#'. * \param[in] len the length between '&#' and ';'. It is expected * that all chars between are to be decimal digits. * \return -1 if invalid, else the unicode numeric value * * Exposed for testing. Not designed to be useful for public consumption. */ int modp_xml_parse_dec_entity(const char* s, size_t len); /** * \brief XML decode a string * \param[out] dest output string. Must * \param[in] str The input string * \param[in] len The length of the input string, excluding any * final null byte. * \return the final size of the output, excluding any ending null byte. * * Decode numerical entities (decimal or hexadecimal), and following named * entities: * * ' * * " * * & * * < * * > * */ size_t modp_xml_decode(char* dest, const char* str, size_t len); /** * \brief XML encode a UTF-8 string * \param[out] dest output string. * \param[in] str The input string * \param[in] len The length of the input string, excluding any * final null byte. * \return the final size of the output, excluding any ending null byte. * Encodes an assumed valid UTF-8 input and escapes * * ' * * " * * & * * < * * > */ size_t modp_xml_encode(char* dest, const char* str, size_t len); size_t modp_xml_min_encode_strlen(const char* str, size_t len); END_C #ifdef __cplusplus #include #include namespace modp { /** * Url decode a string. * This function does not allocate memory. * * \param[in,out] s the string to be decoded * \return a reference to the input string. * There is no error case, bad characters are passed through */ inline std::string& xml_decode(std::string& s) { size_t d = modp_xml_decode(const_cast(s.data()), s.data(), s.size()); s.erase(d, std::string::npos); return s; } inline std::string xml_decode(const char* str) { std::string s(str); xml_decode(s); return s; } inline std::string xml_decode(const char* str, size_t len) { std::string s(str, len); xml_decode(s); return s; } inline std::string xml_decode(const std::string& s) { std::string x(s); xml_decode(x); return x; } } #endif #endif