/**
 * \file modp_xml.h
 * \brief Experimental XML/HTML decoder
 *
 * This is mostly experimental.
 */

/*
 *
 * High Performance XML Decoder (for now)
 *
 * Copyright © 2012-2016 Nick Galbreath
 * All rights reserved.
 *
 * Released under MIT license. See LICENSE for details.
 *
 * https://github.com/client9/stringencoders
 *
 */

#ifndef COM_MODP_STRINGENCODERS_XML
#define COM_MODP_STRINGENCODERS_XML

#include "modp_stdint.h"

/*
 * BEGIN_C / END_C wrap the declarations below in an extern "C" block when
 * this header is compiled as C++, and expand to nothing when compiled as C.
 */
#ifdef __cplusplus
#define BEGIN_C extern "C" {
#define END_C }
#else
#define BEGIN_C
#define END_C
#endif

BEGIN_C

/**
 * \brief Validates that a unicode code point is valid for HTML (i.e. it is
 *        not undefined and not a non-white-space control char)
 *
 * \param[in] val a unicode code point expressed as an int
 * \return 0 if invalid, otherwise passes back the input value.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#character-references for more details
 *
 * This is only exposed for testing. It is not designed for public use.
 */
int modp_xml_validate_unicode(int val);

/**
 * \brief Converts a unicode code point expressed as an int into a UTF-8
 *        byte sequence.
 *
 * \param[out] dest assumed to have at least 4 chars available in buffer.
 * \param[in] uval a unicode character expressed as an int
 * \return 0 if input value is invalid or not a unicode character, else
 *         returns number of bytes written to dest.
 *
 * This is only exposed for testing. It is not designed for public use.
 */
size_t modp_xml_unicode_char_to_utf8(char* dest, int uval);

/**
 * \brief Parse a hex encoded entity between "&#x" and ";"
 *
 * \param[in] s a buffer pointing at the first char after "&#x"
 * \param[in] len the length of string between "&#x" and ";"
 * \return -1 if invalid, otherwise the unicode character value
 *
 * This is only exposed for testing. It is not designed for public use.
 */
int modp_xml_parse_hex_entity(const char* s, size_t len);

/**
 * \brief Parse a numerical decimal XML entity, e.g. "&#39;"
 *
 * \param[in] s the buffer pointing to first char after "&#".
 * \param[in] len the length between "&#" and ";". It is expected
 *            that all chars between are to be decimal digits.
 * \return -1 if invalid, else the unicode numeric value
 *
 * Exposed for testing. Not designed to be useful for public consumption.
 */
int modp_xml_parse_dec_entity(const char* s, size_t len);

/**
 * \brief XML decode a string
 *
 * \param[out] dest output string.
 *             NOTE(review): the original description was cut off after the
 *             word "Must", so the required capacity of dest is not stated
 *             here -- confirm against the implementation.
 * \param[in] str The input string
 * \param[in] len The length of the input string, excluding any
 *            final null byte.
 * \return the final size of the output, excluding any ending null byte.
 *
 * Decodes numerical entities (decimal or hexadecimal), and the following
 * named entities:
 *   * &apos;  (')
 *   * &quot;  (")
 *   * &amp;   (&)
 *   * &lt;    (<)
 *   * &gt;    (>)
 */
size_t modp_xml_decode(char* dest, const char* str, size_t len);

/**
 * \brief XML encode a UTF-8 string
 *
 * \param[out] dest output string.
 * \param[in] str The input string
 * \param[in] len The length of the input string, excluding any
 *            final null byte.
 * \return the final size of the output, excluding any ending null byte.
 *
 * Encodes an assumed valid UTF-8 input and escapes the characters:
 *   * '
 *   * "
 *   * &
 *   * <
 *   * >
 */
size_t modp_xml_encode(char* dest, const char* str, size_t len);

/*
 * NOTE(review): undocumented in the original header. Judging by the name,
 * this presumably reports the output length modp_xml_encode() needs for the
 * given input -- confirm against the implementation before relying on it.
 */
size_t modp_xml_min_encode_strlen(const char* str, size_t len);

END_C

#ifdef __cplusplus
#include