ext/fast_jsonparser/simdjson.h in fast_jsonparser-0.2.0 vs ext/fast_jsonparser/simdjson.h in fast_jsonparser-0.3.0

- old
+ new

@@ -1,6 +1,6 @@ -/* auto-generated on Thu 2 Apr 2020 18:58:25 EDT. Do not edit! */ +/* auto-generated on Mon Jul 6 18:16:52 EDT 2020. Do not edit! */ /* begin file include/simdjson.h */ #ifndef SIMDJSON_H #define SIMDJSON_H /** @@ -23,50 +23,31 @@ #else #define SIMDJSON_CPLUSPLUS __cplusplus #endif #endif -#if (SIMDJSON_CPLUSPLUS < 201703L) -#error simdjson requires a compiler compliant with the C++17 standard +// C++ 17 +#if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) +#define SIMDJSON_CPLUSPLUS17 1 #endif -#endif // SIMDJSON_COMPILER_CHECK_H -/* end file include/simdjson/compiler_check.h */ +// C++ 14 +#if !defined(SIMDJSON_CPLUSPLUS14) && (SIMDJSON_CPLUSPLUS >= 201402L) +#define SIMDJSON_CPLUSPLUS14 1 +#endif -// Public API -/* begin file include/simdjson/simdjson_version.h */ -// /include/simdjson/simdjson_version.h automatically generated by release.py, -// do not change by hand -#ifndef SIMDJSON_SIMDJSON_VERSION_H -#define SIMDJSON_SIMDJSON_VERSION_H +// C++ 11 +#if !defined(SIMDJSON_CPLUSPLUS11) && (SIMDJSON_CPLUSPLUS >= 201103L) +#define SIMDJSON_CPLUSPLUS11 1 +#endif -/** The version of simdjson being used (major.minor.revision) */ -#define SIMDJSON_VERSION 0.3.1 +#ifndef SIMDJSON_CPLUSPLUS11 +#error simdjson requires a compiler compliant with the C++11 standard +#endif -namespace simdjson { -enum { - /** - * The major version (MAJOR.minor.revision) of simdjson being used. - */ - SIMDJSON_VERSION_MAJOR = 0, - /** - * The minor version (major.MINOR.revision) of simdjson being used. - */ - SIMDJSON_VERSION_MINOR = 3, - /** - * The revision (major.minor.REVISION) of simdjson being used. 
- */ - SIMDJSON_VERSION_REVISION = 1 -}; -} // namespace simdjson - -#endif // SIMDJSON_SIMDJSON_VERSION_H -/* end file include/simdjson/simdjson_version.h */ -/* begin file include/simdjson/error.h */ -#ifndef SIMDJSON_ERROR_H -#define SIMDJSON_ERROR_H - +#endif // SIMDJSON_COMPILER_CHECK_H +/* end file include/simdjson/compiler_check.h */ /* begin file include/simdjson/common_defs.h */ #ifndef SIMDJSON_COMMON_DEFS_H #define SIMDJSON_COMMON_DEFS_H #include <cassert> @@ -75,51 +56,106 @@ #define SIMDJSON_PORTABILITY_H #include <cstddef> #include <cstdint> #include <cstdlib> +#include <cfloat> +#include <cassert> + #ifdef _MSC_VER +#define SIMDJSON_VISUAL_STUDIO 1 +/** + * We want to differentiate carefully between + * clang under visual studio and regular visual + * studio. + * + * Under clang for Windows, we enable: + * * target pragmas so that part and only part of the + * code gets compiled for advanced instructions. + * * computed gotos. + * + */ +#ifdef __clang__ +// clang under visual studio +#define SIMDJSON_CLANG_VISUAL_STUDIO 1 +#else +// just regular visual studio (best guess) +#define SIMDJSON_REGULAR_VISUAL_STUDIO 1 +#endif // __clang__ +#endif // _MSC_VER + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +// https://en.wikipedia.org/wiki/C_alternative_tokens +// This header should have no effect, except maybe +// under Visual Studio. #include <iso646.h> #endif #if defined(__x86_64__) || defined(_M_AMD64) -#define IS_X86_64 1 +#define SIMDJSON_IS_X86_64 1 +#elif defined(__aarch64__) || defined(_M_ARM64) +#define SIMDJSON_IS_ARM64 1 +#else +#define SIMDJSON_IS_32BITS 1 + +// We do not support 32-bit platforms, but it can be +// handy to identify them. 
+#if defined(_M_IX86) || defined(__i386__) +#define SIMDJSON_IS_X86_32BITS 1 +#elif defined(__arm__) || defined(_M_ARM) +#define SIMDJSON_IS_ARM_32BITS 1 #endif -#if defined(__aarch64__) || defined(_M_ARM64) -#define IS_ARM64 1 -#endif +#endif // defined(__x86_64__) || defined(_M_AMD64) + +#ifdef SIMDJSON_IS_32BITS +#pragma message("The simdjson library is designed \ +for 64-bit processors and it seems that you are not \ +compiling for a known 64-bit platform. All fast kernels \ +will be disabled and performance may be poor. Please \ +use a 64-bit target such as x64 or 64-bit ARM.") +#endif // SIMDJSON_IS_32BITS + // this is almost standard? #undef STRINGIFY_IMPLEMENTATION_ #undef STRINGIFY #define STRINGIFY_IMPLEMENTATION_(a) #a #define STRINGIFY(a) STRINGIFY_IMPLEMENTATION_(a) #ifndef SIMDJSON_IMPLEMENTATION_FALLBACK #define SIMDJSON_IMPLEMENTATION_FALLBACK 1 #endif -#if IS_ARM64 +#if SIMDJSON_IS_ARM64 #ifndef SIMDJSON_IMPLEMENTATION_ARM64 #define SIMDJSON_IMPLEMENTATION_ARM64 1 #endif #define SIMDJSON_IMPLEMENTATION_HASWELL 0 #define SIMDJSON_IMPLEMENTATION_WESTMERE 0 -#endif // IS_ARM64 +#endif // SIMDJSON_IS_ARM64 -#if IS_X86_64 +// Our fast kernels require 64-bit systems. +// +// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. +// Furthermore, the number of SIMD registers is reduced. +// +// On 32-bit ARM, we would have smaller registers. +// +// The simdjson users should still have the fallback kernel. It is +// slower, but it should run everywhere. +#if SIMDJSON_IS_X86_64 #ifndef SIMDJSON_IMPLEMENTATION_HASWELL #define SIMDJSON_IMPLEMENTATION_HASWELL 1 #endif #ifndef SIMDJSON_IMPLEMENTATION_WESTMERE #define SIMDJSON_IMPLEMENTATION_WESTMERE 1 #endif #define SIMDJSON_IMPLEMENTATION_ARM64 0 -#endif // IS_X86_64 +#endif // SIMDJSON_IS_X86_64 -// we are going to use runtime dispatch -#ifdef IS_X86_64 +// We are going to use runtime dispatch. 
+#ifdef SIMDJSON_IS_X86_64 #ifdef __clang__ // clang does not have GCC push pop // warning: clang attribute push can't be used within a namespace in clang up // til 8.0 so TARGET_REGION and UNTARGET_REGION must be *outside* of a // namespace. @@ -145,40 +181,64 @@ // under GCC and CLANG, we use these two macros #define TARGET_HASWELL TARGET_REGION("avx2,bmi,pclmul,lzcnt") #define TARGET_WESTMERE TARGET_REGION("sse4.2,pclmul") #define TARGET_ARM64 -// Threading is disabled -#undef SIMDJSON_THREADS_ENABLED // Is threading enabled? #if defined(BOOST_HAS_THREADS) || defined(_REENTRANT) || defined(_MT) +#ifndef SIMDJSON_THREADS_ENABLED #define SIMDJSON_THREADS_ENABLED #endif +#endif +// workaround for large stack sizes under -O0. +// https://github.com/simdjson/simdjson/issues/691 +#ifdef __APPLE__ +#ifndef __OPTIMIZE__ +// Apple systems have small stack sizes in secondary threads. +// Lack of compiler optimization may generate high stack usage. +// Users may want to disable threads for safety, but only when +// in debug mode which we detect by the fact that the __OPTIMIZE__ +// macro is not defined. +#undef SIMDJSON_THREADS_ENABLED +#endif +#endif + + +#if SIMDJSON_DO_NOT_USE_THREADS_NO_MATTER_WHAT +// No matter what happened, we undefine SIMDJSON_THREADS_ENABLED and so disable threads. +#undef SIMDJSON_THREADS_ENABLED +#endif + + #if defined(__clang__) #define NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined"))) #elif defined(__GNUC__) #define NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined)) #else #define NO_SANITIZE_UNDEFINED #endif -#ifdef _MSC_VER -#include <intrin.h> // visual studio -#endif - -#ifdef _MSC_VER +#ifdef SIMDJSON_VISUAL_STUDIO +// This is one case where we do not distinguish between +// regular visual studio and clang under visual studio. 
+// clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has) #define simdjson_strcasecmp _stricmp +#define simdjson_strncasecmp _strnicmp #else +// The strcasecmp, strncasecmp, and strcasestr functions do not work with multibyte strings (e.g. UTF-8). +// So they are only useful for ASCII in our context. +// https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings #define simdjson_strcasecmp strcasecmp +#define simdjson_strncasecmp strncasecmp #endif namespace simdjson { /** @private portable version of posix_memalign */ static inline void *aligned_malloc(size_t alignment, size_t size) { void *p; -#ifdef _MSC_VER +#ifdef SIMDJSON_VISUAL_STUDIO p = _aligned_malloc(size, alignment); #elif defined(__MINGW32__) || defined(__MINGW64__) p = __mingw_aligned_malloc(size, alignment); #else // somehow, if this is used before including "x86intrin.h", it creates an @@ -198,11 +258,11 @@ /** @private */ static inline void aligned_free(void *mem_block) { if (mem_block == nullptr) { return; } -#ifdef _MSC_VER +#ifdef SIMDJSON_VISUAL_STUDIO _aligned_free(mem_block); #elif defined(__MINGW32__) || defined(__MINGW64__) __mingw_aligned_free(mem_block); #else free(mem_block); @@ -211,10 +271,28 @@ /** @private */ static inline void aligned_free_char(char *mem_block) { aligned_free((void *)mem_block); } + +#ifdef NDEBUG + +#ifdef SIMDJSON_VISUAL_STUDIO +#define SIMDJSON_UNREACHABLE() __assume(0) +#define SIMDJSON_ASSUME(COND) __assume(COND) +#else +#define SIMDJSON_UNREACHABLE() __builtin_unreachable(); +#define SIMDJSON_ASSUME(COND) do { if (!(COND)) __builtin_unreachable(); } while (0) +#endif + +#else // NDEBUG + +#define SIMDJSON_UNREACHABLE() assert(0); +#define SIMDJSON_ASSUME(COND) assert(COND) + +#endif + } // namespace simdjson #endif // SIMDJSON_PORTABILITY_H /* end file include/simdjson/portability.h */ namespace simdjson { @@ -248,96 +326,1756 @@ constexpr size_t DEFAULT_MAX_DEPTH = 1024; } // namespace 
simdjson #if defined(__GNUC__) -// Marks a block with a name so that MCA analysis can see it. -#define BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); -#define END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); -#define DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name); + // Marks a block with a name so that MCA analysis can see it. + #define BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); + #define END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); + #define DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name); #else -#define BEGIN_DEBUG_BLOCK(name) -#define END_DEBUG_BLOCK(name) -#define DEBUG_BLOCK(name, block) + #define BEGIN_DEBUG_BLOCK(name) + #define END_DEBUG_BLOCK(name) + #define DEBUG_BLOCK(name, block) #endif -#if !defined(_MSC_VER) && !defined(SIMDJSON_NO_COMPUTED_GOTO) -// Implemented using Labels as Values which works in GCC and CLANG (and maybe -// also in Intel's compiler), but won't work in MSVC. -#define SIMDJSON_USE_COMPUTED_GOTO +#if !defined(SIMDJSON_REGULAR_VISUAL_STUDIO) && !defined(SIMDJSON_NO_COMPUTED_GOTO) + // We assume here that *only* regular visual studio + // does not support computed gotos. + // Implemented using Labels as Values which works in GCC and CLANG (and maybe + // also in Intel's compiler), but won't work in MSVC. + // Compute gotos are good for performance, enable them if you can. 
+ #define SIMDJSON_USE_COMPUTED_GOTO #endif // Align to N-byte boundary #define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1)) #define ROUNDDOWN_N(a, n) ((a) & ~((n)-1)) #define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) -#ifdef _MSC_VER -#define really_inline __forceinline -#define never_inline __declspec(noinline) +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) -#define UNUSED -#define WARN_UNUSED + #define really_inline __forceinline + #define never_inline __declspec(noinline) -#ifndef likely -#define likely(x) x + #define UNUSED + #define WARN_UNUSED + + #ifndef likely + #define likely(x) x + #endif + #ifndef unlikely + #define unlikely(x) x + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS __pragma(warning( push )) + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 )) + #define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER )) + // Get rid of Intellisense-only warnings (Code Analysis) + // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910). 
+ #if defined(_MSC_VER) && (_MSC_VER>=1910) + #if __has_include(<CppCoreCheck\Warnings.h>) + #include <CppCoreCheck\Warnings.h> + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) + #endif + #endif + + #ifndef SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_VS_WARNING(4996) + #define SIMDJSON_POP_DISABLE_WARNINGS __pragma(warning( pop )) + +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + + #define really_inline inline __attribute__((always_inline, unused)) + #define never_inline inline __attribute__((noinline, unused)) + + #define UNUSED __attribute__((unused)) + #define WARN_UNUSED __attribute__((warn_unused_result)) + + #ifndef likely + #define likely(x) __builtin_expect(!!(x), 1) + #endif + #ifndef unlikely + #define unlikely(x) __builtin_expect(!!(x), 0) + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") + // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) + #define SIMDJSON_PRAGMA(P) _Pragma(#P) + #define SIMDJSON_DISABLE_GCC_WARNING(WARNING) SIMDJSON_PRAGMA(GCC diagnostic ignored #WARNING) + #if defined(SIMDJSON_CLANG_VISUAL_STUDIO) + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_GCC_WARNING(-Wmicrosoft-include) + #else + #define 
SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wdeprecated-declarations) + #define SIMDJSON_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") + + + +#endif // MSC_VER + +#if defined(SIMDJSON_VISUAL_STUDIO) + /** + * It does not matter here whether you are using + * the regular visual studio or clang under visual + * studio. + */ + #if SIMDJSON_USING_LIBRARY + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) + #else + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllexport) + #endif +#else + #define SIMDJSON_DLLIMPORTEXPORT #endif -#ifndef unlikely -#define unlikely(x) x + +// C++17 requires string_view. +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_HAS_STRING_VIEW #endif -#define SIMDJSON_PUSH_DISABLE_WARNINGS __pragma(warning( push )) -#define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 )) -#define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER )) -#define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_VS_WARNING(4996) -#define SIMDJSON_POP_DISABLE_WARNINGS __pragma(warning( pop )) +// This macro (__cpp_lib_string_view) has to be defined +// for C++17 and better, but if it is otherwise defined, +// we are going to assume that string_view is available +// even if we do not have C++17 support. +#ifdef __cpp_lib_string_view +#define SIMDJSON_HAS_STRING_VIEW +#endif -#else // MSC_VER +// Some systems have string_view even if we do not have C++17 support, +// and even if __cpp_lib_string_view is undefined, it is the case +// with Apple clang version 11. +// We must handle it. 
*This is important.* +#ifndef SIMDJSON_HAS_STRING_VIEW +#if defined __has_include +// do not combine the next #if with the previous one (unsafe) +#if __has_include (<string_view>) +// now it is safe to trigger the include +#include <string_view> // though the file is there, it does not follow that we got the implementation +#if defined(_LIBCPP_STRING_VIEW) +// Ah! So we under libc++ which under its Library Fundamentals Technical Specification, which preceeded C++17, +// included string_view. +// This means that we have string_view *even though* we may not have C++17. +#define SIMDJSON_HAS_STRING_VIEW +#endif // _LIBCPP_STRING_VIEW +#endif // __has_include (<string_view>) +#endif // defined __has_include +#endif // def SIMDJSON_HAS_STRING_VIEW +// end of complicated but important routine to try to detect string_view. +// +// Backfill std::string_view using nonstd::string_view on systems where +// we expect that string_view is missing. Important: if we get this wrong, +// we will end up with two string_view definitions and potential trouble. +// That is why we work so hard above to avoid it. +// +#ifndef SIMDJSON_HAS_STRING_VIEW +SIMDJSON_PUSH_DISABLE_ALL_WARNINGS +/* begin file include/simdjson/nonstd/string_view.hpp */ +// Copyright 2017-2019 by Martin Moene +// +// string-view lite, a C++17-like string_view for C++98 and later. +// For more information see https://github.com/martinmoene/string-view-lite +// +// Distributed under the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#define really_inline inline __attribute__((always_inline, unused)) -#define never_inline inline __attribute__((noinline, unused)) +#pragma once -#define UNUSED __attribute__((unused)) -#define WARN_UNUSED __attribute__((warn_unused_result)) +#ifndef NONSTD_SV_LITE_H_INCLUDED +#define NONSTD_SV_LITE_H_INCLUDED -#ifndef likely -#define likely(x) __builtin_expect(!!(x), 1) +#define string_view_lite_MAJOR 1 +#define string_view_lite_MINOR 4 +#define string_view_lite_PATCH 0 + +#define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." nssv_STRINGIFY(string_view_lite_PATCH) + +#define nssv_STRINGIFY( x ) nssv_STRINGIFY_( x ) +#define nssv_STRINGIFY_( x ) #x + +// string-view lite configuration: + +#define nssv_STRING_VIEW_DEFAULT 0 +#define nssv_STRING_VIEW_NONSTD 1 +#define nssv_STRING_VIEW_STD 2 + +#if !defined( nssv_CONFIG_SELECT_STRING_VIEW ) +# define nssv_CONFIG_SELECT_STRING_VIEW ( nssv_HAVE_STD_STRING_VIEW ? nssv_STRING_VIEW_STD : nssv_STRING_VIEW_NONSTD ) #endif -#ifndef unlikely -#define unlikely(x) __builtin_expect(!!(x), 0) + +#if defined( nssv_CONFIG_SELECT_STD_STRING_VIEW ) || defined( nssv_CONFIG_SELECT_NONSTD_STRING_VIEW ) +# error nssv_CONFIG_SELECT_STD_STRING_VIEW and nssv_CONFIG_SELECT_NONSTD_STRING_VIEW are deprecated and removed, please use nssv_CONFIG_SELECT_STRING_VIEW=nssv_STRING_VIEW_... 
#endif -#define SIMDJSON_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") -// gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary -#define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ - SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) -#define SIMDJSON_PRAGMA(P) _Pragma(#P) -#define SIMDJSON_DISABLE_GCC_WARNING(WARNING) SIMDJSON_PRAGMA(GCC diagnostic ignored #WARNING) -#define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wdeprecated-declarations) -#define SIMDJSON_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") +#ifndef nssv_CONFIG_STD_SV_OPERATOR +# define nssv_CONFIG_STD_SV_OPERATOR 0 +#endif -#endif // MSC_VER +#ifndef nssv_CONFIG_USR_SV_OPERATOR +# define nssv_CONFIG_USR_SV_OPERATOR 1 +#endif +#ifdef nssv_CONFIG_CONVERSION_STD_STRING +# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS nssv_CONFIG_CONVERSION_STD_STRING +# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS nssv_CONFIG_CONVERSION_STD_STRING +#endif + +#ifndef nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS +# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS 1 +#endif + +#ifndef nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS 1 +#endif + +// Control presence of exception handling (try and auto discover): + +#ifndef nssv_CONFIG_NO_EXCEPTIONS +# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND) +# define nssv_CONFIG_NO_EXCEPTIONS 0 +# else +# define nssv_CONFIG_NO_EXCEPTIONS 1 +# endif +#endif + +// C++ language version detection (C++20 is speculative): +// Note: VC14.0/1900 (VS2015) lacks too much from C++14. + +#ifndef nssv_CPLUSPLUS +# if defined(_MSVC_LANG ) && !defined(__clang__) +# define nssv_CPLUSPLUS (_MSC_VER == 1900 ? 
201103L : _MSVC_LANG ) +# else +# define nssv_CPLUSPLUS __cplusplus +# endif +#endif + +#define nssv_CPP98_OR_GREATER ( nssv_CPLUSPLUS >= 199711L ) +#define nssv_CPP11_OR_GREATER ( nssv_CPLUSPLUS >= 201103L ) +#define nssv_CPP11_OR_GREATER_ ( nssv_CPLUSPLUS >= 201103L ) +#define nssv_CPP14_OR_GREATER ( nssv_CPLUSPLUS >= 201402L ) +#define nssv_CPP17_OR_GREATER ( nssv_CPLUSPLUS >= 201703L ) +#define nssv_CPP20_OR_GREATER ( nssv_CPLUSPLUS >= 202000L ) + +// use C++17 std::string_view if available and requested: + +#if nssv_CPP17_OR_GREATER && defined(__has_include ) +# if __has_include( <string_view> ) +# define nssv_HAVE_STD_STRING_VIEW 1 +# else +# define nssv_HAVE_STD_STRING_VIEW 0 +# endif +#else +# define nssv_HAVE_STD_STRING_VIEW 0 +#endif + +#define nssv_USES_STD_STRING_VIEW ( (nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_STD) || ((nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_DEFAULT) && nssv_HAVE_STD_STRING_VIEW) ) + +#define nssv_HAVE_STARTS_WITH ( nssv_CPP20_OR_GREATER || !nssv_USES_STD_STRING_VIEW ) +#define nssv_HAVE_ENDS_WITH nssv_HAVE_STARTS_WITH + +// +// Use C++17 std::string_view: +// + +#if nssv_USES_STD_STRING_VIEW + +#include <string_view> + +// Extensions for std::string: + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { + +template< class CharT, class Traits, class Allocator = std::allocator<CharT> > +std::basic_string<CharT, Traits, Allocator> +to_string( std::basic_string_view<CharT, Traits> v, Allocator const & a = Allocator() ) +{ + return std::basic_string<CharT,Traits, Allocator>( v.begin(), v.end(), a ); +} + +template< class CharT, class Traits, class Allocator > +std::basic_string_view<CharT, Traits> +to_string_view( std::basic_string<CharT, Traits, Allocator> const & s ) +{ + return std::basic_string_view<CharT, Traits>( s.data(), s.size() ); +} + +// Literal operators sv and _sv: + +#if nssv_CONFIG_STD_SV_OPERATOR + +using namespace std::literals::string_view_literals; + +#endif + +#if 
nssv_CONFIG_USR_SV_OPERATOR + +inline namespace literals { +inline namespace string_view_literals { + + +constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1) +{ + return std::string_view{ str, len }; +} + +constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) +{ + return std::u16string_view{ str, len }; +} + +constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) +{ + return std::u32string_view{ str, len }; +} + +constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) +{ + return std::wstring_view{ str, len }; +} + +}} // namespace literals::string_view_literals + +#endif // nssv_CONFIG_USR_SV_OPERATOR + +} // namespace nonstd + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { + +using std::string_view; +using std::wstring_view; +using std::u16string_view; +using std::u32string_view; +using std::basic_string_view; + +// literal "sv" and "_sv", see above + +using std::operator==; +using std::operator!=; +using std::operator<; +using std::operator<=; +using std::operator>; +using std::operator>=; + +using std::operator<<; + +} // namespace nonstd + +#else // nssv_HAVE_STD_STRING_VIEW + +// +// Before C++17: use string_view lite: +// + +// Compiler versions: +// +// MSVC++ 6.0 _MSC_VER == 1200 nssv_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0) +// MSVC++ 7.0 _MSC_VER == 1300 nssv_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002) +// MSVC++ 7.1 _MSC_VER == 1310 nssv_COMPILER_MSVC_VERSION == 71 (Visual Studio .NET 2003) +// MSVC++ 8.0 _MSC_VER == 1400 nssv_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005) +// MSVC++ 9.0 _MSC_VER == 1500 nssv_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008) +// MSVC++ 10.0 _MSC_VER == 1600 nssv_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010) +// MSVC++ 11.0 _MSC_VER == 1700 nssv_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012) +// MSVC++ 
12.0 _MSC_VER == 1800 nssv_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013) +// MSVC++ 14.0 _MSC_VER == 1900 nssv_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015) +// MSVC++ 14.1 _MSC_VER >= 1910 nssv_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017) +// MSVC++ 14.2 _MSC_VER >= 1920 nssv_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019) + +#if defined(_MSC_VER ) && !defined(__clang__) +# define nssv_COMPILER_MSVC_VER (_MSC_VER ) +# define nssv_COMPILER_MSVC_VERSION (_MSC_VER / 10 - 10 * ( 5 + (_MSC_VER < 1900 ) ) ) +#else +# define nssv_COMPILER_MSVC_VER 0 +# define nssv_COMPILER_MSVC_VERSION 0 +#endif + +#define nssv_COMPILER_VERSION( major, minor, patch ) ( 10 * ( 10 * (major) + (minor) ) + (patch) ) + +#if defined(__clang__) +# define nssv_COMPILER_CLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +#else +# define nssv_COMPILER_CLANG_VERSION 0 +#endif + +#if defined(__GNUC__) && !defined(__clang__) +# define nssv_COMPILER_GNUC_VERSION nssv_COMPILER_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#else +# define nssv_COMPILER_GNUC_VERSION 0 +#endif + +// half-open range [lo..hi): +#define nssv_BETWEEN( v, lo, hi ) ( (lo) <= (v) && (v) < (hi) ) + +// Presence of language and library features: + +#ifdef _HAS_CPP0X +# define nssv_HAS_CPP0X _HAS_CPP0X +#else +# define nssv_HAS_CPP0X 0 +#endif + +// Unless defined otherwise below, consider VC14 as C++11 for variant-lite: + +#if nssv_COMPILER_MSVC_VER >= 1900 +# undef nssv_CPP11_OR_GREATER +# define nssv_CPP11_OR_GREATER 1 +#endif + +#define nssv_CPP11_90 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1500) +#define nssv_CPP11_100 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1600) +#define nssv_CPP11_110 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1700) +#define nssv_CPP11_120 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1800) +#define nssv_CPP11_140 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1900) +#define nssv_CPP11_141 
(nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1910) + +#define nssv_CPP14_000 (nssv_CPP14_OR_GREATER) +#define nssv_CPP17_000 (nssv_CPP17_OR_GREATER) + +// Presence of C++11 language features: + +#define nssv_HAVE_CONSTEXPR_11 nssv_CPP11_140 +#define nssv_HAVE_EXPLICIT_CONVERSION nssv_CPP11_140 +#define nssv_HAVE_INLINE_NAMESPACE nssv_CPP11_140 +#define nssv_HAVE_NOEXCEPT nssv_CPP11_140 +#define nssv_HAVE_NULLPTR nssv_CPP11_100 +#define nssv_HAVE_REF_QUALIFIER nssv_CPP11_140 +#define nssv_HAVE_UNICODE_LITERALS nssv_CPP11_140 +#define nssv_HAVE_USER_DEFINED_LITERALS nssv_CPP11_140 +#define nssv_HAVE_WCHAR16_T nssv_CPP11_100 +#define nssv_HAVE_WCHAR32_T nssv_CPP11_100 + +#if ! ( ( nssv_CPP11_OR_GREATER && nssv_COMPILER_CLANG_VERSION ) || nssv_BETWEEN( nssv_COMPILER_CLANG_VERSION, 300, 400 ) ) +# define nssv_HAVE_STD_DEFINED_LITERALS nssv_CPP11_140 +#else +# define nssv_HAVE_STD_DEFINED_LITERALS 0 +#endif + +// Presence of C++14 language features: + +#define nssv_HAVE_CONSTEXPR_14 nssv_CPP14_000 + +// Presence of C++17 language features: + +#define nssv_HAVE_NODISCARD nssv_CPP17_000 + +// Presence of C++ library features: + +#define nssv_HAVE_STD_HASH nssv_CPP11_120 + +// C++ feature usage: + +#if nssv_HAVE_CONSTEXPR_11 +# define nssv_constexpr constexpr +#else +# define nssv_constexpr /*constexpr*/ +#endif + +#if nssv_HAVE_CONSTEXPR_14 +# define nssv_constexpr14 constexpr +#else +# define nssv_constexpr14 /*constexpr*/ +#endif + +#if nssv_HAVE_EXPLICIT_CONVERSION +# define nssv_explicit explicit +#else +# define nssv_explicit /*explicit*/ +#endif + +#if nssv_HAVE_INLINE_NAMESPACE +# define nssv_inline_ns inline +#else +# define nssv_inline_ns /*inline*/ +#endif + +#if nssv_HAVE_NOEXCEPT +# define nssv_noexcept noexcept +#else +# define nssv_noexcept /*noexcept*/ +#endif + +//#if nssv_HAVE_REF_QUALIFIER +//# define nssv_ref_qual & +//# define nssv_refref_qual && +//#else +//# define nssv_ref_qual /*&*/ +//# define nssv_refref_qual /*&&*/ +//#endif + +#if 
nssv_HAVE_NULLPTR +# define nssv_nullptr nullptr +#else +# define nssv_nullptr NULL +#endif + +#if nssv_HAVE_NODISCARD +# define nssv_nodiscard [[nodiscard]] +#else +# define nssv_nodiscard /*[[nodiscard]]*/ +#endif + +// Additional includes: + +#include <algorithm> +#include <cassert> +#include <iterator> +#include <limits> +#include <ostream> +#include <string> // std::char_traits<> + +#if ! nssv_CONFIG_NO_EXCEPTIONS +# include <stdexcept> +#endif + +#if nssv_CPP11_OR_GREATER +# include <type_traits> +#endif + +// Clang, GNUC, MSVC warning suppression macros: + +#if defined(__clang__) +# pragma clang diagnostic ignored "-Wreserved-user-defined-literal" +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wuser-defined-literals" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wliteral-suffix" +#endif // __clang__ + +#if nssv_COMPILER_MSVC_VERSION >= 140 +# define nssv_SUPPRESS_MSGSL_WARNING(expr) [[gsl::suppress(expr)]] +# define nssv_SUPPRESS_MSVC_WARNING(code, descr) __pragma(warning(suppress: code) ) +# define nssv_DISABLE_MSVC_WARNINGS(codes) __pragma(warning(push)) __pragma(warning(disable: codes)) +#else +# define nssv_SUPPRESS_MSGSL_WARNING(expr) +# define nssv_SUPPRESS_MSVC_WARNING(code, descr) +# define nssv_DISABLE_MSVC_WARNINGS(codes) +#endif + +#if defined(__clang__) +# define nssv_RESTORE_WARNINGS() _Pragma("clang diagnostic pop") +#elif defined(__GNUC__) +# define nssv_RESTORE_WARNINGS() _Pragma("GCC diagnostic pop") +#elif nssv_COMPILER_MSVC_VERSION >= 140 +# define nssv_RESTORE_WARNINGS() __pragma(warning(pop )) +#else +# define nssv_RESTORE_WARNINGS() +#endif + +// Suppress the following MSVC (GSL) warnings: +// - C4455, non-gsl : 'operator ""sv': literal suffix identifiers that do not +// start with an underscore are reserved +// - C26472, gsl::t.1 : don't use a static_cast for arithmetic conversions; +// use brace initialization, gsl::narrow_cast or gsl::narow +// - C26481: gsl::b.1 : 
don't use pointer arithmetic. Use span instead + +nssv_DISABLE_MSVC_WARNINGS( 4455 26481 26472 ) +//nssv_DISABLE_CLANG_WARNINGS( "-Wuser-defined-literals" ) +//nssv_DISABLE_GNUC_WARNINGS( -Wliteral-suffix ) + +namespace nonstd { namespace sv_lite { + +#if nssv_CPP11_OR_GREATER + +namespace detail { + +#if nssv_CPP14_OR_GREATER + +template< typename CharT > +inline constexpr std::size_t length( CharT * s, std::size_t result = 0 ) +{ + CharT * v = s; + std::size_t r = result; + while ( *v != '\0' ) { + ++v; + ++r; + } + return r; +} + +#else // nssv_CPP14_OR_GREATER + +// Expect tail call optimization to make length() non-recursive: + +template< typename CharT > +inline constexpr std::size_t length( CharT * s, std::size_t result = 0 ) +{ + return *s == '\0' ? result : length( s + 1, result + 1 ); +} + +#endif // nssv_CPP14_OR_GREATER + +} // namespace detail + +#endif // nssv_CPP11_OR_GREATER + +template +< + class CharT, + class Traits = std::char_traits<CharT> +> +class basic_string_view; + +// +// basic_string_view: +// + +template +< + class CharT, + class Traits /* = std::char_traits<CharT> */ +> +class basic_string_view +{ +public: + // Member types: + + typedef Traits traits_type; + typedef CharT value_type; + + typedef CharT * pointer; + typedef CharT const * const_pointer; + typedef CharT & reference; + typedef CharT const & const_reference; + + typedef const_pointer iterator; + typedef const_pointer const_iterator; + typedef std::reverse_iterator< const_iterator > reverse_iterator; + typedef std::reverse_iterator< const_iterator > const_reverse_iterator; + + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + // 24.4.2.1 Construction and assignment: + + nssv_constexpr basic_string_view() nssv_noexcept + : data_( nssv_nullptr ) + , size_( 0 ) + {} + +#if nssv_CPP11_OR_GREATER + nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept = default; +#else + nssv_constexpr basic_string_view( basic_string_view 
const & other ) nssv_noexcept + : data_( other.data_) + , size_( other.size_) + {} +#endif + + nssv_constexpr basic_string_view( CharT const * s, size_type count ) nssv_noexcept // non-standard noexcept + : data_( s ) + , size_( count ) + {} + + nssv_constexpr basic_string_view( CharT const * s) nssv_noexcept // non-standard noexcept + : data_( s ) +#if nssv_CPP17_OR_GREATER + , size_( Traits::length(s) ) +#elif nssv_CPP11_OR_GREATER + , size_( detail::length(s) ) +#else + , size_( Traits::length(s) ) +#endif + {} + + // Assignment: + +#if nssv_CPP11_OR_GREATER + nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept = default; +#else + nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept + { + data_ = other.data_; + size_ = other.size_; + return *this; + } +#endif + + // 24.4.2.2 Iterator support: + + nssv_constexpr const_iterator begin() const nssv_noexcept { return data_; } + nssv_constexpr const_iterator end() const nssv_noexcept { return data_ + size_; } + + nssv_constexpr const_iterator cbegin() const nssv_noexcept { return begin(); } + nssv_constexpr const_iterator cend() const nssv_noexcept { return end(); } + + nssv_constexpr const_reverse_iterator rbegin() const nssv_noexcept { return const_reverse_iterator( end() ); } + nssv_constexpr const_reverse_iterator rend() const nssv_noexcept { return const_reverse_iterator( begin() ); } + + nssv_constexpr const_reverse_iterator crbegin() const nssv_noexcept { return rbegin(); } + nssv_constexpr const_reverse_iterator crend() const nssv_noexcept { return rend(); } + + // 24.4.2.3 Capacity: + + nssv_constexpr size_type size() const nssv_noexcept { return size_; } + nssv_constexpr size_type length() const nssv_noexcept { return size_; } + nssv_constexpr size_type max_size() const nssv_noexcept { return (std::numeric_limits< size_type >::max)(); } + + // since C++20 + nssv_nodiscard nssv_constexpr bool empty() const nssv_noexcept + { 
+ return 0 == size_; + } + + // 24.4.2.4 Element access: + + nssv_constexpr const_reference operator[]( size_type pos ) const + { + return data_at( pos ); + } + + nssv_constexpr14 const_reference at( size_type pos ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos < size() ); +#else + if ( pos >= size() ) + { + throw std::out_of_range("nonstd::string_view::at()"); + } +#endif + return data_at( pos ); + } + + nssv_constexpr const_reference front() const { return data_at( 0 ); } + nssv_constexpr const_reference back() const { return data_at( size() - 1 ); } + + nssv_constexpr const_pointer data() const nssv_noexcept { return data_; } + + // 24.4.2.5 Modifiers: + + nssv_constexpr14 void remove_prefix( size_type n ) + { + assert( n <= size() ); + data_ += n; + size_ -= n; + } + + nssv_constexpr14 void remove_suffix( size_type n ) + { + assert( n <= size() ); + size_ -= n; + } + + nssv_constexpr14 void swap( basic_string_view & other ) nssv_noexcept + { + using std::swap; + swap( data_, other.data_ ); + swap( size_, other.size_ ); + } + + // 24.4.2.6 String operations: + + size_type copy( CharT * dest, size_type n, size_type pos = 0 ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos <= size() ); +#else + if ( pos > size() ) + { + throw std::out_of_range("nonstd::string_view::copy()"); + } +#endif + const size_type rlen = (std::min)( n, size() - pos ); + + (void) Traits::copy( dest, data() + pos, rlen ); + + return rlen; + } + + nssv_constexpr14 basic_string_view substr( size_type pos = 0, size_type n = npos ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos <= size() ); +#else + if ( pos > size() ) + { + throw std::out_of_range("nonstd::string_view::substr()"); + } +#endif + return basic_string_view( data() + pos, (std::min)( n, size() - pos ) ); + } + + // compare(), 6x: + + nssv_constexpr14 int compare( basic_string_view other ) const nssv_noexcept // (1) + { + if ( const int result = Traits::compare( data(), other.data(), (std::min)( size(), 
other.size() ) ) ) + { + return result; + } + + return size() == other.size() ? 0 : size() < other.size() ? -1 : 1; + } + + nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other ) const // (2) + { + return substr( pos1, n1 ).compare( other ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other, size_type pos2, size_type n2 ) const // (3) + { + return substr( pos1, n1 ).compare( other.substr( pos2, n2 ) ); + } + + nssv_constexpr int compare( CharT const * s ) const // (4) + { + return compare( basic_string_view( s ) ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s ) const // (5) + { + return substr( pos1, n1 ).compare( basic_string_view( s ) ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s, size_type n2 ) const // (6) + { + return substr( pos1, n1 ).compare( basic_string_view( s, n2 ) ); + } + + // 24.4.2.7 Searching: + + // starts_with(), 3x, since C++20: + + nssv_constexpr bool starts_with( basic_string_view v ) const nssv_noexcept // (1) + { + return size() >= v.size() && compare( 0, v.size(), v ) == 0; + } + + nssv_constexpr bool starts_with( CharT c ) const nssv_noexcept // (2) + { + return starts_with( basic_string_view( &c, 1 ) ); + } + + nssv_constexpr bool starts_with( CharT const * s ) const // (3) + { + return starts_with( basic_string_view( s ) ); + } + + // ends_with(), 3x, since C++20: + + nssv_constexpr bool ends_with( basic_string_view v ) const nssv_noexcept // (1) + { + return size() >= v.size() && compare( size() - v.size(), npos, v ) == 0; + } + + nssv_constexpr bool ends_with( CharT c ) const nssv_noexcept // (2) + { + return ends_with( basic_string_view( &c, 1 ) ); + } + + nssv_constexpr bool ends_with( CharT const * s ) const // (3) + { + return ends_with( basic_string_view( s ) ); + } + + // find(), 4x: + + nssv_constexpr14 size_type find( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) 
+ { + return assert( v.size() == 0 || v.data() != nssv_nullptr ) + , pos >= size() + ? npos + : to_pos( std::search( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr14 size_type find( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr14 size_type find( CharT const * s, size_type pos, size_type n ) const // (3) + { + return find( basic_string_view( s, n ), pos ); + } + + nssv_constexpr14 size_type find( CharT const * s, size_type pos = 0 ) const // (4) + { + return find( basic_string_view( s ), pos ); + } + + // rfind(), 4x: + + nssv_constexpr14 size_type rfind( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + if ( size() < v.size() ) + { + return npos; + } + + if ( v.empty() ) + { + return (std::min)( size(), pos ); + } + + const_iterator last = cbegin() + (std::min)( size() - v.size(), pos ) + v.size(); + const_iterator result = std::find_end( cbegin(), last, v.cbegin(), v.cend(), Traits::eq ); + + return result != last ? size_type( result - cbegin() ) : npos; + } + + nssv_constexpr14 size_type rfind( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return rfind( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr14 size_type rfind( CharT const * s, size_type pos, size_type n ) const // (3) + { + return rfind( basic_string_view( s, n ), pos ); + } + + nssv_constexpr14 size_type rfind( CharT const * s, size_type pos = npos ) const // (4) + { + return rfind( basic_string_view( s ), pos ); + } + + // find_first_of(), 4x: + + nssv_constexpr size_type find_first_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return pos >= size() + ? 
npos + : to_pos( std::find_first_of( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr size_type find_first_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find_first_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_first_of( CharT const * s, size_type pos, size_type n ) const // (3) + { + return find_first_of( basic_string_view( s, n ), pos ); + } + + nssv_constexpr size_type find_first_of( CharT const * s, size_type pos = 0 ) const // (4) + { + return find_first_of( basic_string_view( s ), pos ); + } + + // find_last_of(), 4x: + + nssv_constexpr size_type find_last_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + return empty() + ? npos + : pos >= size() + ? find_last_of( v, size() - 1 ) + : to_pos( std::find_first_of( const_reverse_iterator( cbegin() + pos + 1 ), crend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr size_type find_last_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return find_last_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_last_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_last_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_last_of( CharT const * s, size_type pos = npos ) const // (4) + { + return find_last_of( basic_string_view( s ), pos ); + } + + // find_first_not_of(), 4x: + + nssv_constexpr size_type find_first_not_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return pos >= size() + ? 
npos + : to_pos( std::find_if( cbegin() + pos, cend(), not_in_view( v ) ) ); + } + + nssv_constexpr size_type find_first_not_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find_first_not_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_first_not_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos = 0 ) const // (4) + { + return find_first_not_of( basic_string_view( s ), pos ); + } + + // find_last_not_of(), 4x: + + nssv_constexpr size_type find_last_not_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + return empty() + ? npos + : pos >= size() + ? find_last_not_of( v, size() - 1 ) + : to_pos( std::find_if( const_reverse_iterator( cbegin() + pos + 1 ), crend(), not_in_view( v ) ) ); + } + + nssv_constexpr size_type find_last_not_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return find_last_not_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_last_not_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos = npos ) const // (4) + { + return find_last_not_of( basic_string_view( s ), pos ); + } + + // Constants: + +#if nssv_CPP17_OR_GREATER + static nssv_constexpr size_type npos = size_type(-1); +#elif nssv_CPP11_OR_GREATER + enum : size_type { npos = size_type(-1) }; +#else + enum { npos = size_type(-1) }; +#endif + +private: + struct not_in_view + { + const basic_string_view v; + + nssv_constexpr explicit not_in_view( basic_string_view v ) : v( v ) {} + + nssv_constexpr bool operator()( CharT c ) const + { + return npos == v.find_first_of( c ); + } + }; + + nssv_constexpr size_type to_pos( const_iterator it ) 
const + { + return it == cend() ? npos : size_type( it - cbegin() ); + } + + nssv_constexpr size_type to_pos( const_reverse_iterator it ) const + { + return it == crend() ? npos : size_type( crend() - it - 1 ); + } + + nssv_constexpr const_reference data_at( size_type pos ) const + { +#if nssv_BETWEEN( nssv_COMPILER_GNUC_VERSION, 1, 500 ) + return data_[pos]; +#else + return assert( pos < size() ), data_[pos]; +#endif + } + +private: + const_pointer data_; + size_type size_; + +public: +#if nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS + + template< class Allocator > + basic_string_view( std::basic_string<CharT, Traits, Allocator> const & s ) nssv_noexcept + : data_( s.data() ) + , size_( s.size() ) + {} + +#if nssv_HAVE_EXPLICIT_CONVERSION + + template< class Allocator > + explicit operator std::basic_string<CharT, Traits, Allocator>() const + { + return to_string( Allocator() ); + } + +#endif // nssv_HAVE_EXPLICIT_CONVERSION + +#if nssv_CPP11_OR_GREATER + + template< class Allocator = std::allocator<CharT> > + std::basic_string<CharT, Traits, Allocator> + to_string( Allocator const & a = Allocator() ) const + { + return std::basic_string<CharT, Traits, Allocator>( begin(), end(), a ); + } + +#else + + std::basic_string<CharT, Traits> + to_string() const + { + return std::basic_string<CharT, Traits>( begin(), end() ); + } + + template< class Allocator > + std::basic_string<CharT, Traits, Allocator> + to_string( Allocator const & a ) const + { + return std::basic_string<CharT, Traits, Allocator>( begin(), end(), a ); + } + +#endif // nssv_CPP11_OR_GREATER + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS +}; + +// +// Non-member functions: +// + +// 24.4.3 Non-member comparison functions: +// lexicographically compare two string views (function template): + +template< class CharT, class Traits > +nssv_constexpr bool operator== ( + basic_string_view <CharT, Traits> lhs, + basic_string_view <CharT, Traits> rhs ) nssv_noexcept +{ return lhs.compare( rhs ) 
== 0 ; } + +template< class CharT, class Traits > +nssv_constexpr bool operator!= ( + basic_string_view <CharT, Traits> lhs, + basic_string_view <CharT, Traits> rhs ) nssv_noexcept +{ return lhs.compare( rhs ) != 0 ; } + +template< class CharT, class Traits > +nssv_constexpr bool operator< ( + basic_string_view <CharT, Traits> lhs, + basic_string_view <CharT, Traits> rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0 ; } + +template< class CharT, class Traits > +nssv_constexpr bool operator<= ( + basic_string_view <CharT, Traits> lhs, + basic_string_view <CharT, Traits> rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0 ; } + +template< class CharT, class Traits > +nssv_constexpr bool operator> ( + basic_string_view <CharT, Traits> lhs, + basic_string_view <CharT, Traits> rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0 ; } + +template< class CharT, class Traits > +nssv_constexpr bool operator>= ( + basic_string_view <CharT, Traits> lhs, + basic_string_view <CharT, Traits> rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0 ; } + +// Let S be basic_string_view<CharT, Traits>, and sv be an instance of S. +// Implementations shall provide sufficient additional overloads marked +// constexpr and noexcept so that an object t with an implicit conversion +// to S can be compared according to Table 67. + +#if ! 
nssv_CPP11_OR_GREATER || nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 100, 141 ) + +// accommodate for older compilers: + +// == + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + basic_string_view<CharT, Traits> lhs, + char const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + char const * lhs, + basic_string_view<CharT, Traits> rhs ) nssv_noexcept +{ return rhs.compare( lhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + basic_string_view<CharT, Traits> lhs, + std::basic_string<CharT, Traits> rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + std::basic_string<CharT, Traits> rhs, + basic_string_view<CharT, Traits> lhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +// != + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + basic_string_view<CharT, Traits> lhs, + char const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) != 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + char const * lhs, + basic_string_view<CharT, Traits> rhs ) nssv_noexcept +{ return rhs.compare( lhs ) != 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + basic_string_view<CharT, Traits> lhs, + std::basic_string<CharT, Traits> rhs ) nssv_noexcept +{ return lhs.size() != rhs.size() || lhs.compare( rhs ) != 0; } // unequal when sizes differ OR contents differ + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + std::basic_string<CharT, Traits> rhs, + basic_string_view<CharT, Traits> lhs ) nssv_noexcept +{ return lhs.size() != rhs.size() || rhs.compare( lhs ) != 0; } + +// < + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + basic_string_view<CharT, Traits> lhs, + char const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + 
+template< class CharT, class Traits> +nssv_constexpr bool operator<( + char const * lhs, + basic_string_view<CharT, Traits> rhs ) nssv_noexcept +{ return rhs.compare( lhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + basic_string_view<CharT, Traits> lhs, + std::basic_string<CharT, Traits> rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + std::basic_string<CharT, Traits> rhs, + basic_string_view<CharT, Traits> lhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } // the parameter named rhs is the LEFT operand: string < view <=> view.compare( string ) > 0 + +// <= + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + basic_string_view<CharT, Traits> lhs, + char const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + char const * lhs, + basic_string_view<CharT, Traits> rhs ) nssv_noexcept +{ return rhs.compare( lhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + basic_string_view<CharT, Traits> lhs, + std::basic_string<CharT, Traits> rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + std::basic_string<CharT, Traits> rhs, + basic_string_view<CharT, Traits> lhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } // the parameter named rhs is the LEFT operand: string <= view <=> view.compare( string ) >= 0 + +// > + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + basic_string_view<CharT, Traits> lhs, + char const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + char const * lhs, + basic_string_view<CharT, Traits> rhs ) nssv_noexcept +{ return rhs.compare( lhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + basic_string_view<CharT, Traits> lhs, + std::basic_string<CharT, Traits> rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits> 
+nssv_constexpr bool operator>( + std::basic_string<CharT, Traits> rhs, + basic_string_view<CharT, Traits> lhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } // the parameter named rhs is the LEFT operand: string > view <=> view.compare( string ) < 0 + +// >= + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + basic_string_view<CharT, Traits> lhs, + char const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + char const * lhs, + basic_string_view<CharT, Traits> rhs ) nssv_noexcept +{ return rhs.compare( lhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + basic_string_view<CharT, Traits> lhs, + std::basic_string<CharT, Traits> rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + std::basic_string<CharT, Traits> rhs, + basic_string_view<CharT, Traits> lhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } // the parameter named rhs is the LEFT operand: string >= view <=> view.compare( string ) <= 0 + +#else // newer compilers: + +#define nssv_BASIC_STRING_VIEW_I(T,U) typename std::decay< basic_string_view<T,U> >::type + +#if nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 140, 150 ) +# define nssv_MSVC_ORDER(x) , int=x +#else +# define nssv_MSVC_ORDER(x) /*, int=x*/ +#endif + +// == + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator==( + basic_string_view <CharT, Traits> lhs, + nssv_BASIC_STRING_VIEW_I(CharT, Traits) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator==( + nssv_BASIC_STRING_VIEW_I(CharT, Traits) lhs, + basic_string_view <CharT, Traits> rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +// != + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator!= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.size() != rhs.size() || lhs.compare( rhs ) != 
0 ; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator!= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) != 0 ; } + +// < + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator< ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0 ; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator< ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0 ; } + +// <= + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator<= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0 ; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator<= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0 ; } + +// > + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator> ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0 ; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator> ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0 ; } + +// >= + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator>= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0 ; } + +template< class CharT, class Traits 
nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator>= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0 ; } + +#undef nssv_MSVC_ORDER +#undef nssv_BASIC_STRING_VIEW_I + +#endif // compiler-dependent approach to comparisons + +// 24.4.4 Inserters and extractors: + +namespace detail { + +template< class Stream > +void write_padding( Stream & os, std::streamsize n ) +{ + for ( std::streamsize i = 0; i < n; ++i ) + os.rdbuf()->sputc( os.fill() ); +} + +template< class Stream, class View > +Stream & write_to_stream( Stream & os, View const & sv ) +{ + typename Stream::sentry sentry( os ); + + if ( !os ) + return os; + + const std::streamsize length = static_cast<std::streamsize>( sv.length() ); + + // Whether, and how, to pad: + const bool pad = ( length < os.width() ); + const bool left_pad = pad && ( os.flags() & std::ios_base::adjustfield ) == std::ios_base::right; + + if ( left_pad ) + write_padding( os, os.width() - length ); + + // Write span characters: + os.rdbuf()->sputn( sv.begin(), length ); + + if ( pad && !left_pad ) + write_padding( os, os.width() - length ); + + // Reset output stream width: + os.width( 0 ); + + return os; +} + +} // namespace detail + +template< class CharT, class Traits > +std::basic_ostream<CharT, Traits> & +operator<<( + std::basic_ostream<CharT, Traits>& os, + basic_string_view <CharT, Traits> sv ) +{ + return detail::write_to_stream( os, sv ); +} + +// Several typedefs for common character types are provided: + +typedef basic_string_view<char> string_view; +typedef basic_string_view<wchar_t> wstring_view; +#if nssv_HAVE_WCHAR16_T +typedef basic_string_view<char16_t> u16string_view; +typedef basic_string_view<char32_t> u32string_view; +#endif + +}} // namespace nonstd::sv_lite + +// +// 24.4.6 Suffix for basic_string_view literals: +// + +#if nssv_HAVE_USER_DEFINED_LITERALS + +namespace nonstd { +nssv_inline_ns namespace literals { 
+nssv_inline_ns namespace string_view_literals { + +#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS + +nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) +{ + return nonstd::sv_lite::string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +{ + return nonstd::sv_lite::u16string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +{ + return nonstd::sv_lite::u32string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +{ + return nonstd::sv_lite::wstring_view{ str, len }; +} + +#endif // nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS + +#if nssv_CONFIG_USR_SV_OPERATOR + +nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) +{ + return nonstd::sv_lite::string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +{ + return nonstd::sv_lite::u16string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +{ + return nonstd::sv_lite::u32string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +{ + return nonstd::sv_lite::wstring_view{ str, len }; +} + +#endif // nssv_CONFIG_USR_SV_OPERATOR + +}}} // namespace nonstd::literals::string_view_literals + +#endif + +// +// Extensions for std::string: +// + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { +namespace sv_lite { + +// Exclude MSVC 14 (19.00): it yields ambiguous to_string(): + +#if nssv_CPP11_OR_GREATER && 
nssv_COMPILER_MSVC_VERSION != 140 + +template< class CharT, class Traits, class Allocator = std::allocator<CharT> > +std::basic_string<CharT, Traits, Allocator> +to_string( basic_string_view<CharT, Traits> v, Allocator const & a = Allocator() ) +{ + return std::basic_string<CharT,Traits, Allocator>( v.begin(), v.end(), a ); +} + +#else + +template< class CharT, class Traits > +std::basic_string<CharT, Traits> +to_string( basic_string_view<CharT, Traits> v ) +{ + return std::basic_string<CharT, Traits>( v.begin(), v.end() ); +} + +template< class CharT, class Traits, class Allocator > +std::basic_string<CharT, Traits, Allocator> +to_string( basic_string_view<CharT, Traits> v, Allocator const & a ) +{ + return std::basic_string<CharT, Traits, Allocator>( v.begin(), v.end(), a ); +} + +#endif // nssv_CPP11_OR_GREATER + +template< class CharT, class Traits, class Allocator > +basic_string_view<CharT, Traits> +to_string_view( std::basic_string<CharT, Traits, Allocator> const & s ) +{ + return basic_string_view<CharT, Traits>( s.data(), s.size() ); +} + +}} // namespace nonstd::sv_lite + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +// +// make types and algorithms available in namespace nonstd: +// + +namespace nonstd { + +using sv_lite::basic_string_view; +using sv_lite::string_view; +using sv_lite::wstring_view; + +#if nssv_HAVE_WCHAR16_T +using sv_lite::u16string_view; +#endif +#if nssv_HAVE_WCHAR32_T +using sv_lite::u32string_view; +#endif + +// literal "sv" + +using sv_lite::operator==; +using sv_lite::operator!=; +using sv_lite::operator<; +using sv_lite::operator<=; +using sv_lite::operator>; +using sv_lite::operator>=; + +using sv_lite::operator<<; + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +using sv_lite::to_string; +using sv_lite::to_string_view; +#endif + +} // namespace nonstd + +// 24.4.5 Hash support (C++11): + +// Note: The hash value of a string view object is equal to the hash value of +// the corresponding string object. 
+ +#if nssv_HAVE_STD_HASH + +#include <functional> + +namespace std { + +template<> +struct hash< nonstd::string_view > +{ +public: + std::size_t operator()( nonstd::string_view v ) const nssv_noexcept + { + return std::hash<std::string>()( std::string( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::wstring_view > +{ +public: + std::size_t operator()( nonstd::wstring_view v ) const nssv_noexcept + { + return std::hash<std::wstring>()( std::wstring( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::u16string_view > +{ +public: + std::size_t operator()( nonstd::u16string_view v ) const nssv_noexcept + { + return std::hash<std::u16string>()( std::u16string( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::u32string_view > +{ +public: + std::size_t operator()( nonstd::u32string_view v ) const nssv_noexcept + { + return std::hash<std::u32string>()( std::u32string( v.data(), v.size() ) ); + } +}; + +} // namespace std + +#endif // nssv_HAVE_STD_HASH + +nssv_RESTORE_WARNINGS() + +#endif // nssv_HAVE_STD_STRING_VIEW +#endif // NONSTD_SV_LITE_H_INCLUDED +/* end file include/simdjson/nonstd/string_view.hpp */ +SIMDJSON_POP_DISABLE_WARNINGS + +namespace std { + using string_view = nonstd::string_view; +} +#endif // SIMDJSON_HAS_STRING_VIEW +#undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore. 
+ #endif // SIMDJSON_COMMON_DEFS_H -/* end file include/simdjson/portability.h */ +/* end file include/simdjson/nonstd/string_view.hpp */ + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_UNDESIRED_WARNINGS + +// Public API +/* begin file include/simdjson/simdjson_version.h */ +// /include/simdjson/simdjson_version.h automatically generated by release.py, +// do not change by hand +#ifndef SIMDJSON_SIMDJSON_VERSION_H +#define SIMDJSON_SIMDJSON_VERSION_H + +/** The version of simdjson being used (major.minor.revision) */ +#define SIMDJSON_VERSION 0.4.6 + +namespace simdjson { +enum { + /** + * The major version (MAJOR.minor.revision) of simdjson being used. + */ + SIMDJSON_VERSION_MAJOR = 0, + /** + * The minor version (major.MINOR.revision) of simdjson being used. + */ + SIMDJSON_VERSION_MINOR = 4, + /** + * The revision (major.minor.REVISION) of simdjson being used. + */ + SIMDJSON_VERSION_REVISION = 6 +}; +} // namespace simdjson + +#endif // SIMDJSON_SIMDJSON_VERSION_H +/* end file include/simdjson/simdjson_version.h */ +/* begin file include/simdjson/error.h */ +#ifndef SIMDJSON_ERROR_H +#define SIMDJSON_ERROR_H + #include <string> -#include <utility> namespace simdjson { /** * All possible errors returned by simdjson. */ enum error_code { SUCCESS = 0, ///< No error - SUCCESS_AND_HAS_MORE, ///< @private No error and buffer still has more data CAPACITY, ///< This parser can't support a document that big MEMALLOC, ///< Error allocating memory, most likely out of memory TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2), this is a generic error DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation STRING_ERROR, ///< Problem while parsing a string @@ -365,11 +2103,12 @@ /** * Get the error message for the given error code. 
* * dom::parser parser; - * auto [doc, error] = parser.parse("foo"); + * dom::element doc; + * auto error = parser.parse("foo").get(doc); * if (error) { printf("Error: %s\n", error_message(error)); } * * @return The error message. */ inline const char *error_message(error_code error) noexcept; @@ -442,14 +2181,24 @@ */ really_inline simdjson_result_base(T &&value, error_code error) noexcept; /** * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. */ really_inline void tie(T &value, error_code &error) && noexcept; /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + really_inline error_code get(T &value) && noexcept; + + /** * The error. */ really_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -504,14 +2253,24 @@ */ really_inline simdjson_result(T &&value, error_code error) noexcept; /** * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. */ - really_inline void tie(T& t, error_code & e) && noexcept; + really_inline void tie(T &value, error_code &error) && noexcept; /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + WARN_UNUSED really_inline error_code get(T &value) && noexcept; + + /** * The error. 
*/ really_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -552,18 +2311,20 @@ inline const std::string &error_message(int error) noexcept; } // namespace simdjson #endif // SIMDJSON_ERROR_H -/* end file include/simdjson/portability.h */ +/* end file include/simdjson/error.h */ /* begin file include/simdjson/padded_string.h */ #ifndef SIMDJSON_PADDED_STRING_H #define SIMDJSON_PADDED_STRING_H + #include <cstring> #include <memory> #include <string> +#include <ostream> namespace simdjson { /** * String with extra allocation for ease of use with parser::parse() @@ -596,11 +2357,11 @@ */ inline padded_string(const std::string & str_ ) noexcept; /** * Create a new padded string by copying the given input. * - * @param str_ the string to copy + * @param sv_ the string to copy */ inline padded_string(std::string_view sv_) noexcept; /** * Move one padded string into another. * @@ -658,97 +2419,500 @@ private: padded_string &operator=(const padded_string &o) = delete; padded_string(const padded_string &o) = delete; - size_t viable_size; + size_t viable_size{0}; char *data_ptr{nullptr}; }; // padded_string +/** + * Send padded_string instance to an output stream. + * + * @param out The output stream. + * @param s The padded_string instance. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, const padded_string& s) { return out << s.data(); } + +#if SIMDJSON_EXCEPTIONS +/** + * Send padded_string instance to an output stream. + * + * @param out The output stream. + * @param s The padded_string instance. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result<padded_string> &s) noexcept(false) { return out << s.value(); } +#endif + } // namespace simdjson // This is deliberately outside of simdjson so that people get it without having to use the namespace inline simdjson::padded_string operator "" _padded(const char *str, size_t len) { return simdjson::padded_string(str, len); } -namespace simdjson::internal { +namespace simdjson { +namespace internal { // low-level function to allocate memory with padding so we can read past the // "length" bytes safely. if you must provide a pointer to some data, create it // with this function: length is the max. size in bytes of the string caller is // responsible to free the memory (free(...)) inline char *allocate_padded_buffer(size_t length) noexcept; -} // namespace simdjson::internal; +} // namespace internal +} // namespace simdjson #endif // SIMDJSON_PADDED_STRING_H /* end file include/simdjson/padded_string.h */ /* begin file include/simdjson/implementation.h */ #ifndef SIMDJSON_IMPLEMENTATION_H #define SIMDJSON_IMPLEMENTATION_H -#include <optional> +/* begin file include/simdjson/internal/dom_parser_implementation.h */ +#ifndef SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H +#define SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H + +#include <memory> + +namespace simdjson { + +namespace dom { +class document; +} // namespace dom + +namespace internal { + +/** + * An implementation of simdjson's DOM parser for a particular CPU architecture. + * + * This class is expected to be accessed only by pointer, and never move in memory (though the + * pointer can move). + */ +class dom_parser_implementation { +public: + + /** + * @private For internal implementation use + * + * Run a full JSON parse on a single document (stage1 + stage2). + * + * Guaranteed only to be called when capacity > document length. + * + * Overridden by each implementation. + * + * @param buf The json document to parse. 
*MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param len The length of the json document. + * @return The error code, or SUCCESS if there was no error. + */ + WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 1 of the document parser. + * + * Guaranteed only to be called when capacity > document length. + * + * Overridden by each implementation. + * + * @param buf The json document to parse. + * @param len The length of the json document. + * @param streaming Whether this is being called by parser::parse_many. + * @return The error code, or SUCCESS if there was no error. + */ + WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 2 of the document parser. + * + * Called after stage1(). + * + * Overridden by each implementation. + * + * @param doc The document to output to. + * @return The error code, or SUCCESS if there was no error. + */ + WARN_UNUSED virtual error_code stage2(dom::document &doc) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 2 of the document parser for parser::parse_many. + * + * Guaranteed only to be called after stage1(). + * Overridden by each implementation. + * + * @param doc The document to output to. + * @return The error code, SUCCESS if there was no error, or EMPTY if all documents have been parsed. + */ + WARN_UNUSED virtual error_code stage2_next(dom::document &doc) noexcept = 0; + + /** + * Change the capacity of this parser. + * + * Generally used for reallocation. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. + * @return The error code, or SUCCESS if there was no error. + */ + virtual error_code set_capacity(size_t capacity) noexcept = 0; + + /** + * Change the max depth of this parser. 
+ * + * Generally used for reallocation. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. + * @return The error code, or SUCCESS if there was no error. + */ + virtual error_code set_max_depth(size_t max_depth) noexcept = 0; + + /** + * Deallocate this parser. + */ + virtual ~dom_parser_implementation() = default; + + /** Number of structural indices passed from stage 1 to stage 2 */ + uint32_t n_structural_indexes{0}; + /** Structural indices passed from stage 1 to stage 2 */ + std::unique_ptr<uint32_t[]> structural_indexes{}; + /** Next structural index to parse */ + uint32_t next_structural_index{0}; + + /** + * The largest document this parser can support without reallocating. + * + * @return Current capacity, in bytes. + */ + really_inline size_t capacity() const noexcept; + + /** + * The maximum level of nested object and arrays supported by this parser. + * + * @return Maximum depth, in bytes. + */ + really_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + WARN_UNUSED inline error_code allocate(size_t capacity, size_t max_depth) noexcept; + +protected: + /** + * The maximum document length this parser supports. + * + * Buffers are large enough to handle any document up to this length. + */ + size_t _capacity{0}; + + /** + * The maximum depth (number of nested objects and arrays) supported by this parser. + * + * Defaults to DEFAULT_MAX_DEPTH. 
+ */ + size_t _max_depth{0}; +}; // class dom_parser_implementation + +really_inline size_t dom_parser_implementation::capacity() const noexcept { + return _capacity; +} + +really_inline size_t dom_parser_implementation::max_depth() const noexcept { + return _max_depth; +} + +WARN_UNUSED +inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept { + if (this->max_depth() != max_depth) { + error_code err = set_max_depth(max_depth); + if (err) { return err; } + } + if (_capacity != capacity) { + error_code err = set_capacity(capacity); + if (err) { return err; } + } + return SUCCESS; +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H +/* end file include/simdjson/internal/dom_parser_implementation.h */ #include <string> #include <atomic> #include <vector> -/* begin file include/simdjson/document.h */ -#ifndef SIMDJSON_DOCUMENT_H -#define SIMDJSON_DOCUMENT_H -#include <cstring> -#include <memory> -#include <string> -#include <limits> -#include <sstream> -/* begin file include/simdjson/simdjson.h */ +namespace simdjson { + /** - * @file - * @deprecated We'll be removing this file so it isn't confused with the top level simdjson.h + * Validate the UTF-8 string. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if the string is valid UTF-8. */ -#ifndef SIMDJSON_SIMDJSON_H -#define SIMDJSON_SIMDJSON_H +WARN_UNUSED bool validate_utf8(const char * buf, size_t len) noexcept; -#endif // SIMDJSON_H -/* end file include/simdjson/simdjson.h */ +/** + * Validate the UTF-8 string. + * + * @param sv the string_view to validate. + * @return true if the string is valid UTF-8. + */ +really_inline WARN_UNUSED bool validate_utf8(const std::string_view sv) noexcept { + return validate_utf8(sv.data(), sv.size()); +} -namespace simdjson::dom { +/** + * Validate the UTF-8 string. + * + * @param p the string to validate. 
+ * @return true if the string is valid UTF-8. + */ +really_inline WARN_UNUSED bool validate_utf8(const std::string& s) noexcept { + return validate_utf8(s.data(), s.size()); +} -class parser; -class element; -class array; -class object; -class key_value_pair; -class document; -class document_stream; +namespace dom { + class document; +} // namespace dom -/** The default batch size for parser.parse_many() and parser.load_many() */ -static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * An implementation of simdjson for a particular CPU architecture. + * + * Also used to maintain the currently active implementation. The active implementation is + * automatically initialized on first use to the most advanced implementation supported by the host. + */ +class implementation { +public: -} // namespace simdjson::dom + /** + * The name of this implementation. + * + * const implementation *impl = simdjson::active_implementation; + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" + */ + virtual const std::string &name() const { return _name; } -namespace simdjson { + /** + * The description of this implementation. + * + * const implementation *impl = simdjson::active_implementation; + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" + */ + virtual const std::string &description() const { return _description; } -template<> struct simdjson_result<dom::element>; -template<> struct simdjson_result<dom::array>; -template<> struct simdjson_result<dom::object>; + /** + * @private For internal implementation use + * + * The instruction sets this implementation is compiled against. 
+ * + * @return a mask of all required `instruction_set` values + */ + virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; }; + /** + * @private For internal implementation use + * + * const implementation *impl = simdjson::active_implementation; + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @param capacity The largest document that will be passed to the parser. + * @param max_depth The maximum JSON object/array nesting this parser is expected to handle. + * @param dst The place to put the resulting parser implementation. + * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" + */ + virtual error_code create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr<internal::dom_parser_implementation> &dst + ) const noexcept = 0; + + /** + * @private For internal implementation use + * + * Minify the input string assuming that it represents a JSON string, does not parse or validate. + * + * Overridden by each implementation. + * + * @param buf the json document to minify. + * @param len the length of the json document. + * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param dst_len the number of bytes written. Output only. + * @return the error code, or SUCCESS if there was no error. + */ + WARN_UNUSED virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; + + + /** + * Validate the UTF-8 string. + * + * Overridden by each implementation. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid UTF-8. + */ + WARN_UNUSED virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0; + +protected: + /** @private Construct an implementation with the given name and description. For subclasses. 
*/ + really_inline implementation( + std::string_view name, + std::string_view description, + uint32_t required_instruction_sets + ) : + _name(name), + _description(description), + _required_instruction_sets(required_instruction_sets) + { + } + virtual ~implementation()=default; + +private: + /** + * The name of this implementation. + */ + const std::string _name; + + /** + * The description of this implementation. + */ + const std::string _description; + + /** + * Instruction sets required for this implementation. + */ + const uint32_t _required_instruction_sets; +}; + +/** @private */ +namespace internal { + +/** + * The list of available implementations compiled into simdjson. + */ +class available_implementation_list { +public: + /** Get the list of available implementations compiled into simdjson */ + really_inline available_implementation_list() {} + /** Number of implementations */ + size_t size() const noexcept; + /** STL const begin() iterator */ + const implementation * const *begin() const noexcept; + /** STL const end() iterator */ + const implementation * const *end() const noexcept; + + /** + * Get the implementation with the given name. + * + * Case sensitive. + * + * const implementation *impl = simdjson::available_implementations["westmere"]; + * if (!impl) { exit(1); } + * simdjson::active_implementation = impl; + * + * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" + * @return the implementation, or nullptr if the parse failed. + */ + const implementation * operator[](const std::string_view &name) const noexcept { + for (const implementation * impl : *this) { + if (impl->name() == name) { return impl; } + } + return nullptr; + } + + /** + * Detect the most advanced implementation supported by the current host. + * + * This is used to initialize the implementation on startup. 
+ * + * const implementation *impl = simdjson::available_implementation::detect_best_supported(); + * simdjson::active_implementation = impl; + * + * @return the most advanced supported implementation for the current host, or an + * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported + * implementation. Will never return nullptr. + */ + const implementation *detect_best_supported() const noexcept; +}; + template<typename T> -class minify; +class atomic_ptr { +public: + atomic_ptr(T *_ptr) : ptr{_ptr} {} + operator const T*() const { return ptr.load(); } + const T& operator*() const { return *ptr; } + const T* operator->() const { return ptr.load(); } + + operator T*() { return ptr.load(); } + T& operator*() { return *ptr; } + T* operator->() { return ptr.load(); } + atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; } + +private: + std::atomic<T*> ptr; +}; + +} // namespace internal + +/** + * The list of available implementations compiled into simdjson. + */ +extern SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list available_implementations; + +/** + * The active implementation. + * + * Automatically initialized on first use to the most advanced implementation supported by this hardware. 
+ */ +extern SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> active_implementation; + } // namespace simdjson -namespace simdjson::internal { +#endif // SIMDJSON_IMPLEMENTATION_H +/* end file include/simdjson/internal/dom_parser_implementation.h */ +/* begin file include/simdjson/dom/array.h */ +#ifndef SIMDJSON_DOM_ARRAY_H +#define SIMDJSON_DOM_ARRAY_H -using namespace simdjson::dom; +/* begin file include/simdjson/internal/tape_ref.h */ +#ifndef SIMDJSON_INTERNAL_TAPE_REF_H +#define SIMDJSON_INTERNAL_TAPE_REF_H -constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF; +/* begin file include/simdjson/internal/tape_type.h */ +#ifndef SIMDJSON_INTERNAL_TAPE_TYPE_H +#define SIMDJSON_INTERNAL_TAPE_TYPE_H +namespace simdjson { +namespace internal { + /** - * The possible types in the tape. Internal only. + * The possible types in the tape. */ enum class tape_type { ROOT = 'r', START_ARRAY = '[', START_OBJECT = '{', @@ -759,80 +2923,178 @@ UINT64 = 'u', DOUBLE = 'd', TRUE_VALUE = 't', FALSE_VALUE = 'f', NULL_VALUE = 'n' -}; +}; // enum class tape_type +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_TAPE_TYPE_H +/* end file include/simdjson/internal/tape_type.h */ + +namespace simdjson { + +namespace dom { + class document; +} + +namespace internal { + +constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF; +constexpr const uint32_t JSON_COUNT_MASK = 0xFFFFFF; + /** * A reference to an element on the tape. Internal only. 
*/ class tape_ref { public: really_inline tape_ref() noexcept; - really_inline tape_ref(const document *doc, size_t json_index) noexcept; + really_inline tape_ref(const dom::document *doc, size_t json_index) noexcept; inline size_t after_element() const noexcept; really_inline tape_type tape_ref_type() const noexcept; really_inline uint64_t tape_value() const noexcept; + really_inline bool is_double() const noexcept; + really_inline bool is_int64() const noexcept; + really_inline bool is_uint64() const noexcept; + really_inline bool is_false() const noexcept; + really_inline bool is_true() const noexcept; + really_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null. + really_inline uint32_t matching_brace_index() const noexcept; + really_inline uint32_t scope_count() const noexcept; template<typename T> really_inline T next_tape_value() const noexcept; + really_inline uint32_t get_string_length() const noexcept; + really_inline const char * get_c_str() const noexcept; inline std::string_view get_string_view() const noexcept; /** The document this element references. */ - const document *doc; + const dom::document *doc; /** The index of this element on `doc.tape[]` */ size_t json_index; }; -} // namespace simdjson::internal +} // namespace internal +} // namespace simdjson -namespace simdjson::dom { +#endif // SIMDJSON_INTERNAL_TAPE_REF_H +/* end file include/simdjson/internal/tape_type.h */ +/* begin file include/simdjson/minify.h */ +#ifndef SIMDJSON_MINIFY_H +#define SIMDJSON_MINIFY_H +#include <string> +#include <ostream> +#include <sstream> + +namespace simdjson { + + + /** - * The actual concrete type of a JSON element - * This is the type it is most easily cast to with get<>. + * + * Minify the input string assuming that it represents a JSON string, does not parse or validate. + * This function is much faster than parsing a JSON string and then writing a minified version of it. 
+ * However, it does not validate the input. + * + * + * @param buf the json document to minify. + * @param len the length of the json document. + * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param dst_len the number of bytes written. Output only. + * @return the error code, or SUCCESS if there was no error. */ -enum class element_type { - ARRAY, ///< dom::array - OBJECT, ///< dom::object - INT64, ///< int64_t - UINT64, ///< uint64_t: any integer that fits in uint64_t but *not* int64_t - DOUBLE, ///< double: Any number with a "." or "e" that fits in double. - STRING, ///< std::string_view - BOOL, ///< bool - NULL_VALUE ///< null +WARN_UNUSED error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept; + +/** + * Minifies a JSON element or document, printing the smallest possible valid JSON. + * + * dom::parser parser; + * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); + * cout << minify(doc) << endl; // prints [1,2,3] + * + */ +template<typename T> +class minifier { +public: + /** + * Create a new minifier. + * + * @param _value The document or element to minify. + */ + inline minifier(const T &_value) noexcept : value{_value} {} + + /** + * Minify JSON to a string. + */ + inline operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); } + + /** + * Minify JSON to an output stream. + */ + inline std::ostream& print(std::ostream& out); +private: + const T &value; }; +template<typename T> +inline minifier<T> minify(const T &value) noexcept { return minifier<T>(value); } + /** + * Minify JSON to an output stream. + * + * @param out The output stream. + * @param formatter The minifier. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
+ */ +template<typename T> +inline std::ostream& operator<<(std::ostream& out, minifier<T> formatter) { return formatter.print(out); } + +} // namespace simdjson + +#endif // SIMDJSON_MINIFY_H +/* end file include/simdjson/minify.h */ +#include <ostream> + +namespace simdjson { +namespace dom { + +class document; +class element; + +/** * JSON array. */ -class array : protected internal::tape_ref { +class array { public: /** Create a new, invalid array */ really_inline array() noexcept; - class iterator : protected internal::tape_ref { + class iterator { public: /** * Get the actual value */ inline element operator*() const noexcept; /** * Get the next value. * * Part of the std::iterator interface. + * */ - inline void operator++() noexcept; + inline iterator& operator++() noexcept; /** * Check if these values come from the same place in the JSON. * * Part of the std::iterator interface. */ inline bool operator!=(const iterator& other) const noexcept; private: - really_inline iterator(const document *doc, size_t json_index) noexcept; + really_inline iterator(const internal::tape_ref &tape) noexcept; + internal::tape_ref tape; friend class array; }; /** * Return the first array element. @@ -844,16 +3106,21 @@ * One past the last array element. * * Part of the std::iterable interface. */ inline iterator end() const noexcept; - /** + * Get the size of the array (number of immediate children). + * It is a saturated value with a maximum of 0xFFFFFF: if the value + * is 0xFFFFFF then the size is 0xFFFFFF or greater. + */ + inline size_t size() const noexcept; + /** * Get the value associated with the given JSON pointer. 
* * dom::parser parser; - * array a = parser.parse(R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"); + * array a = parser.parse(R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded); * a.at("0/foo/a/1") == 20 * a.at("0")["foo"]["a"].at(1) == 20 * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object @@ -862,159 +3129,106 @@ * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result<element> at(const std::string_view &json_pointer) const noexcept; /** - * Get the value at the given index. + * Get the value at the given index. This function has linear-time complexity and + * is equivalent to the following: + * + * size_t i=0; + * for (auto element : *this) { + * if (i == index) { return element; } + * i++; + * } + * return INDEX_OUT_OF_BOUNDS; * + * Avoid calling the at() function repeatedly. + * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ inline simdjson_result<element> at(size_t index) const noexcept; private: - really_inline array(const document *doc, size_t json_index) noexcept; + really_inline array(const internal::tape_ref &tape) noexcept; + internal::tape_ref tape; friend class element; friend struct simdjson_result<element>; template<typename T> - friend class simdjson::minify; + friend class simdjson::minifier; }; /** - * JSON object. + * Print JSON to an output stream. + * + * By default, the value will be printed minified. + * + * @param out The output stream. + * @param value The value to print. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ -class object : protected internal::tape_ref { +inline std::ostream& operator<<(std::ostream& out, const array &value); + +} // namespace dom + +/** The result of a JSON conversion that may fail. 
*/ +template<> +struct simdjson_result<dom::array> : public internal::simdjson_result_base<dom::array> { public: - /** Create a new, invalid object */ - really_inline object() noexcept; + really_inline simdjson_result() noexcept; ///< @private + really_inline simdjson_result(dom::array value) noexcept; ///< @private + really_inline simdjson_result(error_code error) noexcept; ///< @private - class iterator : protected internal::tape_ref { - public: - /** - * Get the actual key/value pair - */ - inline const key_value_pair operator*() const noexcept; - /** - * Get the next key/value pair. - * - * Part of the std::iterator interface. - */ - inline void operator++() noexcept; - /** - * Check if these key value pairs come from the same place in the JSON. - * - * Part of the std::iterator interface. - */ - inline bool operator!=(const iterator& other) const noexcept; - /** - * Get the key of this key/value pair. - */ - inline std::string_view key() const noexcept; - /** - * Get the key of this key/value pair. - */ - inline const char *key_c_str() const noexcept; - /** - * Get the value of this key/value pair. - */ - inline element value() const noexcept; - private: - really_inline iterator(const document *doc, size_t json_index) noexcept; - friend class object; - }; + inline simdjson_result<dom::element> at(const std::string_view &json_pointer) const noexcept; + inline simdjson_result<dom::element> at(size_t index) const noexcept; - /** - * Return the first key/value pair. - * - * Part of the std::iterable interface. - */ - inline iterator begin() const noexcept; - /** - * One past the last key/value pair. - * - * Part of the std::iterable interface. - */ - inline iterator end() const noexcept; +#if SIMDJSON_EXCEPTIONS + inline dom::array::iterator begin() const noexcept(false); + inline dom::array::iterator end() const noexcept(false); + inline size_t size() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS +}; - /** - * Get the value associated with the given key. 
- * - * The key will be matched against **unescaped** JSON: - * - * dom::parser parser; - * parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1 - * parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD - * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - * - INCORRECT_TYPE if this is not an object - */ - inline simdjson_result<element> operator[](const std::string_view &key) const noexcept; +#if SIMDJSON_EXCEPTIONS +/** + * Print JSON to an output stream. + * + * By default, the value will be printed minified. + * + * @param out The output stream. + * @param value The value to print. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). + */ +inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::array> &value) noexcept(false); +#endif - /** - * Get the value associated with the given key. - * - * The key will be matched against **unescaped** JSON: - * - * dom::parser parser; - * parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1 - * parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD - * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - * - INCORRECT_TYPE if this is not an object - */ - inline simdjson_result<element> operator[](const char *key) const noexcept; +} // namespace simdjson - /** - * Get the value associated with the given JSON pointer. 
- * - * dom::parser parser; - * object obj = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"); - * obj.at("foo/a/1") == 20 - * obj.at("foo")["a"].at(1) == 20 - * - * @return The value associated with the given JSON pointer, or: - * - NO_SUCH_FIELD if a field does not exist in an object - * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length - * - INCORRECT_TYPE if a non-integer is used to access an array - * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed - */ - inline simdjson_result<element> at(const std::string_view &json_pointer) const noexcept; +#endif // SIMDJSON_DOM_ARRAY_H +/* end file include/simdjson/minify.h */ +/* begin file include/simdjson/dom/document_stream.h */ +#ifndef SIMDJSON_DOCUMENT_STREAM_H +#define SIMDJSON_DOCUMENT_STREAM_H - /** - * Get the value associated with the given key. - * - * The key will be matched against **unescaped** JSON: - * - * dom::parser parser; - * parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1 - * parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD - * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - */ - inline simdjson_result<element> at_key(const std::string_view &key) const noexcept; +/* begin file include/simdjson/dom/parser.h */ +#ifndef SIMDJSON_DOM_PARSER_H +#define SIMDJSON_DOM_PARSER_H - /** - * Get the value associated with the given key in a case-insensitive manner. - * - * Note: The key will be matched against **unescaped** JSON. 
- * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - */ - inline simdjson_result<element> at_key_case_insensitive(const std::string_view &key) const noexcept; +/* begin file include/simdjson/dom/document.h */ +#ifndef SIMDJSON_DOM_DOCUMENT_H +#define SIMDJSON_DOM_DOCUMENT_H -private: - really_inline object(const document *doc, size_t json_index) noexcept; - friend class element; - friend struct simdjson_result<element>; - template<typename T> - friend class simdjson::minify; -}; +#include <memory> +#include <ostream> +namespace simdjson { +namespace dom { + +class element; + /** * A parsed JSON document. * * This class cannot be copied, only moved, to avoid unintended allocations. */ @@ -1057,262 +3271,45 @@ * @return false if the tape is likely wrong (e.g., you did not parse a valid JSON). */ bool dump_raw_tape(std::ostream &os) const noexcept; /** @private Structural values. */ - std::unique_ptr<uint64_t[]> tape; + std::unique_ptr<uint64_t[]> tape{}; /** @private String values. * * Should be at least byte_capacity. */ - std::unique_ptr<uint8_t[]> string_buf; + std::unique_ptr<uint8_t[]> string_buf{}; private: inline error_code allocate(size_t len) noexcept; template<typename T> - friend class simdjson::minify; + friend class simdjson::minifier; friend class parser; }; // class document -/** - * A JSON element. - * - * References an element in a JSON document, representing a JSON null, boolean, string, number, - * array or object. - */ -class element : protected internal::tape_ref { -public: - /** Create a new, invalid element. */ - really_inline element() noexcept; +} // namespace dom +} // namespace simdjson - /** The type of this element. */ - really_inline element_type type() const noexcept; +#endif // SIMDJSON_DOM_DOCUMENT_H +/* end file include/simdjson/dom/document.h */ +#include <memory> +#include <ostream> +#include <string> - /** Whether this element is a json `null`. 
*/ - really_inline bool is_null() const noexcept; +namespace simdjson { - /** - * Tell whether the value can be cast to the given primitive type. - * - * Supported types: - * - Boolean: bool - * - Number: double, uint64_t, int64_t - * - String: std::string_view, const char * - * - Array: array - */ - template<typename T> - really_inline bool is() const noexcept; +namespace dom { - /** - * Get the value as the given primitive type. - * - * Supported types: - * - Boolean: bool - * - Number: double, uint64_t, int64_t - * - String: std::string_view, const char * - * - Array: array - * - * @returns The value cast to the given type, or: - * INCORRECT_TYPE if the value cannot be cast to the given type. - */ - template<typename T> - really_inline simdjson_result<T> get() const noexcept; +class document_stream; +class element; -#if SIMDJSON_EXCEPTIONS - /** - * Read this element as a boolean. - * - * @return The boolean value - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a boolean. - */ - inline operator bool() const noexcept(false); +/** The default batch size for parser.parse_many() and parser.load_many() */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; - /** - * Read this element as a null-terminated string. - * - * Does *not* convert other types to a string; requires that the JSON type of the element was - * an actual string. - * - * @return The string value. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. - */ - inline explicit operator const char*() const noexcept(false); - - /** - * Read this element as a null-terminated string. - * - * Does *not* convert other types to a string; requires that the JSON type of the element was - * an actual string. - * - * @return The string value. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. - */ - inline operator std::string_view() const noexcept(false); - - /** - * Read this element as an unsigned integer. 
- * - * @return The integer value. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer - * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative - */ - inline operator uint64_t() const noexcept(false); - /** - * Read this element as an signed integer. - * - * @return The integer value. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer - * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits - */ - inline operator int64_t() const noexcept(false); - /** - * Read this element as an double. - * - * @return The double value. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a number - * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative - */ - inline operator double() const noexcept(false); - /** - * Read this element as a JSON array. - * - * @return The JSON array. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array - */ - inline operator array() const noexcept(false); - /** - * Read this element as a JSON object (key/value pairs). - * - * @return The JSON object. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an object - */ - inline operator object() const noexcept(false); - - /** - * Iterate over each element in this array. - * - * @return The beginning of the iteration. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array - */ - inline dom::array::iterator begin() const noexcept(false); - - /** - * Iterate over each element in this array. - * - * @return The end of the iteration. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array - */ - inline dom::array::iterator end() const noexcept(false); -#endif // SIMDJSON_EXCEPTIONS - - /** - * Get the value associated with the given key. 
- * - * The key will be matched against **unescaped** JSON: - * - * dom::parser parser; - * parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1 - * parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD - * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - * - INCORRECT_TYPE if this is not an object - */ - inline simdjson_result<element> operator[](const std::string_view &key) const noexcept; - - /** - * Get the value associated with the given key. - * - * The key will be matched against **unescaped** JSON: - * - * dom::parser parser; - * parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1 - * parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD - * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - * - INCORRECT_TYPE if this is not an object - */ - inline simdjson_result<element> operator[](const char *key) const noexcept; - - /** - * Get the value associated with the given JSON pointer. - * - * dom::parser parser; - * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"); - * doc.at("/foo/a/1") == 20 - * doc.at("/")["foo"]["a"].at(1) == 20 - * doc.at("")["foo"]["a"].at(1) == 20 - * - * @return The value associated with the given JSON pointer, or: - * - NO_SUCH_FIELD if a field does not exist in an object - * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length - * - INCORRECT_TYPE if a non-integer is used to access an array - * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed - */ - inline simdjson_result<element> at(const std::string_view &json_pointer) const noexcept; - - /** - * Get the value at the given index. 
- * - * @return The value at the given index, or: - * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length - */ - inline simdjson_result<element> at(size_t index) const noexcept; - - /** - * Get the value associated with the given key. - * - * The key will be matched against **unescaped** JSON: - * - * dom::parser parser; - * parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1 - * parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD - * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - */ - inline simdjson_result<element> at_key(const std::string_view &key) const noexcept; - - /** - * Get the value associated with the given key in a case-insensitive manner. - * - * Note: The key will be matched against **unescaped** JSON. - * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - */ - inline simdjson_result<element> at_key_case_insensitive(const std::string_view &key) const noexcept; - - /** @private for debugging. Prints out the root element. */ - inline bool dump_raw_tape(std::ostream &out) const noexcept; - -private: - really_inline element(const document *doc, size_t json_index) noexcept; - friend class document; - friend class object; - friend class array; - friend struct simdjson_result<element>; - template<typename T> - friend class simdjson::minify; -}; - /** - * Key/value pair in an object. - */ -class key_value_pair { -public: - std::string_view key; - element value; - -private: - really_inline key_value_pair(const std::string_view &_key, element _value) noexcept; - friend class object; -}; - -/** * A persistent document parser. * * The parser is designed to be reused, holding the internal buffers necessary to do parsing, * as well as memory for a single document. The parsed document is overwritten on each parse. 
* @@ -1331,25 +3328,24 @@ * will allocate more capacity on an as needed basis (when it sees documents too big to handle) * up to this amount. The parser still starts with zero capacity no matter what this number is: * to allocate an initial capacity, call allocate() after constructing the parser. * Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process). */ - really_inline parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; - + really_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; /** * Take another parser's buffers and state. * * @param other The parser to take. Its capacity is zeroed. */ - parser(parser &&other) = default; + really_inline parser(parser &&other) noexcept; parser(const parser &) = delete; ///< @private Disallow copying /** * Take another parser's buffers and state. * * @param other The parser to take. Its capacity is zeroed. */ - parser &operator=(parser &&other) = default; + really_inline parser &operator=(parser &&other) noexcept; parser &operator=(const parser &) = delete; ///< @private Disallow copying /** Deallocate the JSON parser. */ ~parser()=default; @@ -1375,12 +3371,12 @@ * - IO_ERROR if there was an error opening or reading the file. * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other json errors if parsing fails. */ - inline simdjson_result<element> load(const std::string &path) noexcept; - + inline simdjson_result<element> load(const std::string &path) & noexcept; + inline simdjson_result<element> load(const std::string &path) && = delete ; /** * Parse a JSON document and return a temporary reference to it. * * dom::parser parser; * element doc = parser.parse(buf, len); @@ -1412,17 +3408,21 @@ * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, * and memory allocation fails. 
* - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other json errors if parsing fails. */ - inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept; + inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; + inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - really_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) noexcept; + really_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; + really_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete; /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - really_inline simdjson_result<element> parse(const std::string &s) noexcept; + really_inline simdjson_result<element> parse(const std::string &s) & noexcept; + really_inline simdjson_result<element> parse(const std::string &s) && =delete; /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - really_inline simdjson_result<element> parse(const padded_string &s) noexcept; + really_inline simdjson_result<element> parse(const padded_string &s) & noexcept; + really_inline simdjson_result<element> parse(const padded_string &s) && =delete; /** @private We do not want to allow implicit conversion from C string to std::string. */ really_inline simdjson_result<element> parse(const char *buf) noexcept = delete; /** @@ -1441,23 +3441,31 @@ * lookahead than you might think, though.) * * documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. documents that consist of a single primitive (i.e. 
documents that are not * arrays or objects) MUST be separated with whitespace. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excesively small values may impact negatively the + * performance. * * ### Error Handling * * All errors are returned during iteration: if there is a global error such as memory allocation, * it will be yielded as the first result. Iteration always stops after the first error. * * As with all other simdjson methods, non-exception error handling is readily available through * the same interface, requiring you to check the error before using the document: * * dom::parser parser; - * for (auto [doc, error] : parser.load_many(path)) { - * if (error) { cerr << error << endl; exit(1); } - * cout << std::string(doc["title"]) << endl; + * dom::document_stream docs; + * auto error = parser.load_many(path).get(docs); + * if (error) { cerr << error << endl; exit(1); } + * for (auto doc : docs) { + * std::string_view title; + * if ((error = doc["title"].get(title)) { cerr << error << endl; exit(1); } + * cout << title << endl; * } * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the @@ -1466,29 +3474,28 @@ * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * - * @param s The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes. + * @param path File name pointing at the concatenated JSON to parse. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 10MB, which has been a reasonable sweet spot in our tests. - * @return The stream. 
If there is an error, it will be returned during iteration. An empty input - * will yield 0 documents rather than an EMPTY error. Errors: + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: * - IO_ERROR if there was an error opening or reading the file. * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails. */ - inline document_stream load_many(const std::string &path, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result<document_stream> load_many(const std::string &path, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; /** * Parse a buffer containing many JSON documents. * * dom::parser parser; - * for (const element doc : parser.parse_many(buf, len)) { + * for (element doc : parser.parse_many(buf, len)) { * cout << std::string(doc["title"]) << endl; * } * * ### Format * @@ -1498,23 +3505,31 @@ * lookahead than you might think, though.) * * documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with whitespace. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excesively small values may impact negatively the + * performance. * * ### Error Handling * * All errors are returned during iteration: if there is a global error such as memory allocation, * it will be yielded as the first result. Iteration always stops after the first error. 
* * As with all other simdjson methods, non-exception error handling is readily available through * the same interface, requiring you to check the error before using the document: * * dom::parser parser; - * for (auto [doc, error] : parser.parse_many(buf, len)) { - * if (error) { cerr << error << endl; exit(1); } - * cout << std::string(doc["title"]) << endl; + * dom::document_stream docs; + * auto error = parser.load_many(path).get(docs); + * if (error) { cerr << error << endl; exit(1); } + * for (auto doc : docs) { + * std::string_view title; + * if ((error = doc["title"].get(title)) { cerr << error << endl; exit(1); } + * cout << title << endl; * } * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -1534,26 +3549,25 @@ * @param len The length of the concatenated JSON. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 10MB, which has been a reasonable sweet spot in our tests. - * @return The stream. If there is an error, it will be returned during iteration. An empty input - * will yield 0 documents rather than an EMPTY error. Errors: + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails. 
*/ - inline document_stream parse_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result<document_stream> parse_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ - inline document_stream parse_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result<document_stream> parse_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ - inline document_stream parse_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result<document_stream> parse_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ - inline document_stream parse_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result<document_stream> parse_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; /** @private We do not want to allow implicit conversion from C string to std::string. */ - really_inline simdjson_result<element> parse_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + simdjson_result<document_stream> parse_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length * and `max_depth` depth. * @@ -1601,11 +3615,12 @@ really_inline size_t max_depth() const noexcept; /** * Set max_capacity. This is the largest document this parser can automatically support. * - * The parser may reallocate internal buffers as needed up to this amount. 
+ * The parser may reallocate internal buffers as needed up to this amount as documents are passed + * to it. * * This call will not allocate or deallocate, even if capacity is currently above max_capacity. * * @param max_capacity The new maximum capacity, in bytes. */ @@ -1614,38 +3629,20 @@ /** @private Use the new DOM API instead */ class Iterator; /** @private Use simdjson_error instead */ using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error; - /** @private Next location to write to in the tape */ - uint32_t current_loc{0}; + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr<internal::dom_parser_implementation> implementation{}; - /** @private Number of structural indices passed from stage 1 to stage 2 */ - uint32_t n_structural_indexes{0}; - /** @private Structural indices passed from stage 1 to stage 2 */ - std::unique_ptr<uint32_t[]> structural_indexes; - - /** @private Tape location of each open { or [ */ - std::unique_ptr<uint32_t[]> containing_scope_offset; -#ifdef SIMDJSON_USE_COMPUTED_GOTO - /** @private Return address of each open { or [ */ - std::unique_ptr<void*[]> ret_address; -#else - /** @private Return address of each open { or [ */ - std::unique_ptr<char[]> ret_address; -#endif - - /** @private Next write location in the string buf for stage 2 parsing */ - uint8_t *current_string_buf_loc; - /** @private Use `if (parser.parse(...).error())` instead */ bool valid{false}; /** @private Use `parser.parse(...).error()` instead */ error_code error{UNINITIALIZED}; /** @private Use `parser.parse(...).value()` instead */ - document doc; + document doc{}; /** @private returns true if the document parsed was valid */ [[deprecated("Use the result of parser.parse() instead")]] inline bool is_valid() const noexcept; @@ -1665,60 +3662,27 @@ inline bool print_json(std::ostream &os) const noexcept; /** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */ inline bool 
dump_raw_tape(std::ostream &os) const noexcept; - // - // Parser callbacks: these are internal! - // - - /** @private this should be called when parsing (right before writing the tapes) */ - inline void init_stage2() noexcept; - really_inline error_code on_error(error_code new_error_code) noexcept; ///< @private - really_inline error_code on_success(error_code success_code) noexcept; ///< @private - really_inline bool on_start_document(uint32_t depth) noexcept; ///< @private - really_inline bool on_start_object(uint32_t depth) noexcept; ///< @private - really_inline bool on_start_array(uint32_t depth) noexcept; ///< @private - // TODO we're not checking this bool - really_inline bool on_end_document(uint32_t depth) noexcept; ///< @private - really_inline bool on_end_object(uint32_t depth) noexcept; ///< @private - really_inline bool on_end_array(uint32_t depth) noexcept; ///< @private - really_inline bool on_true_atom() noexcept; ///< @private - really_inline bool on_false_atom() noexcept; ///< @private - really_inline bool on_null_atom() noexcept; ///< @private - really_inline uint8_t *on_start_string() noexcept; ///< @private - really_inline bool on_end_string(uint8_t *dst) noexcept; ///< @private - really_inline bool on_number_s64(int64_t value) noexcept; ///< @private - really_inline bool on_number_u64(uint64_t value) noexcept; ///< @private - really_inline bool on_number_double(double value) noexcept; ///< @private - private: /** * The maximum document length this parser will automatically support. * * The parser will not be automatically allocated above this amount. */ size_t _max_capacity; /** - * The maximum document length this parser supports. - * - * Buffers are large enough to handle any document up to this length. - */ - size_t _capacity{0}; - - /** - * The maximum depth (number of nested objects and arrays) supported by this parser. - * - * Defaults to DEFAULT_MAX_DEPTH. 
- */ - size_t _max_depth{0}; - - /** * The loaded buffer (reused each time load() is called) */ + #if defined(_MSC_VER) && _MSC_VER < 1910 + // older versions of Visual Studio lack proper support for unique_ptr. + std::unique_ptr<char[]> loaded_bytes; + #else std::unique_ptr<char[], decltype(&aligned_free_char)> loaded_bytes; + #endif /** Capacity of loaded_bytes buffer. */ size_t _loaded_bytes_capacity{0}; // all nodes are stored on the doc.tape using a 64-bit word. @@ -1733,13 +3697,10 @@ // location on the doc.tape of the end, and for then closings (} and ]), we // annotate them with a reference to the location of the opening // // - inline void write_tape(uint64_t val, internal::tape_type t) noexcept; - inline void annotate_previous_loc(uint32_t saved_loc, uint64_t val) noexcept; - /** * Ensure we have enough capacity to handle at least desired_capacity bytes, * and auto-allocate if not. */ inline error_code ensure_capacity(size_t desired_capacity) noexcept; @@ -1749,696 +3710,1107 @@ friend class parser::Iterator; friend class document_stream; }; // class parser -} // namespace simdjson::dom +} // namespace dom +} // namespace simdjson +#endif // SIMDJSON_DOM_PARSER_H +/* end file include/simdjson/dom/document.h */ +#ifdef SIMDJSON_THREADS_ENABLED +#include <thread> +#include <mutex> +#include <condition_variable> +#endif + namespace simdjson { +namespace dom { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. 
+ */ + void run(document_stream * ds, dom::parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + dom::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; +}; +#endif + /** - * Minifies a JSON element or document, printing the smallest possible valid JSON. + * A forward-only stream of documents. * - * dom::parser parser; - * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); - * cout << minify(doc) << endl; // prints [1,2,3] + * Produced by parser::parse_many. * */ -template<typename T> -class minify { +class document_stream { public: /** - * Create a new minifier. + * Construct an uninitialized document_stream. * - * @param _value The document or element to minify. + * ```c++ + * document_stream docs; + * error = parser.parse_many(json).get(docs); + * ``` */ - inline minify(const T &_value) noexcept : value{_value} {} + really_inline document_stream() noexcept; + /** Move one document_stream to another. */ + really_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + really_inline document_stream &operator=(document_stream &&other) noexcept = default; + really_inline ~document_stream() noexcept; + /** - * Minify JSON to a string. 
+ * An iterator through a forward-only stream of documents. */ - inline operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); } + class iterator { + public: + /** + * Get the current document (or error). + */ + really_inline simdjson_result<element> operator*() noexcept; + /** + * Advance to the next document. + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + really_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + really_inline size_t current_index() noexcept; + private: + really_inline iterator(document_stream &s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream& stream; + /** Whether we're finished or not. */ + bool finished; + friend class document_stream; + }; /** - * Minify JSON to an output stream. + * Start iterating the documents in the stream. */ - inline std::ostream& print(std::ostream& out); + really_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + really_inline iterator end() noexcept; + private: - const T &value; -}; -/** - * Minify JSON to an output stream. - * - * @param out The output stream. - * @param formatter The minifier. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
- */ -template<typename T> -inline std::ostream& operator<<(std::ostream& out, minify<T> formatter) { return formatter.print(out); } + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying -namespace dom { + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. + */ + really_inline document_stream( + dom::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size + ) noexcept; -// << operators need to be in the same namespace as the class being output, so C++ can find them -// automatically + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; -/** - * Print JSON to an output stream. - * - * By default, the value will be printed minified. - * - * @param out The output stream. - * @param value The value to print. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. - */ -inline std::ostream& operator<<(std::ostream& out, const element &value) { return out << minify(value); }; -/** - * Print JSON to an output stream. - * - * By default, the value will be printed minified. - * - * @param out The output stream. - * @param value The value to print. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. - */ -inline std::ostream& operator<<(std::ostream& out, const array &value) { return out << minify(value); } -/** - * Print JSON to an output stream. - * - * By default, the value will be printed minified. - * - * @param out The output stream. - * @param value The value to print. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
- */ -inline std::ostream& operator<<(std::ostream& out, const object &value) { return out << minify(value); } -/** - * Print JSON to an output stream. - * - * By default, the value will be printed minified. - * - * @param out The output stream. - * @param value The value to print. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. - */ -inline std::ostream& operator<<(std::ostream& out, const key_value_pair &value) { return out << minify(value); } + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are + * discouraged. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; -/** - * Print element type to an output stream. - * - * @param out The output stream. - * @param value The value to print. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
- */ -inline std::ostream& operator<<(std::ostream& out, element_type type) { - switch (type) { - case element_type::ARRAY: - return out << "array"; - case element_type::OBJECT: - return out << "object"; - case element_type::INT64: - return out << "int64_t"; - case element_type::UINT64: - return out << "uint64_t"; - case element_type::DOUBLE: - return out << "double"; - case element_type::STRING: - return out << "string"; - case element_type::BOOL: - return out << "bool"; - case element_type::NULL_VALUE: - return out << "null"; - default: - abort(); - } -} + /** + * Pass the next batch through stage 1 and return when finished. + * When threads are enabled, this may wait for the stage 1 thread to finish. + */ + inline void load_batch() noexcept; -} // namespace dom + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; -#if SIMDJSON_EXCEPTIONS + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(dom::parser &p, size_t batch_start) noexcept; -/** - * Print JSON to an output stream. - * - * By default, the value will be printed minified. - * - * @param out The output stream. - * @param value The value to print. - * @throw simdjson_error if the result being printed has an error. If there is an error with the - * underlying output stream, that error will be propagated (simdjson_error will not be - * thrown). - */ -inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::element> &value) noexcept(false) { return out << minify(value); } -/** - * Print JSON to an output stream. - * - * By default, the value will be printed minified. - * - * @param out The output stream. - * @param value The value to print. - * @throw simdjson_error if the result being printed has an error. If there is an error with the - * underlying output stream, that error will be propagated (simdjson_error will not be - * thrown). 
- */ -inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::array> &value) noexcept(false) { return out << minify(value); } -/** - * Print JSON to an output stream. - * - * By default, the value will be printed minified. - * - * @param out The output stream. - * @param value The value to print. - * @throw simdjson_error if the result being printed has an error. If there is an error with the - * underlying output stream, that error will be propagated (simdjson_error will not be - * thrown). - */ -inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::object> &value) noexcept(false) { return out << minify(value); } + dom::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; -#endif +#ifdef SIMDJSON_THREADS_ENABLED + inline void load_from_stage1_thread() noexcept; -/** The result of a JSON navigation that may fail. */ -template<> -struct simdjson_result<dom::element> : public internal::simdjson_result_base<dom::element> { -public: - really_inline simdjson_result() noexcept; ///< @private - really_inline simdjson_result(dom::element &&value) noexcept; ///< @private - really_inline simdjson_result(error_code error) noexcept; ///< @private + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; - inline simdjson_result<dom::element_type> type() const noexcept; - inline simdjson_result<bool> is_null() const noexcept; - template<typename T> - inline simdjson_result<bool> is() const noexcept; - template<typename T> - inline simdjson_result<T> get() const noexcept; + /** Wait for the stage 1 thread to finish and capture the results. 
*/ + inline void finish_stage1_thread() noexcept; - inline simdjson_result<dom::element> operator[](const std::string_view &key) const noexcept; - inline simdjson_result<dom::element> operator[](const char *key) const noexcept; - inline simdjson_result<dom::element> at(const std::string_view &json_pointer) const noexcept; - inline simdjson_result<dom::element> at(size_t index) const noexcept; - inline simdjson_result<dom::element> at_key(const std::string_view &key) const noexcept; - inline simdjson_result<dom::element> at_key_case_insensitive(const std::string_view &key) const noexcept; + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + friend struct stage1_worker; + std::unique_ptr<stage1_worker> worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. + */ + dom::parser stage1_thread_parser{}; +#endif // SIMDJSON_THREADS_ENABLED -#if SIMDJSON_EXCEPTIONS - inline operator bool() const noexcept(false); - inline explicit operator const char*() const noexcept(false); - inline operator std::string_view() const noexcept(false); - inline operator uint64_t() const noexcept(false); - inline operator int64_t() const noexcept(false); - inline operator double() const noexcept(false); - inline operator dom::array() const noexcept(false); - inline operator dom::object() const noexcept(false); + friend class dom::parser; + friend struct simdjson_result<dom::document_stream>; + friend struct internal::simdjson_result_base<dom::document_stream>; - inline dom::array::iterator begin() const noexcept(false); - inline dom::array::iterator end() const noexcept(false); -#endif // SIMDJSON_EXCEPTIONS -}; +}; // class document_stream -/** The result of a JSON conversion that may fail. 
*/ -template<> -struct simdjson_result<dom::array> : public internal::simdjson_result_base<dom::array> { -public: - really_inline simdjson_result() noexcept; ///< @private - really_inline simdjson_result(dom::array value) noexcept; ///< @private - really_inline simdjson_result(error_code error) noexcept; ///< @private +} // namespace dom - inline simdjson_result<dom::element> at(const std::string_view &json_pointer) const noexcept; - inline simdjson_result<dom::element> at(size_t index) const noexcept; - -#if SIMDJSON_EXCEPTIONS - inline dom::array::iterator begin() const noexcept(false); - inline dom::array::iterator end() const noexcept(false); -#endif // SIMDJSON_EXCEPTIONS -}; - -/** The result of a JSON conversion that may fail. */ template<> -struct simdjson_result<dom::object> : public internal::simdjson_result_base<dom::object> { +struct simdjson_result<dom::document_stream> : public internal::simdjson_result_base<dom::document_stream> { public: really_inline simdjson_result() noexcept; ///< @private - really_inline simdjson_result(dom::object value) noexcept; ///< @private really_inline simdjson_result(error_code error) noexcept; ///< @private + really_inline simdjson_result(dom::document_stream &&value) noexcept; ///< @private - inline simdjson_result<dom::element> operator[](const std::string_view &key) const noexcept; - inline simdjson_result<dom::element> operator[](const char *key) const noexcept; - inline simdjson_result<dom::element> at(const std::string_view &json_pointer) const noexcept; - inline simdjson_result<dom::element> at_key(const std::string_view &key) const noexcept; - inline simdjson_result<dom::element> at_key_case_insensitive(const std::string_view &key) const noexcept; - #if SIMDJSON_EXCEPTIONS - inline dom::object::iterator begin() const noexcept(false); - inline dom::object::iterator end() const noexcept(false); + really_inline dom::document_stream::iterator begin() noexcept(false); + really_inline dom::document_stream::iterator 
end() noexcept(false); +#else // SIMDJSON_EXCEPTIONS + [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] + really_inline dom::document_stream::iterator begin() noexcept; + [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] + really_inline dom::document_stream::iterator end() noexcept; #endif // SIMDJSON_EXCEPTIONS -}; +}; // struct simdjson_result<dom::document_stream> } // namespace simdjson -#endif // SIMDJSON_DOCUMENT_H -/* end file include/simdjson/simdjson.h */ +#endif // SIMDJSON_DOCUMENT_STREAM_H +/* end file include/simdjson/dom/document.h */ +/* begin file include/simdjson/dom/element.h */ +#ifndef SIMDJSON_DOM_ELEMENT_H +#define SIMDJSON_DOM_ELEMENT_H +#include <ostream> + namespace simdjson { +namespace dom { +class array; +class document; +class object; + /** - * An implementation of simdjson for a particular CPU architecture. + * The actual concrete type of a JSON element + * This is the type it is most easily cast to with get<>. + */ +enum class element_type { + ARRAY = '[', ///< dom::array + OBJECT = '{', ///< dom::object + INT64 = 'l', ///< int64_t + UINT64 = 'u', ///< uint64_t: any integer that fits in uint64_t but *not* int64_t + DOUBLE = 'd', ///< double: Any number with a "." or "e" that fits in double. + STRING = '"', ///< std::string_view + BOOL = 't', ///< bool + NULL_VALUE = 'n' ///< null +}; + +/** + * A JSON element. * - * Also used to maintain the currently active implementation. The active implementation is - * automatically initialized on first use to the most advanced implementation supported by the host. + * References an element in a JSON document, representing a JSON null, boolean, string, number, + * array or object. */ -class implementation { +class element { public: + /** Create a new, invalid element. 
*/ + really_inline element() noexcept; + + /** The type of this element. */ + really_inline element_type type() const noexcept; + /** - * The name of this implementation. + * Cast this element to an array. * - * const implementation *impl = simdjson::active_implementation; - * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * Equivalent to get<array>(). * - * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" + * @returns An object that can be used to iterate the array, or: + * INCORRECT_TYPE if the JSON element is not an array. */ - virtual const std::string &name() const { return _name; } + inline simdjson_result<array> get_array() const noexcept; + /** + * Cast this element to an object. + * + * Equivalent to get<object>(). + * + * @returns An object that can be used to look up or iterate the object's fields, or: + * INCORRECT_TYPE if the JSON element is not an object. + */ + inline simdjson_result<object> get_object() const noexcept; + /** + * Cast this element to a null-terminated C string. + * + * The string is guaranteed to be valid UTF-8. + * + * The get_c_str() function is equivalent to get<const char *>(). + * + * The length of the string is given by get_string_length(). Because JSON strings + * may contain null characters, it may be incorrect to use strlen to determine the + * string length. + * + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A pointer to a null-terminated UTF-8 string. This string is stored in the parser and will + * be invalidated the next time it parses a document or when it is destroyed. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result<const char *> get_c_str() const noexcept; + /** + * Gives the length in bytes of the string. 
+ * + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A string length in bytes. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result<size_t> get_string_length() const noexcept; + /** + * Cast this element to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get<std::string_view>(). + * + * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next time it + * parses a document or when it is destroyed. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result<std::string_view> get_string() const noexcept; + /** + * Cast this element to a signed integer. + * + * Equivalent to get<int64_t>(). + * + * @returns A signed 64-bit integer. + * Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE + * if it is too large. + */ + inline simdjson_result<int64_t> get_int64() const noexcept; + /** + * Cast this element to an unsigned integer. + * + * Equivalent to get<uint64_t>(). + * + * @returns An unsigned 64-bit integer. + * Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE + * if it is negative. + */ + inline simdjson_result<uint64_t> get_uint64() const noexcept; + /** + * Cast this element to a double-precision floating-point number. + * + * Equivalent to get<double>(). + * + * @returns A double value. + * Returns INCORRECT_TYPE if the JSON element is not a number. + */ + inline simdjson_result<double> get_double() const noexcept; + /** + * Cast this element to a bool. + * + * Equivalent to get<bool>(). + * + * @returns A bool value. + * Returns INCORRECT_TYPE if the JSON element is not a boolean. + */ + inline simdjson_result<bool> get_bool() const noexcept; /** - * The description of this implementation. + * Whether this element is a json array.
* - * const implementation *impl = simdjson::active_implementation; - * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * Equivalent to is<array>(). + */ + inline bool is_array() const noexcept; + /** + * Whether this element is a json object. * - * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" + * Equivalent to is<object>(). */ - virtual const std::string &description() const { return _description; } + inline bool is_object() const noexcept; + /** + * Whether this element is a json string. + * + * Equivalent to is<std::string_view>() or is<const char *>(). + */ + inline bool is_string() const noexcept; + /** + * Whether this element is a json number that fits in a signed 64-bit integer. + * + * Equivalent to is<int64_t>(). + */ + inline bool is_int64() const noexcept; + /** + * Whether this element is a json number that fits in an unsigned 64-bit integer. + * + * Equivalent to is<uint64_t>(). + */ + inline bool is_uint64() const noexcept; + /** + * Whether this element is a json number that fits in a double. + * + * Equivalent to is<double>(). + */ + inline bool is_double() const noexcept; + /** + * Whether this element is a json number. + * + * Both integers and floating points will return true. + */ + inline bool is_number() const noexcept; + /** + * Whether this element is a json `true` or `false`. + * + * Equivalent to is<bool>(). + */ + inline bool is_bool() const noexcept; + /** + * Whether this element is a json `null`. + */ + inline bool is_null() const noexcept; /** - * @private For internal implementation use + * Tell whether the value can be cast to provided type (T). * - * The instruction sets this implementation is compiled against. 
+ * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object * - * @return a mask of all required `instruction_set` values + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object */ - virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; }; + template<typename T> + really_inline bool is() const noexcept; /** - * @private For internal implementation use + * Get the value as the provided type (T). * - * Run a full document parse (ensure_capacity, stage1 and stage2). + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object * - * Overridden by each implementation. + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object * - * @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. - * @param len the length of the json document. - * @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity. - * @return the error code, or SUCCESS if there was no error. + * @returns The value cast to the given type, or: + * INCORRECT_TYPE if the value cannot be cast to the given type. */ - WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) const noexcept = 0; + template<typename T> + inline simdjson_result<T> get() const noexcept; /** - * @private For internal implementation use + * Get the value as the provided type (T). * - * Run a full document parse (ensure_capacity, stage1 and stage2). + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object * - * Overridden by each implementation. 
+ * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object * - * @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. - * @param len the length of the json document. - * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. - * @param dst_len the number of bytes written. Output only. - * @return the error code, or SUCCESS if there was no error. + * @param value The variable to set to the value. May not be set if there is an error. + * + * @returns The error that occurred, or SUCCESS if there was no error. */ - WARN_UNUSED virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; + template<typename T> + WARN_UNUSED really_inline error_code get(T &value) const noexcept; /** - * @private For internal implementation use + * Get the value as the provided type (T), setting error if it's not the given type. * - * Stage 1 of the document parser. + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object * - * Overridden by each implementation. + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object * - * @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. - * @param len the length of the json document. - * @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity. - * @param streaming whether this is being called by parser::parse_many. - * @return the error code, or SUCCESS if there was no error. + * @param value The variable to set to the given type. value is undefined if there is an error. + * @param error The variable to store the error. error is set to error_code::SUCCESS if there is no error.
*/ - WARN_UNUSED virtual error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) const noexcept = 0; + template<typename T> + inline void tie(T &value, error_code &error) && noexcept; +#if SIMDJSON_EXCEPTIONS /** - * @private For internal implementation use + * Read this element as a boolean. * - * Stage 2 of the document parser. + * @return The boolean value + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a boolean. + */ + inline operator bool() const noexcept(false); + + /** + * Read this element as a null-terminated UTF-8 string. + * + * Be mindful that JSON allows strings to contain null characters. * - * Overridden by each implementation. + * Does *not* convert other types to a string; requires that the JSON type of the element was + * an actual string. * - * @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. - * @param len the length of the json document. - * @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity. - * @return the error code, or SUCCESS if there was no error. + * @return The string value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. */ - WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser) const noexcept = 0; + inline explicit operator const char*() const noexcept(false); /** - * @private For internal implementation use + * Read this element as a null-terminated UTF-8 string. * - * Stage 2 of the document parser for parser::parse_many. + * Does *not* convert other types to a string; requires that the JSON type of the element was + * an actual string. * - * Overridden by each implementation. + * @return The string value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. + */ + inline operator std::string_view() const noexcept(false); + + /** + * Read this element as an unsigned integer. 
* - * @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. - * @param len the length of the json document. - * @param parser the parser with the buffers to use. *MUST* have allocated up to at least len capacity. - * @param next_json the next structural index. Start this at 0 the first time, and it will be updated to the next value to pass each time. - * @return the error code, SUCCESS if there was no error, or SUCCESS_AND_HAS_MORE if there was no error and stage2 can be called again. + * @return The integer value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative */ - WARN_UNUSED virtual error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) const noexcept = 0; + inline operator uint64_t() const noexcept(false); + /** + * Read this element as a signed integer. + * + * @return The integer value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits + */ + inline operator int64_t() const noexcept(false); + /** + * Read this element as a double. + * + * @return The double value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a number + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative + */ + inline operator double() const noexcept(false); + /** + * Read this element as a JSON array. + * + * @return The JSON array. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array + */ + inline operator array() const noexcept(false); + /** + * Read this element as a JSON object (key/value pairs). + * + * @return The JSON object.
+ * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an object + */ + inline operator object() const noexcept(false); -protected: - /** @private Construct an implementation with the given name and description. For subclasses. */ - really_inline implementation( - std::string_view name, - std::string_view description, - uint32_t required_instruction_sets - ) : - _name(name), - _description(description), - _required_instruction_sets(required_instruction_sets) - { - } + /** + * Iterate over each element in this array. + * + * @return The beginning of the iteration. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array + */ + inline dom::array::iterator begin() const noexcept(false); -private: /** - * The name of this implementation. + * Iterate over each element in this array. + * + * @return The end of the iteration. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array */ - const std::string _name; + inline dom::array::iterator end() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS /** - * The description of this implementation. + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * parser.parse(R"({ "a\n": 1 })"_padded)["a\n"].get<uint64_t>().first == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get<uint64_t>().error() == NO_SUCH_FIELD + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object */ - const std::string _description; + inline simdjson_result<element> operator[](const std::string_view &key) const noexcept; /** - * Instruction sets required for this implementation. + * Get the value associated with the given key. 
+ * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * parser.parse(R"({ "a\n": 1 })"_padded)["a\n"].get<uint64_t>().first == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get<uint64_t>().error() == NO_SUCH_FIELD + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object */ - const uint32_t _required_instruction_sets; -}; + inline simdjson_result<element> operator[](const char *key) const noexcept; -/** @private */ -namespace internal { + /** + * Get the value associated with the given JSON pointer. + * + * dom::parser parser; + * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); + * doc.at("/foo/a/1") == 20 + * doc.at("/")["foo"]["a"].at(1) == 20 + * doc.at("")["foo"]["a"].at(1) == 20 + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result<element> at(const std::string_view &json_pointer) const noexcept; -/** - * The list of available implementations compiled into simdjson. - */ -class available_implementation_list { -public: - /** Get the list of available implementations compiled into simdjson */ - really_inline available_implementation_list() {} - /** Number of implementations */ - size_t size() const noexcept; - /** STL const begin() iterator */ - const implementation * const *begin() const noexcept; - /** STL const end() iterator */ - const implementation * const *end() const noexcept; + /** + * Get the value at the given index. 
+ * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + inline simdjson_result<element> at(size_t index) const noexcept; /** - * Get the implementation with the given name. + * Get the value associated with the given key. * - * Case sensitive. + * The key will be matched against **unescaped** JSON: * - * const implementation *impl = simdjson::available_implementations["westmere"]; - * if (!impl) { exit(1); } - * simdjson::active_implementation = impl; + * dom::parser parser; + * parser.parse(R"({ "a\n": 1 })"_padded)["a\n"].get<uint64_t>().first == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get<uint64_t>().error() == NO_SUCH_FIELD * - * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" - * @return the implementation, or nullptr if the parse failed. + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object */ - const implementation * operator[](const std::string_view &name) const noexcept { - for (const implementation * impl : *this) { - if (impl->name() == name) { return impl; } - } - return nullptr; - } + inline simdjson_result<element> at_key(const std::string_view &key) const noexcept; /** - * Detect the most advanced implementation supported by the current host. + * Get the value associated with the given key in a case-insensitive manner. * - * This is used to initialize the implementation on startup. + * Note: The key will be matched against **unescaped** JSON. * - * const implementation *impl = simdjson::available_implementation::detect_best_supported(); - * simdjson::active_implementation = impl; - * - * @return the most advanced supported implementation for the current host, or an - * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported - * implementation. Will never return nullptr. 
+ * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object */ - const implementation *detect_best_supported() const noexcept; + inline simdjson_result<element> at_key_case_insensitive(const std::string_view &key) const noexcept; + + /** @private for debugging. Prints out the root element. */ + inline bool dump_raw_tape(std::ostream &out) const noexcept; + +private: + really_inline element(const internal::tape_ref &tape) noexcept; + internal::tape_ref tape; + friend class document; + friend class object; + friend class array; + friend struct simdjson_result<element>; + template<typename T> + friend class simdjson::minifier; }; /** - * @private Detects best supported implementation on first use, and sets it + * Print JSON to an output stream. + * + * By default, the value will be printed minified. + * + * @param out The output stream. + * @param value The value to print. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
*/ -class detect_best_supported_implementation_on_first_use final : public implementation { -public: - const std::string &name() const noexcept final { return set_best()->name(); } - const std::string &description() const noexcept final { return set_best()->description(); } - uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); } - WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, dom::parser &parser) const noexcept final { - return set_best()->parse(buf, len, parser); - } - WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final { - return set_best()->minify(buf, len, dst, dst_len); - } - WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, dom::parser &parser, bool streaming) const noexcept final { - return set_best()->stage1(buf, len, parser, streaming); - } - WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser) const noexcept final { - return set_best()->stage2(buf, len, parser); - } - WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, dom::parser &parser, size_t &next_json) const noexcept final { - return set_best()->stage2(buf, len, parser, next_json); - } +inline std::ostream& operator<<(std::ostream& out, const element &value); - really_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {} -private: - const implementation *set_best() const noexcept; -}; +/** + * Print element type to an output stream. + * + * @param out The output stream. + * @param value The value to print. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
+ */ +inline std::ostream& operator<<(std::ostream& out, element_type type); -inline const detect_best_supported_implementation_on_first_use detect_best_supported_implementation_on_first_use_singleton; +} // namespace dom -template<typename T> -class atomic_ptr { +/** The result of a JSON navigation that may fail. */ +template<> +struct simdjson_result<dom::element> : public internal::simdjson_result_base<dom::element> { public: - atomic_ptr(T *_ptr) : ptr{_ptr} {} + really_inline simdjson_result() noexcept; ///< @private + really_inline simdjson_result(dom::element &&value) noexcept; ///< @private + really_inline simdjson_result(error_code error) noexcept; ///< @private - operator const T*() const { return ptr.load(); } - const T& operator*() const { return *ptr; } - const T* operator->() const { return ptr.load(); } + really_inline simdjson_result<dom::element_type> type() const noexcept; + template<typename T> + really_inline bool is() const noexcept; + template<typename T> + really_inline simdjson_result<T> get() const noexcept; + template<typename T> + WARN_UNUSED really_inline error_code get(T &value) const noexcept; - operator T*() { return ptr.load(); } - T& operator*() { return *ptr; } - T* operator->() { return ptr.load(); } - T* operator=(T *_ptr) { return ptr = _ptr; } + really_inline simdjson_result<dom::array> get_array() const noexcept; + really_inline simdjson_result<dom::object> get_object() const noexcept; + really_inline simdjson_result<const char *> get_c_str() const noexcept; + really_inline simdjson_result<size_t> get_string_length() const noexcept; + really_inline simdjson_result<std::string_view> get_string() const noexcept; + really_inline simdjson_result<int64_t> get_int64() const noexcept; + really_inline simdjson_result<uint64_t> get_uint64() const noexcept; + really_inline simdjson_result<double> get_double() const noexcept; + really_inline simdjson_result<bool> get_bool() const noexcept; -private: - std::atomic<T*> ptr; -}; + 
really_inline bool is_array() const noexcept; + really_inline bool is_object() const noexcept; + really_inline bool is_string() const noexcept; + really_inline bool is_int64() const noexcept; + really_inline bool is_uint64() const noexcept; + really_inline bool is_double() const noexcept; + really_inline bool is_bool() const noexcept; + really_inline bool is_null() const noexcept; -} // namespace [simdjson::]internal + really_inline simdjson_result<dom::element> operator[](const std::string_view &key) const noexcept; + really_inline simdjson_result<dom::element> operator[](const char *key) const noexcept; + really_inline simdjson_result<dom::element> at(const std::string_view &json_pointer) const noexcept; + really_inline simdjson_result<dom::element> at(size_t index) const noexcept; + really_inline simdjson_result<dom::element> at_key(const std::string_view &key) const noexcept; + really_inline simdjson_result<dom::element> at_key_case_insensitive(const std::string_view &key) const noexcept; +#if SIMDJSON_EXCEPTIONS + really_inline operator bool() const noexcept(false); + really_inline explicit operator const char*() const noexcept(false); + really_inline operator std::string_view() const noexcept(false); + really_inline operator uint64_t() const noexcept(false); + really_inline operator int64_t() const noexcept(false); + really_inline operator double() const noexcept(false); + really_inline operator dom::array() const noexcept(false); + really_inline operator dom::object() const noexcept(false); + + really_inline dom::array::iterator begin() const noexcept(false); + really_inline dom::array::iterator end() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS +}; + +#if SIMDJSON_EXCEPTIONS /** - * The list of available implementations compiled into simdjson. + * Print JSON to an output stream. + * + * By default, the value will be printed minified. + * + * @param out The output stream. + * @param value The value to print. 
+ * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). */ -inline const internal::available_implementation_list available_implementations; +really_inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::element> &value) noexcept(false); +#endif -/** - * The active implementation. - * - * Automatically initialized on first use to the most advanced implementation supported by this hardware. - * - * @hideinitializer - */ -inline internal::atomic_ptr<const implementation> active_implementation = &internal::detect_best_supported_implementation_on_first_use_singleton; - } // namespace simdjson -#endif // SIMDJSON_IMPLEMENTATION_H -/* end file include/simdjson/simdjson.h */ -/* begin file include/simdjson/document_stream.h */ -#ifndef SIMDJSON_DOCUMENT_STREAM_H -#define SIMDJSON_DOCUMENT_STREAM_H +#endif // SIMDJSON_DOM_DOCUMENT_H +/* end file include/simdjson/dom/element.h */ +/* begin file include/simdjson/dom/object.h */ +#ifndef SIMDJSON_DOM_OBJECT_H +#define SIMDJSON_DOM_OBJECT_H -#include <thread> +#include <ostream> -namespace simdjson::dom { +namespace simdjson { +namespace dom { +class document; +class element; +class key_value_pair; + /** - * A forward-only stream of documents. - * - * Produced by parser::parse_many. - * + * JSON object. */ -class document_stream { +class object { public: - really_inline ~document_stream() noexcept; + /** Create a new, invalid object */ + really_inline object() noexcept; - /** - * An iterator through a forward-only stream of documents. - */ class iterator { public: /** - * Get the current document (or error). + * Get the actual key/value pair */ - really_inline simdjson_result<element> operator*() noexcept; + inline const key_value_pair operator*() const noexcept; /** - * Advance to the next document. + * Get the next key/value pair. 
+ * + * Part of the std::iterator interface. + * */ inline iterator& operator++() noexcept; /** - * Check if we're at the end yet. - * @param other the end iterator to compare to. + * Check if these key value pairs come from the same place in the JSON. + * + * Part of the std::iterator interface. */ - really_inline bool operator!=(const iterator &other) const noexcept; - + inline bool operator!=(const iterator& other) const noexcept; + /** + * Get the key of this key/value pair. + */ + inline std::string_view key() const noexcept; + /** + * Get the length (in bytes) of the key in this key/value pair. + * You should expect this function to be faster than key().size(). + */ + inline uint32_t key_length() const noexcept; + /** + * Returns true if the key in this key/value pair is equal + * to the provided string_view. + */ + inline bool key_equals(const std::string_view & o) const noexcept; + /** + * Returns true if the key in this key/value pair is equal + * to the provided string_view in a case-insensitive manner. + * Case comparisons may only be handled correctly for ASCII strings. + */ + inline bool key_equals_case_insensitive(const std::string_view & o) const noexcept; + /** + * Get the key of this key/value pair. + */ + inline const char *key_c_str() const noexcept; + /** + * Get the value of this key/value pair. + */ + inline element value() const noexcept; private: - iterator(document_stream& stream, bool finished) noexcept; - /** The document_stream we're iterating through. */ - document_stream& stream; - /** Whether we're finished or not. */ - bool finished; - friend class document_stream; + really_inline iterator(const internal::tape_ref &tape) noexcept; + + internal::tape_ref tape; + + friend class object; }; /** - * Start iterating the documents in the stream. + * Return the first key/value pair. + * + * Part of the std::iterable interface. 
*/ - really_inline iterator begin() noexcept; + inline iterator begin() const noexcept; /** - * The end of the stream, for iterator comparison purposes. + * One past the last key/value pair. + * + * Part of the std::iterable interface. */ - really_inline iterator end() noexcept; - -private: - - document_stream &operator=(const document_stream &) = delete; // Disallow copying - - document_stream(document_stream &other) = delete; // Disallow copying - - really_inline document_stream(dom::parser &parser, const uint8_t *buf, size_t len, size_t batch_size, error_code error = SUCCESS) noexcept; - + inline iterator end() const noexcept; /** - * Parse the next document found in the buffer previously given to document_stream. + * Get the size of the object (number of keys). + * It is a saturated value with a maximum of 0xFFFFFF: if the value + * is 0xFFFFFF then the size is 0xFFFFFF or greater. + */ + inline size_t size() const noexcept; + /** + * Get the value associated with the given key. * - * The content should be a valid JSON document encoded as UTF-8. If there is a - * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are - * discouraged. + * The key will be matched against **unescaped** JSON: * - * You do NOT need to pre-allocate a parser. This function takes care of - * pre-allocating a capacity defined by the batch_size defined when creating the - * document_stream object. + * dom::parser parser; + * parser.parse(R"({ "a\n": 1 })"_padded)["a\n"].get<uint64_t>().first == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get<uint64_t>().error() == NO_SUCH_FIELD * - * The function returns simdjson::SUCCESS_AND_HAS_MORE (an integer = 1) in case - * of success and indicates that the buffer still contains more data to be parsed, - * meaning this function can be called again to return the next JSON document - * after this one. + * This function has linear-time complexity: the keys are checked one by one. 
* - * The function returns simdjson::SUCCESS (as integer = 0) in case of success - * and indicates that the buffer has successfully been parsed to the end. - * Every document it contained has been parsed without error. + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result<element> operator[](const std::string_view &key) const noexcept; + + /** + * Get the value associated with the given key. * - * The function returns an error code from simdjson/simdjson.h in case of failure - * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; - * the simdjson::error_message function converts these error codes into a string). + * The key will be matched against **unescaped** JSON: * - * You can also check validity by calling parser.is_valid(). The same parser can - * and should be reused for the other documents in the buffer. */ - inline error_code json_parse() noexcept; + * dom::parser parser; + * parser.parse(R"({ "a\n": 1 })"_padded)["a\n"].get<uint64_t>().first == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get<uint64_t>().error() == NO_SUCH_FIELD + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result<element> operator[](const char *key) const noexcept; /** - * Returns the location (index) of where the next document should be in the - * buffer. - * Can be used for debugging, it tells the user the position of the end of the - * last - * valid JSON document parsed + * Get the value associated with the given JSON pointer. 
+ * + * dom::parser parser; + * object obj = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); + * obj.at("foo/a/1") == 20 + * obj.at("foo")["a"].at(1) == 20 + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ - inline size_t get_current_buffer_loc() const { return current_buffer_loc; } + inline simdjson_result<element> at(const std::string_view &json_pointer) const noexcept; /** - * Returns the total amount of complete documents parsed by the document_stream, - * in the current buffer, at the given time. + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * parser.parse(R"({ "a\n": 1 })"_padded)["a\n"].get<uint64_t>().first == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get<uint64_t>().error() == NO_SUCH_FIELD + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object */ - inline size_t get_n_parsed_docs() const { return n_parsed_docs; } + inline simdjson_result<element> at_key(const std::string_view &key) const noexcept; /** - * Returns the total amount of data (in bytes) parsed by the document_stream, - * in the current buffer, at the given time. + * Get the value associated with the given key in a case-insensitive manner. + * It is only guaranteed to work over ASCII inputs. + * + * Note: The key will be matched against **unescaped** JSON. + * + * This function has linear-time complexity: the keys are checked one by one. 
+ * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object */ - inline size_t get_n_bytes_parsed() const { return n_bytes_parsed; } + inline simdjson_result<element> at_key_case_insensitive(const std::string_view &key) const noexcept; - inline const uint8_t *buf() const { return _buf + buf_start; } +private: + really_inline object(const internal::tape_ref &tape) noexcept; - inline void advance(size_t offset) { buf_start += offset; } + internal::tape_ref tape; - inline size_t remaining() const { return _len - buf_start; } + friend class element; + friend struct simdjson_result<element>; + template<typename T> + friend class simdjson::minifier; +}; - dom::parser &parser; - const uint8_t *_buf; - const size_t _len; - size_t _batch_size; // this is actually variable! - size_t buf_start{0}; - size_t next_json{0}; - bool load_next_batch{true}; - size_t current_buffer_loc{0}; -#ifdef SIMDJSON_THREADS_ENABLED - size_t last_json_buffer_loc{0}; -#endif - size_t n_parsed_docs{0}; - size_t n_bytes_parsed{0}; - error_code error{SUCCESS_AND_HAS_MORE}; -#ifdef SIMDJSON_THREADS_ENABLED - error_code stage1_is_ok_thread{SUCCESS}; - std::thread stage_1_thread; - dom::parser parser_thread; -#endif - friend class dom::parser; -}; // class document_stream +/** + * Key/value pair in an object. + */ +class key_value_pair { +public: + /** key in the key-value pair **/ + std::string_view key; + /** value in the key-value pair **/ + element value; -} // end of namespace simdjson::dom +private: + really_inline key_value_pair(const std::string_view &_key, element _value) noexcept; + friend class object; +}; -#endif // SIMDJSON_DOCUMENT_STREAM_H -/* end file include/simdjson/document_stream.h */ +/** + * Print JSON to an output stream. + * + * By default, the value will be printed minified. + * + * @param out The output stream. + * @param value The value to print. + * @throw if there is an error with the underlying output stream. 
simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, const object &value); +/** + * Print JSON to an output stream. + * + * By default, the value will be printed minified. + * + * @param out The output stream. + * @param value The value to print. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, const key_value_pair &value); +} // namespace dom + +/** The result of a JSON conversion that may fail. */ +template<> +struct simdjson_result<dom::object> : public internal::simdjson_result_base<dom::object> { +public: + really_inline simdjson_result() noexcept; ///< @private + really_inline simdjson_result(dom::object value) noexcept; ///< @private + really_inline simdjson_result(error_code error) noexcept; ///< @private + + inline simdjson_result<dom::element> operator[](const std::string_view &key) const noexcept; + inline simdjson_result<dom::element> operator[](const char *key) const noexcept; + inline simdjson_result<dom::element> at(const std::string_view &json_pointer) const noexcept; + inline simdjson_result<dom::element> at_key(const std::string_view &key) const noexcept; + inline simdjson_result<dom::element> at_key_case_insensitive(const std::string_view &key) const noexcept; + +#if SIMDJSON_EXCEPTIONS + inline dom::object::iterator begin() const noexcept(false); + inline dom::object::iterator end() const noexcept(false); + inline size_t size() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS +}; + +#if SIMDJSON_EXCEPTIONS +/** + * Print JSON to an output stream. + * + * By default, the value will be printed minified. + * + * @param out The output stream. + * @param value The value to print. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). 
+ */ +inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::object> &value) noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + +} // namespace simdjson + +#endif // SIMDJSON_DOM_OBJECT_H +/* end file include/simdjson/dom/object.h */ + // Deprecated API -/* begin file include/simdjson/jsonparser.h */ +/* begin file include/simdjson/dom/jsonparser.h */ // TODO Remove this -- deprecated API and files -#ifndef SIMDJSON_JSONPARSER_H -#define SIMDJSON_JSONPARSER_H +#ifndef SIMDJSON_DOM_JSONPARSER_H +#define SIMDJSON_DOM_JSONPARSER_H -/* begin file include/simdjson/parsedjson.h */ +/* begin file include/simdjson/dom/parsedjson.h */ // TODO Remove this -- deprecated API and files -#ifndef SIMDJSON_PARSEDJSON_H -#define SIMDJSON_PARSEDJSON_H +#ifndef SIMDJSON_DOM_PARSEDJSON_H +#define SIMDJSON_DOM_PARSEDJSON_H namespace simdjson { /** * @deprecated Use `dom::parser` instead. */ using ParsedJson [[deprecated("Use dom::parser instead")]] = dom::parser; } // namespace simdjson -#endif -/* end file include/simdjson/parsedjson.h */ + +#endif // SIMDJSON_DOM_PARSEDJSON_H +/* end file include/simdjson/dom/parsedjson.h */ /* begin file include/simdjson/jsonioutil.h */ #ifndef SIMDJSON_JSONIOUTIL_H #define SIMDJSON_JSONIOUTIL_H -#include <exception> -#include <fstream> -#include <iostream> -#include <sstream> -#include <stdexcept> -#include <string> - namespace simdjson { #if SIMDJSON_EXCEPTIONS [[deprecated("Use padded_string::load() instead")]] @@ -2558,17 +4930,17 @@ /** @private We do not want to allow implicit conversion from C string to std::string. 
*/ dom::parser build_parsed_json(const char *buf) noexcept = delete; } // namespace simdjson -#endif +#endif // SIMDJSON_DOM_JSONPARSER_H /* end file include/simdjson/jsonioutil.h */ -/* begin file include/simdjson/parsedjson_iterator.h */ +/* begin file include/simdjson/dom/parsedjson_iterator.h */ // TODO Remove this -- deprecated API and files -#ifndef SIMDJSON_PARSEDJSON_ITERATOR_H -#define SIMDJSON_PARSEDJSON_ITERATOR_H +#ifndef SIMDJSON_DOM_PARSEDJSON_ITERATOR_H +#define SIMDJSON_DOM_PARSEDJSON_ITERATOR_H #include <cstring> #include <string> #include <iostream> #include <iterator> @@ -2581,11 +4953,12 @@ #include <iomanip> #include <iostream> #include <sstream> -namespace simdjson::internal { +namespace simdjson { +namespace internal { class escape_json_string; inline std::ostream& operator<<(std::ostream& out, const escape_json_string &str); @@ -2624,33 +4997,37 @@ break; default: if ((unsigned char)unescaped.str[i] <= 0x1F) { // TODO can this be done once at the beginning, or will it mess up << char? 
std::ios::fmtflags f(out.flags()); - out << "\\u" << std::hex << std::setw(4) << std::setfill('0') << static_cast<int>(unescaped.str[i]); + out << "\\u" << std::hex << std::setw(4) << std::setfill('0') << int(unescaped.str[i]); out.flags(f); } else { out << unescaped.str[i]; } } } return out; } -} // namespace simdjson::internal +} // namespace internal +} // namespace simdjson #endif // SIMDJSON_INTERNAL_JSONFORMATUTILS_H /* end file include/simdjson/internal/jsonformatutils.h */ namespace simdjson { -class [[deprecated("Use the new DOM navigation API instead (see doc/usage.md)")]] dom::parser::Iterator { +/** @private **/ +class [[deprecated("Use the new DOM navigation API instead (see doc/basics.md)")]] dom::parser::Iterator { public: inline Iterator(const dom::parser &parser) noexcept(false); inline Iterator(const Iterator &o) noexcept; inline ~Iterator() noexcept; + inline Iterator& operator=(const Iterator&) = delete; + inline bool is_ok() const; // useful for debugging purposes inline size_t get_tape_location() const; @@ -2727,36 +5104,36 @@ inline bool is_array() const { return get_type() == '['; } inline bool is_string() const { return get_type() == '"'; } - // Returns true if the current type of node is an signed integer. + // Returns true if the current type of the node is an signed integer. // You can get its value with `get_integer()`. inline bool is_integer() const { return get_type() == 'l'; } - // Returns true if the current type of node is an unsigned integer. + // Returns true if the current type of the node is an unsigned integer. // You can get its value with `get_unsigned_integer()`. // // NOTE: // Only a large value, which is out of range of a 64-bit signed integer, is // represented internally as an unsigned node. On the other hand, a typical // positive integer, such as 1, 42, or 1000000, is as a signed node. // Be aware this function returns false for a signed node. 
inline bool is_unsigned_integer() const { return get_type() == 'u'; } - + // Returns true if the current type of the node is a double floating-point number. inline bool is_double() const { return get_type() == 'd'; } - + // Returns true if the current type of the node is a number (integer or floating-point). inline bool is_number() const { return is_integer() || is_unsigned_integer() || is_double(); } - + // Returns true if the current type of the node is a bool with true value. inline bool is_true() const { return get_type() == 't'; } - + // Returns true if the current type of the node is a bool with false value. inline bool is_false() const { return get_type() == 'f'; } - + // Returns true if the current type of the node is null. inline bool is_null() const { return get_type() == 'n'; } - + // Returns true if the type byte represents an object of an array static bool is_object_or_array(uint8_t type) { return ((type == '[') || (type == '{')); } // when at {, go one level deep, looking for a given key @@ -2812,11 +5189,11 @@ // syntax: https://tools.ietf.org/html/rfc6901 However, the standard says // "If a referenced member name is not unique in an object, the member that // is referenced is undefined, and evaluation fails". Here we just return // the first corresponding value. inline bool move_to(const std::string &pointer) { - return move_to(pointer.c_str(), pointer.length()); + return move_to(pointer.c_str(), uint32_t(pointer.length())); } private: // Almost the same as move_to(), except it searches from the current // position. 
The pointer's syntax is identical, though that case is not @@ -2866,245 +5243,479 @@ inline void rewind() { while (up()) ; } - // void to_end_scope(); // move us to - // the start of our current scope; always succeeds + // print the node we are currently pointing at inline bool print(std::ostream &os, bool escape_strings = true) const; + + private: + const document &doc; + size_t max_depth{}; + size_t depth{}; + size_t location{}; // our current location on a tape + size_t tape_length{}; + uint8_t current_type{}; + uint64_t current_val{}; typedef struct { size_t start_of_scope; uint8_t scope_type; } scopeindex_t; - private: - const document &doc; - size_t max_depth; - size_t depth; - size_t location; // our current location on a tape - size_t tape_length; - uint8_t current_type; - uint64_t current_val; - scopeindex_t *depth_index; + scopeindex_t *depth_index{}; }; } // namespace simdjson -#endif +#endif // SIMDJSON_DOM_PARSEDJSON_ITERATOR_H /* end file include/simdjson/internal/jsonformatutils.h */ // Inline functions -/* begin file include/simdjson/inline/document.h */ -#ifndef SIMDJSON_INLINE_DOCUMENT_H -#define SIMDJSON_INLINE_DOCUMENT_H +/* begin file include/simdjson/inline/array.h */ +#ifndef SIMDJSON_INLINE_ARRAY_H +#define SIMDJSON_INLINE_ARRAY_H // Inline implementations go in here. 
-#include <iostream> -#include <climits> -#include <cctype> +#include <utility> namespace simdjson { // -// simdjson_result<dom::element> inline implementation +// simdjson_result<dom::array> inline implementation // -really_inline simdjson_result<dom::element>::simdjson_result() noexcept - : internal::simdjson_result_base<dom::element>() {} -really_inline simdjson_result<dom::element>::simdjson_result(dom::element &&value) noexcept - : internal::simdjson_result_base<dom::element>(std::forward<dom::element>(value)) {} -really_inline simdjson_result<dom::element>::simdjson_result(error_code error) noexcept - : internal::simdjson_result_base<dom::element>(error) {} -inline simdjson_result<dom::element_type> simdjson_result<dom::element>::type() const noexcept { - if (error()) { return error(); } - return first.type(); +really_inline simdjson_result<dom::array>::simdjson_result() noexcept + : internal::simdjson_result_base<dom::array>() {} +really_inline simdjson_result<dom::array>::simdjson_result(dom::array value) noexcept + : internal::simdjson_result_base<dom::array>(std::forward<dom::array>(value)) {} +really_inline simdjson_result<dom::array>::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base<dom::array>(error) {} + +#if SIMDJSON_EXCEPTIONS + +inline dom::array::iterator simdjson_result<dom::array>::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); } -inline simdjson_result<bool> simdjson_result<dom::element>::is_null() const noexcept { - if (error()) { return error(); } - return first.is_null(); +inline dom::array::iterator simdjson_result<dom::array>::end() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); } -template<typename T> -inline simdjson_result<bool> simdjson_result<dom::element>::is() const noexcept { +inline size_t simdjson_result<dom::array>::size() const noexcept(false) { + if (error()) { throw simdjson_error(error()); 
} + return first.size(); +} + +#endif // SIMDJSON_EXCEPTIONS + +inline simdjson_result<dom::element> simdjson_result<dom::array>::at(const std::string_view &json_pointer) const noexcept { if (error()) { return error(); } - return first.is<T>(); + return first.at(json_pointer); } -template<typename T> -inline simdjson_result<T> simdjson_result<dom::element>::get() const noexcept { +inline simdjson_result<dom::element> simdjson_result<dom::array>::at(size_t index) const noexcept { if (error()) { return error(); } - return first.get<T>(); + return first.at(index); } -inline simdjson_result<dom::element> simdjson_result<dom::element>::operator[](const std::string_view &key) const noexcept { - if (error()) { return error(); } - return first[key]; +namespace dom { + +// +// array inline implementation +// +really_inline array::array() noexcept : tape{} {} +really_inline array::array(const internal::tape_ref &_tape) noexcept : tape{_tape} {} +inline array::iterator array::begin() const noexcept { + return internal::tape_ref(tape.doc, tape.json_index + 1); } -inline simdjson_result<dom::element> simdjson_result<dom::element>::operator[](const char *key) const noexcept { - if (error()) { return error(); } - return first[key]; +inline array::iterator array::end() const noexcept { + return internal::tape_ref(tape.doc, tape.after_element() - 1); } -inline simdjson_result<dom::element> simdjson_result<dom::element>::at(const std::string_view &json_pointer) const noexcept { - if (error()) { return error(); } - return first.at(json_pointer); +inline size_t array::size() const noexcept { + return tape.scope_count(); } -inline simdjson_result<dom::element> simdjson_result<dom::element>::at(size_t index) const noexcept { - if (error()) { return error(); } - return first.at(index); +inline simdjson_result<element> array::at(const std::string_view &json_pointer) const noexcept { + // - means "the append position" or "the element after the end of the array" + // We don't support this, 
because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + + // Get the child + auto child = array(tape).at(array_index); + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at(json_pointer.substr(i+1)); + } + return child; } -inline simdjson_result<dom::element> simdjson_result<dom::element>::at_key(const std::string_view &key) const noexcept { - if (error()) { return error(); } - return first.at_key(key); +inline simdjson_result<element> array::at(size_t index) const noexcept { + size_t i=0; + for (auto element : *this) { + if (i == index) { return element; } + i++; + } + return INDEX_OUT_OF_BOUNDS; } -inline simdjson_result<dom::element> simdjson_result<dom::element>::at_key_case_insensitive(const std::string_view &key) const noexcept { - if (error()) { return error(); } - return first.at_key_case_insensitive(key); + +// +// array::iterator inline implementation +// +really_inline array::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline element array::iterator::operator*() const noexcept { + return element(tape); } +inline bool array::iterator::operator!=(const array::iterator& other) const noexcept { + return 
tape.json_index != other.tape.json_index; +} +inline array::iterator& array::iterator::operator++() noexcept { + tape.json_index = tape.after_element(); + return *this; +} +inline std::ostream& operator<<(std::ostream& out, const array &value) { + return out << minify<array>(value); +} + +} // namespace dom + +template<> +inline std::ostream& minifier<dom::array>::print(std::ostream& out) { + out << '['; + auto iter = value.begin(); + auto end = value.end(); + if (iter != end) { + out << minify<dom::element>(*iter); + for (++iter; iter != end; ++iter) { + out << "," << minify<dom::element>(*iter); + } + } + return out << ']'; +} + #if SIMDJSON_EXCEPTIONS -inline simdjson_result<dom::element>::operator bool() const noexcept(false) { - return get<bool>(); +template<> +inline std::ostream& minifier<simdjson_result<dom::array>>::print(std::ostream& out) { + if (value.error()) { throw simdjson_error(value.error()); } + return out << minify<dom::array>(value.first); } -inline simdjson_result<dom::element>::operator const char *() const noexcept(false) { - return get<const char *>(); + +inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::array> &value) noexcept(false) { + return out << minify<simdjson_result<dom::array>>(value); } -inline simdjson_result<dom::element>::operator std::string_view() const noexcept(false) { - return get<std::string_view>(); + +#endif + +} // namespace simdjson + +#endif // SIMDJSON_INLINE_ARRAY_H +/* end file include/simdjson/inline/array.h */ +/* begin file include/simdjson/inline/document_stream.h */ +#ifndef SIMDJSON_INLINE_DOCUMENT_STREAM_H +#define SIMDJSON_INLINE_DOCUMENT_STREAM_H + +#include <algorithm> +#include <limits> +#include <stdexcept> +namespace simdjson { +namespace dom { + +#ifdef SIMDJSON_THREADS_ENABLED +inline void stage1_worker::finish() { + std::unique_lock<std::mutex> lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); } -inline 
simdjson_result<dom::element>::operator uint64_t() const noexcept(false) { - return get<uint64_t>(); + +inline stage1_worker::~stage1_worker() { + stop_thread(); } -inline simdjson_result<dom::element>::operator int64_t() const noexcept(false) { - return get<int64_t>(); + +inline void stage1_worker::start_thread() { + std::unique_lock<std::mutex> lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(can_work) { + std::unique_lock<std::mutex> thread_lock(locking_mutex); + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + thread_lock.unlock(); + cond_var.notify_one(); // will notify "finish" + } + } + ); } -inline simdjson_result<dom::element>::operator double() const noexcept(false) { - return get<double>(); + + +inline void stage1_worker::stop_thread() { + std::unique_lock<std::mutex> lock(locking_mutex); + // We have to make sure that all locks can be released. 
+ can_work = false; + has_work = false; + lock.unlock(); + cond_var.notify_all(); + if(thread.joinable()) { + thread.join(); + } } -inline simdjson_result<dom::element>::operator dom::array() const noexcept(false) { - return get<dom::array>(); + +inline void stage1_worker::run(document_stream * ds, dom::parser * stage1, size_t next_batch_start) { + std::unique_lock<std::mutex> lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + lock.unlock(); + cond_var.notify_one();// will notify the thread lock } -inline simdjson_result<dom::element>::operator dom::object() const noexcept(false) { - return get<dom::object>(); +#endif + +really_inline document_stream::document_stream( + dom::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size}, + error{SUCCESS} +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif } -inline dom::array::iterator simdjson_result<dom::element>::begin() const noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.begin(); +really_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + error{UNINITIALIZED} { } -inline dom::array::iterator simdjson_result<dom::element>::end() const noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.end(); + +really_inline document_stream::~document_stream() noexcept { } -#endif +really_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
+ return iterator(*this, error == EMPTY); +} -// -// simdjson_result<dom::array> inline implementation -// -really_inline simdjson_result<dom::array>::simdjson_result() noexcept - : internal::simdjson_result_base<dom::array>() {} -really_inline simdjson_result<dom::array>::simdjson_result(dom::array value) noexcept - : internal::simdjson_result_base<dom::array>(std::forward<dom::array>(value)) {} -really_inline simdjson_result<dom::array>::simdjson_result(error_code error) noexcept - : internal::simdjson_result_base<dom::array>(error) {} +really_inline document_stream::iterator document_stream::end() noexcept { + return iterator(*this, true); +} -#if SIMDJSON_EXCEPTIONS +really_inline document_stream::iterator::iterator(document_stream& _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} -inline dom::array::iterator simdjson_result<dom::array>::begin() const noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.begin(); +really_inline simdjson_result<element> document_stream::iterator::operator*() noexcept { + // Once we have yielded any errors, we're finished. + if (stream.error) { finished = true; return stream.error; } + return stream.parser->doc.root(); } -inline dom::array::iterator simdjson_result<dom::array>::end() const noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.end(); + +really_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + stream.next(); + // If that was the last document, we're finished. 
+ if (stream.error == EMPTY) { finished = true; } + return *this; } -#endif // SIMDJSON_EXCEPTIONS +really_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} -inline simdjson_result<dom::element> simdjson_result<dom::array>::at(const std::string_view &json_pointer) const noexcept { - if (error()) { return error(); } - return first.at(json_pointer); +inline void document_stream::start() noexcept { + if (error) { return; } + + error = parser->ensure_capacity(batch_size); + if (error) { return; } + + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + if (error) { return; } + +#ifdef SIMDJSON_THREADS_ENABLED + if (next_batch_start() < len) { + // Kick off the first thread if needed + error = stage1_thread_parser.ensure_capacity(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } +#endif // SIMDJSON_THREADS_ENABLED + + next(); } -inline simdjson_result<dom::element> simdjson_result<dom::array>::at(size_t index) const noexcept { - if (error()) { return error(); } - return first.at(index); + +really_inline size_t document_stream::iterator::current_index() noexcept { + return stream.doc_index; } +inline void document_stream::next() noexcept { + if (error) { return; } -// -// simdjson_result<dom::object> inline implementation -// -really_inline simdjson_result<dom::object>::simdjson_result() noexcept - : internal::simdjson_result_base<dom::object>() {} -really_inline simdjson_result<dom::object>::simdjson_result(dom::object value) noexcept - : internal::simdjson_result_base<dom::object>(std::forward<dom::object>(value)) {} -really_inline simdjson_result<dom::object>::simdjson_result(error_code error) noexcept - : internal::simdjson_result_base<dom::object>(error) {} + // Load the next document from the batch + doc_index = batch_start + 
parser->implementation->structural_indexes[parser->implementation->next_structural_index]; + error = parser->implementation->stage2_next(parser->doc); + // If that was the last document in the batch, load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } -inline simdjson_result<dom::element> simdjson_result<dom::object>::operator[](const std::string_view &key) const noexcept { - if (error()) { return error(); } - return first[key]; +#ifdef SIMDJSON_THREADS_ENABLED + load_from_stage1_thread(); +#else + error = run_stage1(*parser, batch_start); +#endif + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + // Run stage 2 on the first document in the batch + doc_index = batch_start + parser->implementation->structural_indexes[parser->implementation->next_structural_index]; + error = parser->implementation->stage2_next(parser->doc); + } } -inline simdjson_result<dom::element> simdjson_result<dom::object>::operator[](const char *key) const noexcept { - if (error()) { return error(); } - return first[key]; + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } -inline simdjson_result<dom::element> simdjson_result<dom::object>::at(const std::string_view &json_pointer) const noexcept { - if (error()) { return error(); } - return first.at(json_pointer); + +inline error_code document_stream::run_stage1(dom::parser &p, size_t _batch_start) noexcept { + // If this is the final batch, pass partial = false + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, false); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, true); + } } -inline simdjson_result<dom::element> simdjson_result<dom::object>::at_key(const std::string_view 
&key) const noexcept { - if (error()) { return error(); } - return first.at_key(key); + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(*parser, stage1_thread_parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } } -inline simdjson_result<dom::element> simdjson_result<dom::object>::at_key_case_insensitive(const std::string_view &key) const noexcept { - if (error()) { return error(); } - return first.at_key_case_insensitive(key); + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. 
+ this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); } -#if SIMDJSON_EXCEPTIONS +#endif // SIMDJSON_THREADS_ENABLED -inline dom::object::iterator simdjson_result<dom::object>::begin() const noexcept(false) { +} // namespace dom + +really_inline simdjson_result<dom::document_stream>::simdjson_result() noexcept + : simdjson_result_base() { +} +really_inline simdjson_result<dom::document_stream>::simdjson_result(error_code error) noexcept + : simdjson_result_base(error) { +} +really_inline simdjson_result<dom::document_stream>::simdjson_result(dom::document_stream &&value) noexcept + : simdjson_result_base(std::forward<dom::document_stream>(value)) { +} + +#if SIMDJSON_EXCEPTIONS +really_inline dom::document_stream::iterator simdjson_result<dom::document_stream>::begin() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.begin(); } -inline dom::object::iterator simdjson_result<dom::object>::end() const noexcept(false) { +really_inline dom::document_stream::iterator simdjson_result<dom::document_stream>::end() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.end(); } - +#else // SIMDJSON_EXCEPTIONS +really_inline dom::document_stream::iterator simdjson_result<dom::document_stream>::begin() noexcept { + first.error = error(); + return first.begin(); +} +really_inline dom::document_stream::iterator simdjson_result<dom::document_stream>::end() noexcept { + first.error = error(); + return first.end(); +} #endif // SIMDJSON_EXCEPTIONS } // namespace simdjson +#endif // SIMDJSON_INLINE_DOCUMENT_STREAM_H +/* end file include/simdjson/inline/document_stream.h */ +/* begin file include/simdjson/inline/document.h */ +#ifndef SIMDJSON_INLINE_DOCUMENT_H +#define SIMDJSON_INLINE_DOCUMENT_H -namespace simdjson::dom { +// Inline implementations go in 
here. +#include <ostream> +#include <cstring> + +namespace simdjson { +namespace dom { + // // document inline implementation // inline element document::root() const noexcept { - return element(this, 1); + return element(internal::tape_ref(this, 1)); } -//#define REPORT_ERROR(CODE, MESSAGE) ((std::cerr << MESSAGE << std::endl), CODE) -#define REPORT_ERROR(CODE, MESSAGE) (CODE) -#define RETURN_ERROR(CODE, MESSAGE) return REPORT_ERROR((CODE), (MESSAGE)); - WARN_UNUSED inline error_code document::allocate(size_t capacity) noexcept { if (capacity == 0) { string_buf.reset(); tape.reset(); return SUCCESS; } // a pathological input like "[[[[..." would generate len tape elements, so // need a capacity of at least len + 1, but it is also possible to do - // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6" + // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6" //where len + 1 tape elements are // generated, see issue https://github.com/lemire/simdjson/issues/345 - size_t tape_capacity = ROUNDUP_N(capacity + 2, 64); + size_t tape_capacity = ROUNDUP_N(capacity + 3, 64); // a document with only zero-length strings... could have len/3 string // and we would need len/3 * 5 bytes on the string buffer size_t string_capacity = ROUNDUP_N(5 * capacity / 3 + 32, 64); string_buf.reset( new (std::nothrow) uint8_t[string_capacity]); tape.reset(new (std::nothrow) uint64_t[tape_capacity]); @@ -3113,27 +5724,27 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept { uint32_t string_length; size_t tape_idx = 0; uint64_t tape_val = tape[tape_idx]; - uint8_t type = (tape_val >> 56); + uint8_t type = uint8_t(tape_val >> 56); os << tape_idx << " : " << type; tape_idx++; size_t how_many = 0; if (type == 'r') { - how_many = tape_val & internal::JSON_VALUE_MASK; + how_many = size_t(tape_val & internal::JSON_VALUE_MASK); } else { // Error: no starting root node? 
return false; } os << "\t// pointing to " << how_many << " (right after last node)\n"; uint64_t payload; for (; tape_idx < how_many; tape_idx++) { os << tape_idx << " : "; tape_val = tape[tape_idx]; payload = tape_val & internal::JSON_VALUE_MASK; - type = (tape_val >> 56); + type = uint8_t(tape_val >> 56); switch (type) { case '"': // we have a string os << "string \""; memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t)); os << internal::escape_json_string(std::string_view( @@ -3196,562 +5807,335 @@ return false; } } tape_val = tape[tape_idx]; payload = tape_val & internal::JSON_VALUE_MASK; - type = (tape_val >> 56); + type = uint8_t(tape_val >> 56); os << tape_idx << " : " << type << "\t// pointing to " << payload << " (start root)\n"; return true; } -// -// parser inline implementation -// -really_inline parser::parser(size_t max_capacity) noexcept - : _max_capacity{max_capacity}, loaded_bytes(nullptr, &aligned_free_char) {} -inline bool parser::is_valid() const noexcept { return valid; } -inline int parser::get_error_code() const noexcept { return error; } -inline std::string parser::get_error_message() const noexcept { return error_message(error); } -inline bool parser::print_json(std::ostream &os) const noexcept { - if (!valid) { return false; } - os << doc.root(); - return true; -} -inline bool parser::dump_raw_tape(std::ostream &os) const noexcept { - return valid ? 
doc.dump_raw_tape(os) : false; -} +} // namespace dom +} // namespace simdjson -inline simdjson_result<size_t> parser::read_file(const std::string &path) noexcept { - // Open the file - std::FILE *fp = std::fopen(path.c_str(), "rb"); - if (fp == nullptr) { - return IO_ERROR; - } +#endif // SIMDJSON_INLINE_DOCUMENT_H +/* end file include/simdjson/inline/document.h */ +/* begin file include/simdjson/inline/element.h */ +#ifndef SIMDJSON_INLINE_ELEMENT_H +#define SIMDJSON_INLINE_ELEMENT_H - // Get the file size - if(std::fseek(fp, 0, SEEK_END) < 0) { - std::fclose(fp); - return IO_ERROR; - } - long len = std::ftell(fp); - if((len < 0) || (len == LONG_MAX)) { - std::fclose(fp); - return IO_ERROR; - } +#include <cstring> +#include <utility> - // Make sure we have enough capacity to load the file - if (_loaded_bytes_capacity < size_t(len)) { - loaded_bytes.reset( internal::allocate_padded_buffer(len) ); - if (!loaded_bytes) { - std::fclose(fp); - return MEMALLOC; - } - _loaded_bytes_capacity = len; - } +namespace simdjson { - // Read the string - std::rewind(fp); - size_t bytes_read = std::fread(loaded_bytes.get(), 1, len, fp); - if (std::fclose(fp) != 0 || bytes_read != size_t(len)) { - return IO_ERROR; - } - - return bytes_read; +// +// simdjson_result<dom::element> inline implementation +// +really_inline simdjson_result<dom::element>::simdjson_result() noexcept + : internal::simdjson_result_base<dom::element>() {} +really_inline simdjson_result<dom::element>::simdjson_result(dom::element &&value) noexcept + : internal::simdjson_result_base<dom::element>(std::forward<dom::element>(value)) {} +really_inline simdjson_result<dom::element>::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base<dom::element>(error) {} +inline simdjson_result<dom::element_type> simdjson_result<dom::element>::type() const noexcept { + if (error()) { return error(); } + return first.type(); } -inline simdjson_result<element> parser::load(const std::string &path) 
noexcept { - auto [len, code] = read_file(path); - if (code) { return code; } - - return parse(loaded_bytes.get(), len, false); +template<typename T> +really_inline bool simdjson_result<dom::element>::is() const noexcept { + return !error() && first.is<T>(); } - -inline document_stream parser::load_many(const std::string &path, size_t batch_size) noexcept { - auto [len, code] = read_file(path); - return document_stream(*this, (const uint8_t*)loaded_bytes.get(), len, batch_size, code); +template<typename T> +really_inline simdjson_result<T> simdjson_result<dom::element>::get() const noexcept { + if (error()) { return error(); } + return first.get<T>(); } +template<typename T> +WARN_UNUSED really_inline error_code simdjson_result<dom::element>::get(T &value) const noexcept { + if (error()) { return error(); } + return first.get<T>(value); +} -inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) noexcept { - error_code code = ensure_capacity(len); - if (code) { return code; } - - if (realloc_if_needed) { - const uint8_t *tmp_buf = buf; - buf = (uint8_t *)internal::allocate_padded_buffer(len); - if (buf == nullptr) - return MEMALLOC; - memcpy((void *)buf, tmp_buf, len); - } - - code = simdjson::active_implementation->parse(buf, len, *this); - if (realloc_if_needed) { - aligned_free((void *)buf); // must free before we exit - } - if (code) { return code; } - - // We're indicating validity via the simdjson_result<element>, so set the parse state back to invalid - valid = false; - error = UNINITIALIZED; - return doc.root(); +really_inline simdjson_result<dom::array> simdjson_result<dom::element>::get_array() const noexcept { + if (error()) { return error(); } + return first.get_array(); } -really_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) noexcept { - return parse((const uint8_t *)buf, len, realloc_if_needed); +really_inline simdjson_result<dom::object> 
simdjson_result<dom::element>::get_object() const noexcept { + if (error()) { return error(); } + return first.get_object(); } -really_inline simdjson_result<element> parser::parse(const std::string &s) noexcept { - return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); +really_inline simdjson_result<const char *> simdjson_result<dom::element>::get_c_str() const noexcept { + if (error()) { return error(); } + return first.get_c_str(); } -really_inline simdjson_result<element> parser::parse(const padded_string &s) noexcept { - return parse(s.data(), s.length(), false); +really_inline simdjson_result<size_t> simdjson_result<dom::element>::get_string_length() const noexcept { + if (error()) { return error(); } + return first.get_string_length(); } - -inline document_stream parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { - return document_stream(*this, buf, len, batch_size); +really_inline simdjson_result<std::string_view> simdjson_result<dom::element>::get_string() const noexcept { + if (error()) { return error(); } + return first.get_string(); } -inline document_stream parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept { - return parse_many((const uint8_t *)buf, len, batch_size); +really_inline simdjson_result<int64_t> simdjson_result<dom::element>::get_int64() const noexcept { + if (error()) { return error(); } + return first.get_int64(); } -inline document_stream parser::parse_many(const std::string &s, size_t batch_size) noexcept { - return parse_many(s.data(), s.length(), batch_size); +really_inline simdjson_result<uint64_t> simdjson_result<dom::element>::get_uint64() const noexcept { + if (error()) { return error(); } + return first.get_uint64(); } -inline document_stream parser::parse_many(const padded_string &s, size_t batch_size) noexcept { - return parse_many(s.data(), s.length(), batch_size); +really_inline simdjson_result<double> simdjson_result<dom::element>::get_double() 
const noexcept { + if (error()) { return error(); } + return first.get_double(); } +really_inline simdjson_result<bool> simdjson_result<dom::element>::get_bool() const noexcept { + if (error()) { return error(); } + return first.get_bool(); +} -really_inline size_t parser::capacity() const noexcept { - return _capacity; +really_inline bool simdjson_result<dom::element>::is_array() const noexcept { + return !error() && first.is_array(); } -really_inline size_t parser::max_capacity() const noexcept { - return _max_capacity; +really_inline bool simdjson_result<dom::element>::is_object() const noexcept { + return !error() && first.is_object(); } -really_inline size_t parser::max_depth() const noexcept { - return _max_depth; +really_inline bool simdjson_result<dom::element>::is_string() const noexcept { + return !error() && first.is_string(); } - -WARN_UNUSED -inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { - // - // If capacity has changed, reallocate capacity-based buffers - // - if (_capacity != capacity) { - // Set capacity to 0 until we finish, in case there's an error - _capacity = 0; - - // - // Reallocate the document - // - error_code err = doc.allocate(capacity); - if (err) { return err; } - - // - // Don't allocate 0 bytes, just return. - // - if (capacity == 0) { - structural_indexes.reset(); - return SUCCESS; - } - - // - // Initialize stage 1 output - // - uint32_t max_structures = ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); // TODO realloc - if (!structural_indexes) { - return MEMALLOC; - } - - _capacity = capacity; - - // - // If capacity hasn't changed, but the document was taken, allocate a new document. 
- // - } else if (!doc.tape) { - error_code err = doc.allocate(capacity); - if (err) { return err; } - } - - // - // If max_depth has changed, reallocate those buffers - // - if (max_depth != _max_depth) { - _max_depth = 0; - - if (max_depth == 0) { - ret_address.reset(); - containing_scope_offset.reset(); - return SUCCESS; - } - - // - // Initialize stage 2 state - // - containing_scope_offset.reset(new (std::nothrow) uint32_t[max_depth]); // TODO realloc - #ifdef SIMDJSON_USE_COMPUTED_GOTO - ret_address.reset(new (std::nothrow) void *[max_depth]); - #else - ret_address.reset(new (std::nothrow) char[max_depth]); - #endif - - if (!ret_address || !containing_scope_offset) { - // Could not allocate memory - return MEMALLOC; - } - - _max_depth = max_depth; - } - return SUCCESS; +really_inline bool simdjson_result<dom::element>::is_int64() const noexcept { + return !error() && first.is_int64(); } - -WARN_UNUSED -inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { - return !allocate(capacity, max_depth); +really_inline bool simdjson_result<dom::element>::is_uint64() const noexcept { + return !error() && first.is_uint64(); } - -really_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - _max_capacity = max_capacity; +really_inline bool simdjson_result<dom::element>::is_double() const noexcept { + return !error() && first.is_double(); } - -inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept { - // If we don't have enough capacity, (try to) automatically bump it. - // If the document was taken, reallocate that too. - // Both in one if statement to minimize unlikely branching. - if (unlikely(desired_capacity > capacity() || !doc.tape)) { - if (desired_capacity > max_capacity()) { - return error = CAPACITY; - } - return allocate(desired_capacity, _max_depth > 0 ? 
_max_depth : DEFAULT_MAX_DEPTH); - } - - return SUCCESS; +really_inline bool simdjson_result<dom::element>::is_bool() const noexcept { + return !error() && first.is_bool(); } -// -// array inline implementation -// -really_inline array::array() noexcept : internal::tape_ref() {} -really_inline array::array(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) {} -inline array::iterator array::begin() const noexcept { - return iterator(doc, json_index + 1); +really_inline bool simdjson_result<dom::element>::is_null() const noexcept { + return !error() && first.is_null(); } -inline array::iterator array::end() const noexcept { - return iterator(doc, after_element() - 1); -} -inline simdjson_result<element> array::at(const std::string_view &json_pointer) const noexcept { - // - means "the append position" or "the element after the end of the array" - // We don't support this, because we're returning a real element, not a position. - if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } - - // Read the array index - size_t array_index = 0; - size_t i; - for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { - uint8_t digit = uint8_t(json_pointer[i]) - '0'; - // Check for non-digit in array index. If it's there, we're trying to get a field in an object - if (digit > 9) { return INCORRECT_TYPE; } - array_index = array_index*10 + digit; - } - - // 0 followed by other digits is invalid - if (i > 1 && json_pointer[0] == '0') { RETURN_ERROR(INVALID_JSON_POINTER, "JSON pointer array index has other characters after 0"); } - - // Empty string is invalid; so is a "/" with no digits before it - if (i == 0) { RETURN_ERROR(INVALID_JSON_POINTER, "Empty string in JSON pointer array index"); } - - // Get the child - auto child = array(doc, json_index).at(array_index); - // If there is a /, we're not done yet, call recursively. 
- if (i < json_pointer.length()) { - child = child.at(json_pointer.substr(i+1)); - } - return child; +really_inline simdjson_result<dom::element> simdjson_result<dom::element>::operator[](const std::string_view &key) const noexcept { + if (error()) { return error(); } + return first[key]; } -inline simdjson_result<element> array::at(size_t index) const noexcept { - size_t i=0; - for (auto element : *this) { - if (i == index) { return element; } - i++; - } - return INDEX_OUT_OF_BOUNDS; +really_inline simdjson_result<dom::element> simdjson_result<dom::element>::operator[](const char *key) const noexcept { + if (error()) { return error(); } + return first[key]; } - -// -// array::iterator inline implementation -// -really_inline array::iterator::iterator(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { } -inline element array::iterator::operator*() const noexcept { - return element(doc, json_index); +really_inline simdjson_result<dom::element> simdjson_result<dom::element>::at(const std::string_view &json_pointer) const noexcept { + if (error()) { return error(); } + return first.at(json_pointer); } -inline bool array::iterator::operator!=(const array::iterator& other) const noexcept { - return json_index != other.json_index; +really_inline simdjson_result<dom::element> simdjson_result<dom::element>::at(size_t index) const noexcept { + if (error()) { return error(); } + return first.at(index); } -inline void array::iterator::operator++() noexcept { - json_index = after_element(); +really_inline simdjson_result<dom::element> simdjson_result<dom::element>::at_key(const std::string_view &key) const noexcept { + if (error()) { return error(); } + return first.at_key(key); } - -// -// object inline implementation -// -really_inline object::object() noexcept : internal::tape_ref() {} -really_inline object::object(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { }; -inline 
object::iterator object::begin() const noexcept { - return iterator(doc, json_index + 1); +really_inline simdjson_result<dom::element> simdjson_result<dom::element>::at_key_case_insensitive(const std::string_view &key) const noexcept { + if (error()) { return error(); } + return first.at_key_case_insensitive(key); } -inline object::iterator object::end() const noexcept { - return iterator(doc, after_element() - 1); -} -inline simdjson_result<element> object::operator[](const std::string_view &key) const noexcept { - return at_key(key); -} -inline simdjson_result<element> object::operator[](const char *key) const noexcept { - return at_key(key); -} -inline simdjson_result<element> object::at(const std::string_view &json_pointer) const noexcept { - size_t slash = json_pointer.find('/'); - std::string_view key = json_pointer.substr(0, slash); +#if SIMDJSON_EXCEPTIONS - // Grab the child with the given key - simdjson_result<element> child; - - // If there is an escape character in the key, unescape it and then get the child. 
- size_t escape = key.find('~'); - if (escape != std::string_view::npos) { - // Unescape the key - std::string unescaped(key); - do { - switch (unescaped[escape+1]) { - case '0': - unescaped.replace(escape, 2, "~"); - break; - case '1': - unescaped.replace(escape, 2, "/"); - break; - default: - RETURN_ERROR(INVALID_JSON_POINTER, "Unexpected ~ escape character in JSON pointer"); - } - escape = unescaped.find('~', escape+1); - } while (escape != std::string::npos); - child = at_key(unescaped); - } else { - child = at_key(key); - } - - // If there is a /, we have to recurse and look up more of the path - if (slash != std::string_view::npos) { - child = child.at(json_pointer.substr(slash+1)); - } - - return child; +really_inline simdjson_result<dom::element>::operator bool() const noexcept(false) { + return get<bool>(); } -inline simdjson_result<element> object::at_key(const std::string_view &key) const noexcept { - iterator end_field = end(); - for (iterator field = begin(); field != end_field; ++field) { - if (key == field.key()) { - return field.value(); - } - } - return NO_SUCH_FIELD; +really_inline simdjson_result<dom::element>::operator const char *() const noexcept(false) { + return get<const char *>(); } -// In case you wonder why we need this, please see -// https://github.com/simdjson/simdjson/issues/323 -// People do seek keys in a case-insensitive manner. 
-inline simdjson_result<element> object::at_key_case_insensitive(const std::string_view &key) const noexcept { - iterator end_field = end(); - for (iterator field = begin(); field != end_field; ++field) { - auto field_key = field.key(); - if (key.length() == field_key.length()) { - bool equal = true; - for (size_t i=0; i<field_key.length(); i++) { - equal = equal && std::tolower(key[i]) != std::tolower(field_key[i]); - } - if (equal) { return field.value(); } - } - } - return NO_SUCH_FIELD; +really_inline simdjson_result<dom::element>::operator std::string_view() const noexcept(false) { + return get<std::string_view>(); } - -// -// object::iterator inline implementation -// -really_inline object::iterator::iterator(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { } -inline const key_value_pair object::iterator::operator*() const noexcept { - return key_value_pair(key(), value()); +really_inline simdjson_result<dom::element>::operator uint64_t() const noexcept(false) { + return get<uint64_t>(); } -inline bool object::iterator::operator!=(const object::iterator& other) const noexcept { - return json_index != other.json_index; +really_inline simdjson_result<dom::element>::operator int64_t() const noexcept(false) { + return get<int64_t>(); } -inline void object::iterator::operator++() noexcept { - json_index++; - json_index = after_element(); +really_inline simdjson_result<dom::element>::operator double() const noexcept(false) { + return get<double>(); } -inline std::string_view object::iterator::key() const noexcept { - size_t string_buf_index = tape_value(); - uint32_t len; - memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); - return std::string_view( - reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]), - len - ); +really_inline simdjson_result<dom::element>::operator dom::array() const noexcept(false) { + return get<dom::array>(); } -inline const char* 
object::iterator::key_c_str() const noexcept { - return reinterpret_cast<const char *>(&doc->string_buf[tape_value() + sizeof(uint32_t)]); +really_inline simdjson_result<dom::element>::operator dom::object() const noexcept(false) { + return get<dom::object>(); } -inline element object::iterator::value() const noexcept { - return element(doc, json_index + 1); + +really_inline dom::array::iterator simdjson_result<dom::element>::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); } +really_inline dom::array::iterator simdjson_result<dom::element>::end() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} -// -// key_value_pair inline implementation -// -inline key_value_pair::key_value_pair(const std::string_view &_key, element _value) noexcept : - key(_key), value(_value) {} +#endif // SIMDJSON_EXCEPTIONS +namespace dom { + // // element inline implementation // -really_inline element::element() noexcept : internal::tape_ref() {} -really_inline element::element(const document *_doc, size_t _json_index) noexcept : internal::tape_ref(_doc, _json_index) { } +really_inline element::element() noexcept : tape{} {} +really_inline element::element(const internal::tape_ref &_tape) noexcept : tape{_tape} { } inline element_type element::type() const noexcept { - switch (tape_ref_type()) { - case internal::tape_type::START_ARRAY: - return element_type::ARRAY; - case internal::tape_type::START_OBJECT: - return element_type::OBJECT; - case internal::tape_type::INT64: - return element_type::INT64; - case internal::tape_type::UINT64: - return element_type::UINT64; - case internal::tape_type::DOUBLE: - return element_type::DOUBLE; - case internal::tape_type::STRING: - return element_type::STRING; - case internal::tape_type::TRUE_VALUE: - case internal::tape_type::FALSE_VALUE: - return element_type::BOOL; - case internal::tape_type::NULL_VALUE: - return element_type::NULL_VALUE; - 
case internal::tape_type::ROOT: - case internal::tape_type::END_ARRAY: - case internal::tape_type::END_OBJECT: - default: - abort(); - } + auto tape_type = tape.tape_ref_type(); + return tape_type == internal::tape_type::FALSE_VALUE ? element_type::BOOL : static_cast<element_type>(tape_type); } -really_inline bool element::is_null() const noexcept { - return tape_ref_type() == internal::tape_type::NULL_VALUE; -} -template<> -inline simdjson_result<bool> element::get<bool>() const noexcept { - switch (tape_ref_type()) { - case internal::tape_type::TRUE_VALUE: - return true; - case internal::tape_type::FALSE_VALUE: - return false; +inline simdjson_result<bool> element::get_bool() const noexcept { + if(tape.is_true()) { + return true; + } else if(tape.is_false()) { + return false; + } + return INCORRECT_TYPE; +} +inline simdjson_result<const char *> element::get_c_str() const noexcept { + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: { + return tape.get_c_str(); + } default: return INCORRECT_TYPE; } } -template<> -inline simdjson_result<const char *> element::get<const char *>() const noexcept { - switch (tape_ref_type()) { +inline simdjson_result<size_t> element::get_string_length() const noexcept { + switch (tape.tape_ref_type()) { case internal::tape_type::STRING: { - size_t string_buf_index = tape_value(); - return reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); + return tape.get_string_length(); } default: return INCORRECT_TYPE; } } -template<> -inline simdjson_result<std::string_view> element::get<std::string_view>() const noexcept { - switch (tape_ref_type()) { +inline simdjson_result<std::string_view> element::get_string() const noexcept { + switch (tape.tape_ref_type()) { case internal::tape_type::STRING: - return get_string_view(); + return tape.get_string_view(); default: return INCORRECT_TYPE; } } -template<> -inline simdjson_result<uint64_t> element::get<uint64_t>() const noexcept { - switch 
(tape_ref_type()) { - case internal::tape_type::UINT64: - return next_tape_value<uint64_t>(); - case internal::tape_type::INT64: { - int64_t result = next_tape_value<int64_t>(); +inline simdjson_result<uint64_t> element::get_uint64() const noexcept { + if(unlikely(!tape.is_uint64())) { // branch rarely taken + if(tape.is_int64()) { + int64_t result = tape.next_tape_value<int64_t>(); if (result < 0) { return NUMBER_OUT_OF_RANGE; } - return static_cast<uint64_t>(result); + return uint64_t(result); } - default: - return INCORRECT_TYPE; + return INCORRECT_TYPE; } + return tape.next_tape_value<int64_t>(); } -template<> -inline simdjson_result<int64_t> element::get<int64_t>() const noexcept { - switch (tape_ref_type()) { - case internal::tape_type::UINT64: { - uint64_t result = next_tape_value<uint64_t>(); +inline simdjson_result<int64_t> element::get_int64() const noexcept { + if(unlikely(!tape.is_int64())) { // branch rarely taken + if(tape.is_uint64()) { + uint64_t result = tape.next_tape_value<uint64_t>(); // Wrapping max in parens to handle Windows issue: https://stackoverflow.com/questions/11544073/how-do-i-deal-with-the-max-macro-in-windows-h-colliding-with-max-in-std - if (result > (std::numeric_limits<int64_t>::max)()) { + if (result > uint64_t((std::numeric_limits<int64_t>::max)())) { return NUMBER_OUT_OF_RANGE; } return static_cast<int64_t>(result); } - case internal::tape_type::INT64: - return next_tape_value<int64_t>(); - default: - return INCORRECT_TYPE; + return INCORRECT_TYPE; } + return tape.next_tape_value<int64_t>(); } -template<> -inline simdjson_result<double> element::get<double>() const noexcept { - switch (tape_ref_type()) { - case internal::tape_type::UINT64: - return next_tape_value<uint64_t>(); - case internal::tape_type::INT64: { - return next_tape_value<int64_t>(); - int64_t result = tape_value(); - if (result < 0) { - return NUMBER_OUT_OF_RANGE; - } - return double(result); +inline simdjson_result<double> element::get_double() const noexcept 
{ + // Performance considerations: + // 1. Querying tape_ref_type() implies doing a shift, it is fast to just do a straight + // comparison. + // 2. Using a switch-case relies on the compiler guessing what kind of code generation + // we want... But the compiler cannot know that we expect the type to be "double" + // most of the time. + // We can expect get<double> to refer to a double type almost all the time. + // It is important to craft the code accordingly so that the compiler can use this + // information. (This could also be solved with profile-guided optimization.) + if(unlikely(!tape.is_double())) { // branch rarely taken + if(tape.is_uint64()) { + return double(tape.next_tape_value<uint64_t>()); + } else if(tape.is_int64()) { + return double(tape.next_tape_value<int64_t>()); } - case internal::tape_type::DOUBLE: - return next_tape_value<double>(); - default: - return INCORRECT_TYPE; + return INCORRECT_TYPE; } + // this is common: + return tape.next_tape_value<double>(); } -template<> -inline simdjson_result<array> element::get<array>() const noexcept { - switch (tape_ref_type()) { +inline simdjson_result<array> element::get_array() const noexcept { + switch (tape.tape_ref_type()) { case internal::tape_type::START_ARRAY: - return array(doc, json_index); + return array(tape); default: return INCORRECT_TYPE; } } -template<> -inline simdjson_result<object> element::get<object>() const noexcept { - switch (tape_ref_type()) { +inline simdjson_result<object> element::get_object() const noexcept { + switch (tape.tape_ref_type()) { case internal::tape_type::START_OBJECT: - return object(doc, json_index); + return object(tape); default: return INCORRECT_TYPE; } } template<typename T> +WARN_UNUSED really_inline error_code element::get(T &value) const noexcept { + return get<T>().get(value); +} +// An element-specific version prevents recursion with simdjson_result::get<element>(value) +template<> +WARN_UNUSED really_inline error_code element::get<element>(element 
&value) const noexcept { + value = element(tape); + return SUCCESS; +} + +template<typename T> really_inline bool element::is() const noexcept { auto result = get<T>(); return !result.error(); } +template<> inline simdjson_result<array> element::get<array>() const noexcept { return get_array(); } +template<> inline simdjson_result<object> element::get<object>() const noexcept { return get_object(); } +template<> inline simdjson_result<const char *> element::get<const char *>() const noexcept { return get_c_str(); } +template<> inline simdjson_result<std::string_view> element::get<std::string_view>() const noexcept { return get_string(); } +template<> inline simdjson_result<int64_t> element::get<int64_t>() const noexcept { return get_int64(); } +template<> inline simdjson_result<uint64_t> element::get<uint64_t>() const noexcept { return get_uint64(); } +template<> inline simdjson_result<double> element::get<double>() const noexcept { return get_double(); } +template<> inline simdjson_result<bool> element::get<bool>() const noexcept { return get_bool(); } + +inline bool element::is_array() const noexcept { return is<array>(); } +inline bool element::is_object() const noexcept { return is<object>(); } +inline bool element::is_string() const noexcept { return is<std::string_view>(); } +inline bool element::is_int64() const noexcept { return is<int64_t>(); } +inline bool element::is_uint64() const noexcept { return is<uint64_t>(); } +inline bool element::is_double() const noexcept { return is<double>(); } +inline bool element::is_bool() const noexcept { return is<bool>(); } + +inline bool element::is_null() const noexcept { + return tape.is_null_on_tape(); +} + #if SIMDJSON_EXCEPTIONS inline element::operator bool() const noexcept(false) { return get<bool>(); } inline element::operator const char*() const noexcept(false) { return get<const char *>(); } inline element::operator std::string_view() const noexcept(false) { return get<std::string_view>(); } @@ -3766,24 
+6150,24 @@ } inline array::iterator element::end() const noexcept(false) { return get<array>().end(); } -#endif +#endif // SIMDJSON_EXCEPTIONS inline simdjson_result<element> element::operator[](const std::string_view &key) const noexcept { return at_key(key); } inline simdjson_result<element> element::operator[](const char *key) const noexcept { return at_key(key); } inline simdjson_result<element> element::at(const std::string_view &json_pointer) const noexcept { - switch (tape_ref_type()) { + switch (tape.tape_ref_type()) { case internal::tape_type::START_OBJECT: - return object(doc, json_index).at(json_pointer); + return object(tape).at(json_pointer); case internal::tape_type::START_ARRAY: - return array(doc, json_index).at(json_pointer); + return array(tape).at(json_pointer); default: return INCORRECT_TYPE; } } inline simdjson_result<element> element::at(size_t index) const noexcept { @@ -3795,31 +6179,52 @@ inline simdjson_result<element> element::at_key_case_insensitive(const std::string_view &key) const noexcept { return get<object>().at_key_case_insensitive(key); } inline bool element::dump_raw_tape(std::ostream &out) const noexcept { - return doc->dump_raw_tape(out); + return tape.doc->dump_raw_tape(out); } -} // namespace simdjson::dom +inline std::ostream& operator<<(std::ostream& out, const element &value) { + return out << minify<element>(value); +} -namespace simdjson { +inline std::ostream& operator<<(std::ostream& out, element_type type) { + switch (type) { + case element_type::ARRAY: + return out << "array"; + case element_type::OBJECT: + return out << "object"; + case element_type::INT64: + return out << "int64_t"; + case element_type::UINT64: + return out << "uint64_t"; + case element_type::DOUBLE: + return out << "double"; + case element_type::STRING: + return out << "string"; + case element_type::BOOL: + return out << "bool"; + case element_type::NULL_VALUE: + return out << "null"; + default: + return out << "unexpected content!!!"; // 
abort() usage is forbidden in the library + } +} -// -// minify inline implementation -// +} // namespace dom template<> -inline std::ostream& minify<dom::element>::print(std::ostream& out) { +inline std::ostream& minifier<dom::element>::print(std::ostream& out) { using tape_type=internal::tape_type; size_t depth = 0; constexpr size_t MAX_DEPTH = 16; bool is_object[MAX_DEPTH]; is_object[0] = false; bool after_value = false; - internal::tape_ref iter(value); + internal::tape_ref iter(value.tape); do { // print commas after each value if (after_value) { out << ","; } @@ -3833,12 +6238,12 @@ // Arrays case tape_type::START_ARRAY: { // If we're too deep, we need to recurse to go deeper. depth++; if (unlikely(depth >= MAX_DEPTH)) { - out << minify<dom::array>(dom::array(iter.doc, iter.json_index)); - iter.json_index = iter.tape_value() - 1; // Jump to the ] + out << minify<dom::array>(dom::array(iter)); + iter.json_index = iter.matching_brace_index() - 1; // Jump to the ] depth--; break; } // Output start [ @@ -3860,12 +6265,12 @@ // Objects case tape_type::START_OBJECT: { // If we're too deep, we need to recurse to go deeper. 
depth++; if (unlikely(depth >= MAX_DEPTH)) { - out << minify<dom::object>(dom::object(iter.doc, iter.json_index)); - iter.json_index = iter.tape_value() - 1; // Jump to the } + out << minify<dom::object>(dom::object(iter)); + iter.json_index = iter.matching_brace_index() - 1; // Jump to the } depth--; break; } // Output start { @@ -3912,11 +6317,11 @@ // These are impossible case tape_type::END_ARRAY: case tape_type::END_OBJECT: case tape_type::ROOT: - abort(); + out << "unexpected content!!!"; // abort() usage is forbidden in the library } iter.json_index++; after_value = true; // Handle multiple ends in a row @@ -3929,430 +6334,47 @@ // Stop when we're at depth 0 } while (depth != 0); return out; } -template<> -inline std::ostream& minify<dom::object>::print(std::ostream& out) { - out << '{'; - auto pair = value.begin(); - auto end = value.end(); - if (pair != end) { - out << minify<dom::key_value_pair>(*pair); - for (++pair; pair != end; ++pair) { - out << "," << minify<dom::key_value_pair>(*pair); - } - } - return out << '}'; -} -template<> -inline std::ostream& minify<dom::array>::print(std::ostream& out) { - out << '['; - auto iter = value.begin(); - auto end = value.end(); - if (iter != end) { - out << minify<dom::element>(*iter); - for (++iter; iter != end; ++iter) { - out << "," << minify<dom::element>(*iter); - } - } - return out << ']'; -} -template<> -inline std::ostream& minify<dom::key_value_pair>::print(std::ostream& out) { - return out << '"' << internal::escape_json_string(value.key) << "\":" << value.value; -} #if SIMDJSON_EXCEPTIONS template<> -inline std::ostream& minify<simdjson_result<dom::element>>::print(std::ostream& out) { +really_inline std::ostream& minifier<simdjson_result<dom::element>>::print(std::ostream& out) { if (value.error()) { throw simdjson_error(value.error()); } return out << minify<dom::element>(value.first); } -template<> -inline std::ostream& minify<simdjson_result<dom::array>>::print(std::ostream& out) { - if 
(value.error()) { throw simdjson_error(value.error()); } - return out << minify<dom::array>(value.first); -} -template<> -inline std::ostream& minify<simdjson_result<dom::object>>::print(std::ostream& out) { - if (value.error()) { throw simdjson_error(value.error()); } - return out << minify<dom::object>(value.first); -} +really_inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::element> &value) noexcept(false) { + return out << minify<simdjson_result<dom::element>>(value); +} #endif } // namespace simdjson -namespace simdjson::internal { - -// -// tape_ref inline implementation -// -really_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {} -really_inline tape_ref::tape_ref(const document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {} - -inline size_t tape_ref::after_element() const noexcept { - switch (tape_ref_type()) { - case tape_type::START_ARRAY: - case tape_type::START_OBJECT: - return tape_value(); - case tape_type::UINT64: - case tape_type::INT64: - case tape_type::DOUBLE: - return json_index + 2; - default: - return json_index + 1; - } -} -really_inline tape_type tape_ref::tape_ref_type() const noexcept { - return static_cast<tape_type>(doc->tape[json_index] >> 56); -} -really_inline uint64_t internal::tape_ref::tape_value() const noexcept { - return doc->tape[json_index] & internal::JSON_VALUE_MASK; -} -template<typename T> -really_inline T tape_ref::next_tape_value() const noexcept { - static_assert(sizeof(T) == sizeof(uint64_t)); - return *reinterpret_cast<const T*>(&doc->tape[json_index + 1]); -} -inline std::string_view internal::tape_ref::get_string_view() const noexcept { - size_t string_buf_index = tape_value(); - uint32_t len; - memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); - return std::string_view( - reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]), - len - ); -} - - -} // namespace simdjson::internal - -#endif // 
SIMDJSON_INLINE_DOCUMENT_H -/* end file include/simdjson/inline/document.h */ -/* begin file include/simdjson/inline/document_stream.h */ -#ifndef SIMDJSON_INLINE_DOCUMENT_STREAM_H -#define SIMDJSON_INLINE_DOCUMENT_STREAM_H - -#include <algorithm> -#include <limits> -#include <stdexcept> -#include <thread> - -namespace simdjson::internal { - -/** - * This algorithm is used to quickly identify the buffer position of - * the last JSON document inside the current batch. - * - * It does its work by finding the last pair of structural characters - * that represent the end followed by the start of a document. - * - * Simply put, we iterate over the structural characters, starting from - * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ';' ',' - * and when the second element is NOT one of these characters: '}' '}' ';' ','. - * - * This simple comparison works most of the time, but it does not cover cases - * where the batch's structural indexes contain a perfect amount of documents. - * In such a case, we do not have access to the structural index which follows - * the last document, therefore, we do not have access to the second element in - * the pair, and means that we cannot identify the last document. To fix this - * issue, we keep a count of the open and closed curly/square braces we found - * while searching for the pair. 
When we find a pair AND the count of open and - * closed curly/square braces is the same, we know that we just passed a - * complete - * document, therefore the last json buffer location is the end of the batch - * */ -inline size_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const dom::parser &parser) { - // this function can be generally useful - if (parser.n_structural_indexes == 0) - return 0; - auto last_i = parser.n_structural_indexes - 1; - if (parser.structural_indexes[last_i] == size) { - if (last_i == 0) - return 0; - last_i = parser.n_structural_indexes - 2; - } - auto arr_cnt = 0; - auto obj_cnt = 0; - for (auto i = last_i; i > 0; i--) { - auto idxb = parser.structural_indexes[i]; - switch (buf[idxb]) { - case ':': - case ',': - continue; - case '}': - obj_cnt--; - continue; - case ']': - arr_cnt--; - continue; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - auto idxa = parser.structural_indexes[i - 1]; - switch (buf[idxa]) { - case '{': - case '[': - case ':': - case ',': - continue; - } - if (!arr_cnt && !obj_cnt) { - return last_i + 1; - } - return i; - } - return 0; -} - -// returns true if the provided byte value is an ASCII character -static inline bool is_ascii(char c) { - return ((unsigned char)c) <= 127; -} - -// if the string ends with UTF-8 values, backtrack -// up to the first ASCII character. May return 0. 
-static inline size_t trimmed_length_safe_utf8(const char * c, size_t len) { - while ((len > 0) and (not is_ascii(c[len - 1]))) { - len--; - } - return len; -} - -} // namespace simdjson::internal - -namespace simdjson::dom { - -really_inline document_stream::document_stream( - dom::parser &_parser, - const uint8_t *buf, - size_t len, - size_t batch_size, - error_code _error -) noexcept : parser{_parser}, _buf{buf}, _len{len}, _batch_size(batch_size), error{_error} { - if (!error) { error = json_parse(); } -} - -inline document_stream::~document_stream() noexcept { -#ifdef SIMDJSON_THREADS_ENABLED - if (stage_1_thread.joinable()) { - stage_1_thread.join(); - } -#endif -} - -really_inline document_stream::iterator document_stream::begin() noexcept { - return iterator(*this, false); -} - -really_inline document_stream::iterator document_stream::end() noexcept { - return iterator(*this, true); -} - -really_inline document_stream::iterator::iterator(document_stream& _stream, bool is_end) noexcept - : stream{_stream}, finished{is_end} { -} - -really_inline simdjson_result<element> document_stream::iterator::operator*() noexcept { - error_code err = stream.error == SUCCESS_AND_HAS_MORE ? 
SUCCESS : stream.error; - if (err) { return err; } - return stream.parser.doc.root(); -} - -really_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { - if (stream.error == SUCCESS_AND_HAS_MORE) { - stream.error = stream.json_parse(); - } else { - finished = true; - } - return *this; -} - -really_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { - return finished != other.finished; -} - -#ifdef SIMDJSON_THREADS_ENABLED - -// threaded version of json_parse -// todo: simplify this code further -inline error_code document_stream::json_parse() noexcept { - error = parser.ensure_capacity(_batch_size); - if (error) { return error; } - error = parser_thread.ensure_capacity(_batch_size); - if (error) { return error; } - - if (unlikely(load_next_batch)) { - // First time loading - if (!stage_1_thread.joinable()) { - _batch_size = (std::min)(_batch_size, remaining()); - _batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size); - if (_batch_size == 0) { - return simdjson::UTF8_ERROR; - } - auto stage1_is_ok = error_code(simdjson::active_implementation->stage1(buf(), _batch_size, parser, true)); - if (stage1_is_ok != simdjson::SUCCESS) { - return stage1_is_ok; - } - size_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser); - if (last_index == 0) { - if (parser.n_structural_indexes == 0) { - return simdjson::EMPTY; - } - } else { - parser.n_structural_indexes = last_index + 1; - } - } - // the second thread is running or done. 
- else { - stage_1_thread.join(); - if (stage1_is_ok_thread != simdjson::SUCCESS) { - return stage1_is_ok_thread; - } - std::swap(parser.structural_indexes, parser_thread.structural_indexes); - parser.n_structural_indexes = parser_thread.n_structural_indexes; - advance(last_json_buffer_loc); - n_bytes_parsed += last_json_buffer_loc; - } - // let us decide whether we will start a new thread - if (remaining() - _batch_size > 0) { - last_json_buffer_loc = - parser.structural_indexes[internal::find_last_json_buf_idx(buf(), _batch_size, parser)]; - _batch_size = (std::min)(_batch_size, remaining() - last_json_buffer_loc); - if (_batch_size > 0) { - _batch_size = internal::trimmed_length_safe_utf8( - (const char *)(buf() + last_json_buffer_loc), _batch_size); - if (_batch_size == 0) { - return simdjson::UTF8_ERROR; - } - // let us capture read-only variables - const uint8_t *const b = buf() + last_json_buffer_loc; - const size_t bs = _batch_size; - // we call the thread on a lambda that will update - // this->stage1_is_ok_thread - // there is only one thread that may write to this value - stage_1_thread = std::thread([this, b, bs] { - this->stage1_is_ok_thread = error_code(simdjson::active_implementation->stage1(b, bs, this->parser_thread, true)); - }); - } - } - next_json = 0; - load_next_batch = false; - } // load_next_batch - error_code res = simdjson::active_implementation->stage2(buf(), remaining(), parser, next_json); - if (res == simdjson::SUCCESS_AND_HAS_MORE) { - n_parsed_docs++; - current_buffer_loc = parser.structural_indexes[next_json]; - load_next_batch = (current_buffer_loc == last_json_buffer_loc); - } else if (res == simdjson::SUCCESS) { - n_parsed_docs++; - if (remaining() > _batch_size) { - current_buffer_loc = parser.structural_indexes[next_json - 1]; - load_next_batch = true; - res = simdjson::SUCCESS_AND_HAS_MORE; - } - } - return res; -} - -#else // SIMDJSON_THREADS_ENABLED - -// single-threaded version of json_parse -inline error_code 
document_stream::json_parse() noexcept { - error = parser.ensure_capacity(_batch_size); - if (error) { return error; } - - if (unlikely(load_next_batch)) { - advance(current_buffer_loc); - n_bytes_parsed += current_buffer_loc; - _batch_size = (std::min)(_batch_size, remaining()); - _batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size); - auto stage1_is_ok = (error_code)simdjson::active_implementation->stage1(buf(), _batch_size, parser, true); - if (stage1_is_ok != simdjson::SUCCESS) { - return stage1_is_ok; - } - size_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser); - if (last_index == 0) { - if (parser.n_structural_indexes == 0) { - return EMPTY; - } - } else { - parser.n_structural_indexes = last_index + 1; - } - load_next_batch = false; - } // load_next_batch - error_code res = simdjson::active_implementation->stage2(buf(), remaining(), parser, next_json); - if (likely(res == simdjson::SUCCESS_AND_HAS_MORE)) { - n_parsed_docs++; - current_buffer_loc = parser.structural_indexes[next_json]; - } else if (res == simdjson::SUCCESS) { - n_parsed_docs++; - if (remaining() > _batch_size) { - current_buffer_loc = parser.structural_indexes[next_json - 1]; - next_json = 1; - load_next_batch = true; - res = simdjson::SUCCESS_AND_HAS_MORE; - } - } - return res; -} -#endif // SIMDJSON_THREADS_ENABLED - -} // namespace simdjson::dom -#endif // SIMDJSON_INLINE_DOCUMENT_STREAM_H -/* end file include/simdjson/inline/document_stream.h */ +#endif // SIMDJSON_INLINE_ELEMENT_H +/* end file include/simdjson/inline/element.h */ /* begin file include/simdjson/inline/error.h */ #ifndef SIMDJSON_INLINE_ERROR_H #define SIMDJSON_INLINE_ERROR_H +#include <cstring> #include <string> +#include <utility> -namespace simdjson::internal { +namespace simdjson { +namespace internal { // We store the error code so we can validate the error message is associated with the right code struct error_code_info { error_code code; std::string message; 
}; // These MUST match the codes in error_code. We check this constraint in basictests. - inline const error_code_info error_codes[] { - { SUCCESS, "No error" }, - { SUCCESS_AND_HAS_MORE, "No error and buffer still has more data" }, - { CAPACITY, "This parser can't support a document that big" }, - { MEMALLOC, "Error allocating memory, we're most likely out of memory" }, - { TAPE_ERROR, "Something went wrong while writing to the tape" }, - { DEPTH_ERROR, "The JSON document was too deep (too many nested objects and arrays)" }, - { STRING_ERROR, "Problem while parsing a string" }, - { T_ATOM_ERROR, "Problem while parsing an atom starting with the letter 't'" }, - { F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'" }, - { N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'" }, - { NUMBER_ERROR, "Problem while parsing a number" }, - { UTF8_ERROR, "The input is not valid UTF-8" }, - { UNINITIALIZED, "Uninitialized" }, - { EMPTY, "Empty: no JSON found" }, - { UNESCAPED_CHARS, "Within strings, some characters must be escaped, we found unescaped characters" }, - { UNCLOSED_STRING, "A string is opened, but never closed." }, - { UNSUPPORTED_ARCHITECTURE, "simdjson does not have an implementation supported by this CPU architecture (perhaps it's a non-SIMD CPU?)." }, - { INCORRECT_TYPE, "The JSON element does not have the requested type." }, - { NUMBER_OUT_OF_RANGE, "The JSON number is too large or too small to fit within the requested type." }, - { INDEX_OUT_OF_BOUNDS, "Attempted to access an element of a JSON array that is beyond its length." }, - { NO_SUCH_FIELD, "The JSON field referenced does not exist in this object." }, - { IO_ERROR, "Error reading the file." }, - { INVALID_JSON_POINTER, "Invalid JSON pointer syntax." }, - { INVALID_URI_FRAGMENT, "Invalid URI fragment syntax." 
}, - { UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" } - }; // error_messages[] -} // namespace simdjson::internal + extern SIMDJSON_DLLIMPORTEXPORT const error_code_info error_codes[]; +} // namespace internal -namespace simdjson { inline const char *error_message(error_code error) noexcept { // If you're using error_code, we're trusting you got it from the enum. return internal::error_codes[int(error)].message.c_str(); } @@ -4377,32 +6399,41 @@ template<typename T> really_inline void simdjson_result_base<T>::tie(T &value, error_code &error) && noexcept { // on the clang compiler that comes with current macOS (Apple clang version 11.0.0), // tie(width, error) = size["w"].get<uint64_t>(); // fails with "error: no viable overloaded '='"" - value = std::forward<simdjson_result_base<T>>(*this).first; error = this->second; + if (!error) { + value = std::forward<simdjson_result_base<T>>(*this).first; + } } template<typename T> +WARN_UNUSED really_inline error_code simdjson_result_base<T>::get(T &value) && noexcept { + error_code error; + std::forward<simdjson_result_base<T>>(*this).tie(value, error); + return error; +} + +template<typename T> really_inline error_code simdjson_result_base<T>::error() const noexcept { return this->second; } #if SIMDJSON_EXCEPTIONS template<typename T> really_inline T& simdjson_result_base<T>::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return this->first; -}; +} template<typename T> really_inline T&& simdjson_result_base<T>::take_value() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward<T>(this->first); -}; +} template<typename T> really_inline simdjson_result_base<T>::operator T&&() && noexcept(false) { return std::forward<simdjson_result_base<T>>(*this).take_value(); } @@ -4432,10 +6463,15 @@ really_inline void simdjson_result<T>::tie(T &value, error_code &error) && noexcept { 
std::forward<internal::simdjson_result_base<T>>(*this).tie(value, error); } template<typename T> +WARN_UNUSED really_inline error_code simdjson_result<T>::get(T &value) && noexcept { + return std::forward<internal::simdjson_result_base<T>>(*this).get(value); +} + +template<typename T> really_inline error_code simdjson_result<T>::error() const noexcept { return internal::simdjson_result_base<T>::error(); } #if SIMDJSON_EXCEPTIONS @@ -4472,46 +6508,305 @@ } // namespace simdjson #endif // SIMDJSON_INLINE_ERROR_H /* end file include/simdjson/inline/error.h */ +/* begin file include/simdjson/inline/object.h */ +#ifndef SIMDJSON_INLINE_OBJECT_H +#define SIMDJSON_INLINE_OBJECT_H + +#include <cstring> +#include <string> + +namespace simdjson { + +// +// simdjson_result<dom::object> inline implementation +// +really_inline simdjson_result<dom::object>::simdjson_result() noexcept + : internal::simdjson_result_base<dom::object>() {} +really_inline simdjson_result<dom::object>::simdjson_result(dom::object value) noexcept + : internal::simdjson_result_base<dom::object>(std::forward<dom::object>(value)) {} +really_inline simdjson_result<dom::object>::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base<dom::object>(error) {} + +inline simdjson_result<dom::element> simdjson_result<dom::object>::operator[](const std::string_view &key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +inline simdjson_result<dom::element> simdjson_result<dom::object>::operator[](const char *key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +inline simdjson_result<dom::element> simdjson_result<dom::object>::at(const std::string_view &json_pointer) const noexcept { + if (error()) { return error(); } + return first.at(json_pointer); +} +inline simdjson_result<dom::element> simdjson_result<dom::object>::at_key(const std::string_view &key) const noexcept { + if (error()) { return error(); } + return first.at_key(key); +} 
+inline simdjson_result<dom::element> simdjson_result<dom::object>::at_key_case_insensitive(const std::string_view &key) const noexcept { + if (error()) { return error(); } + return first.at_key_case_insensitive(key); +} + +#if SIMDJSON_EXCEPTIONS + +inline dom::object::iterator simdjson_result<dom::object>::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +inline dom::object::iterator simdjson_result<dom::object>::end() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} +inline size_t simdjson_result<dom::object>::size() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.size(); +} + +#endif // SIMDJSON_EXCEPTIONS + +namespace dom { + +// +// object inline implementation +// +really_inline object::object() noexcept : tape{} {} +really_inline object::object(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline object::iterator object::begin() const noexcept { + return internal::tape_ref(tape.doc, tape.json_index + 1); +} +inline object::iterator object::end() const noexcept { + return internal::tape_ref(tape.doc, tape.after_element() - 1); +} +inline size_t object::size() const noexcept { + return tape.scope_count(); +} + +inline simdjson_result<element> object::operator[](const std::string_view &key) const noexcept { + return at_key(key); +} +inline simdjson_result<element> object::operator[](const char *key) const noexcept { + return at_key(key); +} +inline simdjson_result<element> object::at(const std::string_view &json_pointer) const noexcept { + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + + // Grab the child with the given key + simdjson_result<element> child; + + // If there is an escape character in the key, unescape it and then get the child. 
+ size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = at_key(unescaped); + } else { + child = at_key(key); + } + + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at(json_pointer.substr(slash+1)); + } + + return child; +} +inline simdjson_result<element> object::at_key(const std::string_view &key) const noexcept { + iterator end_field = end(); + for (iterator field = begin(); field != end_field; ++field) { + if (field.key_equals(key)) { + return field.value(); + } + } + return NO_SUCH_FIELD; +} +// In case you wonder why we need this, please see +// https://github.com/simdjson/simdjson/issues/323 +// People do seek keys in a case-insensitive manner. 
+inline simdjson_result<element> object::at_key_case_insensitive(const std::string_view &key) const noexcept { + iterator end_field = end(); + for (iterator field = begin(); field != end_field; ++field) { + if (field.key_equals_case_insensitive(key)) { + return field.value(); + } + } + return NO_SUCH_FIELD; +} + +// +// object::iterator inline implementation +// +really_inline object::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline const key_value_pair object::iterator::operator*() const noexcept { + return key_value_pair(key(), value()); +} +inline bool object::iterator::operator!=(const object::iterator& other) const noexcept { + return tape.json_index != other.tape.json_index; +} +inline object::iterator& object::iterator::operator++() noexcept { + tape.json_index++; + tape.json_index = tape.after_element(); + return *this; +} +inline std::string_view object::iterator::key() const noexcept { + return tape.get_string_view(); +} +inline uint32_t object::iterator::key_length() const noexcept { + return tape.get_string_length(); +} +inline const char* object::iterator::key_c_str() const noexcept { + return reinterpret_cast<const char *>(&tape.doc->string_buf[size_t(tape.tape_value()) + sizeof(uint32_t)]); +} +inline element object::iterator::value() const noexcept { + return element(internal::tape_ref(tape.doc, tape.json_index + 1)); +} + +/** + * Design notes: + * Instead of constructing a string_view and then comparing it with a + * user-provided strings, it is probably more performant to have dedicated + * functions taking as a parameter the string we want to compare against + * and return true when they are equal. That avoids the creation of a temporary + * std::string_view. Though it is possible for the compiler to avoid entirely + * any overhead due to string_view, relying too much on compiler magic is + * problematic: compiler magic sometimes fail, and then what do you do? 
+ * Also, enticing users to rely on high-performance function is probably better + * on the long run. + */ + +inline bool object::iterator::key_equals(const std::string_view & o) const noexcept { + // We use the fact that the key length can be computed quickly + // without access to the string buffer. + const uint32_t len = key_length(); + if(o.size() == len) { + // We avoid construction of a temporary string_view instance. + return (memcmp(o.data(), key_c_str(), len) == 0); + } + return false; +} + +inline bool object::iterator::key_equals_case_insensitive(const std::string_view & o) const noexcept { + // We use the fact that the key length can be computed quickly + // without access to the string buffer. + const uint32_t len = key_length(); + if(o.size() == len) { + // See For case-insensitive string comparisons, avoid char-by-char functions + // https://lemire.me/blog/2020/04/30/for-case-insensitive-string-comparisons-avoid-char-by-char-functions/ + // Note that it might be worth rolling our own strncasecmp function, with vectorization. 
+ return (simdjson_strncasecmp(o.data(), key_c_str(), len) == 0); + } + return false; +} +// +// key_value_pair inline implementation +// +inline key_value_pair::key_value_pair(const std::string_view &_key, element _value) noexcept : + key(_key), value(_value) {} + +inline std::ostream& operator<<(std::ostream& out, const object &value) { + return out << minify<object>(value); +} +inline std::ostream& operator<<(std::ostream& out, const key_value_pair &value) { + return out << minify<key_value_pair>(value); +} + +} // namespace dom + +template<> +inline std::ostream& minifier<dom::object>::print(std::ostream& out) { + out << '{'; + auto pair = value.begin(); + auto end = value.end(); + if (pair != end) { + out << minify<dom::key_value_pair>(*pair); + for (++pair; pair != end; ++pair) { + out << "," << minify<dom::key_value_pair>(*pair); + } + } + return out << '}'; +} + +template<> +inline std::ostream& minifier<dom::key_value_pair>::print(std::ostream& out) { + return out << '"' << internal::escape_json_string(value.key) << "\":" << value.value; +} + +#if SIMDJSON_EXCEPTIONS + +template<> +inline std::ostream& minifier<simdjson_result<dom::object>>::print(std::ostream& out) { + if (value.error()) { throw simdjson_error(value.error()); } + return out << minify<dom::object>(value.first); +} + +inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::object> &value) noexcept(false) { + return out << minify<simdjson_result<dom::object>>(value); +} +#endif // SIMDJSON_EXCEPTIONS + +} // namespace simdjson + +#endif // SIMDJSON_INLINE_OBJECT_H +/* end file include/simdjson/inline/object.h */ /* begin file include/simdjson/inline/padded_string.h */ #ifndef SIMDJSON_INLINE_PADDED_STRING_H #define SIMDJSON_INLINE_PADDED_STRING_H #include <climits> #include <cstring> #include <memory> #include <string> -namespace simdjson::internal { +namespace simdjson { +namespace internal { // low-level function to allocate memory with padding so we can read past the 
// "length" bytes safely. if you must provide a pointer to some data, create it // with this function: length is the max. size in bytes of the string caller is // responsible to free the memory (free(...)) inline char *allocate_padded_buffer(size_t length) noexcept { // we could do a simple malloc // return (char *) malloc(length + SIMDJSON_PADDING); // However, we might as well align to cache lines... size_t totalpaddedlength = length + SIMDJSON_PADDING; +#if defined(_MSC_VER) && _MSC_VER < 1910 + // For legacy Visual Studio 2015 since it does not have proper C++11 support + char *padded_buffer = new[totalpaddedlength]; +#else char *padded_buffer = aligned_malloc_char(64, totalpaddedlength); +#endif #ifndef NDEBUG if (padded_buffer == nullptr) { return nullptr; } #endif // NDEBUG memset(padded_buffer + length, 0, totalpaddedlength - length); return padded_buffer; } // allocate_padded_buffer() -} // namespace simdjson::internal +} // namespace internal -namespace simdjson { -inline padded_string::padded_string() noexcept : viable_size(0), data_ptr(nullptr) {} +inline padded_string::padded_string() noexcept {} inline padded_string::padded_string(size_t length) noexcept : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { if (data_ptr != nullptr) data_ptr[length] = '\0'; // easier when you need a c_str } @@ -4575,11 +6870,15 @@ inline padded_string::operator std::string_view() const { return std::string_view(data(), length()); } inline simdjson_result<padded_string> padded_string::load(const std::string &filename) noexcept { // Open the file + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe std::FILE *fp = std::fopen(filename.c_str(), "rb"); + SIMDJSON_POP_DISABLE_WARNINGS + if (fp == nullptr) { return IO_ERROR; } // Get the file size @@ -4617,13 +6916,18 @@ /* end file include/simdjson/inline/padded_string.h */ /* begin file 
include/simdjson/inline/parsedjson_iterator.h */ #ifndef SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H #define SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H +#include <cstring> namespace simdjson { +// VS2017 reports deprecated warnings when you define a deprecated class's methods. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_DEPRECATED_WARNING + // Because of template weirdness, the actual class definition is inline in the document class WARN_UNUSED bool dom::parser::Iterator::is_ok() const { return location < tape_length; } @@ -4669,19 +6973,19 @@ location += 1; } location += 1; current_val = doc.tape[location]; - current_type = (current_val >> 56); + current_type = uint8_t(current_val >> 56); return true; } void dom::parser::Iterator::move_to_value() { // assume that we are on a key, so move by 1. location += 1; current_val = doc.tape[location]; - current_type = (current_val >> 56); + current_type = uint8_t(current_val >> 56); } bool dom::parser::Iterator::move_to_key(const char *key) { if (down()) { do { @@ -4754,18 +7058,18 @@ // we have that npos < target_location here do { oldnpos = npos; if ((current_type == '[') || (current_type == '{')) { // we need to jump - npos = (current_val & internal::JSON_VALUE_MASK); + npos = uint32_t(current_val); } else { npos = npos + ((current_type == 'd' || current_type == 'l') ? 
2 : 1); } } while (npos < target_location); location = oldnpos; current_val = doc.tape[location]; - current_type = current_val >> 56; + current_type = uint8_t(current_val >> 56); return true; } bool dom::parser::Iterator::up() { if (depth == 1) { @@ -4774,92 +7078,96 @@ to_start_scope(); // next we just move to the previous value depth--; location -= 1; current_val = doc.tape[location]; - current_type = (current_val >> 56); + current_type = uint8_t(current_val >> 56); return true; } bool dom::parser::Iterator::down() { if (location + 1 >= tape_length) { return false; } if ((current_type == '[') || (current_type == '{')) { - size_t npos = (current_val & internal::JSON_VALUE_MASK); + size_t npos = uint32_t(current_val); if (npos == location + 2) { return false; // we have an empty scope } depth++; assert(depth < max_depth); location = location + 1; depth_index[depth].start_of_scope = location; depth_index[depth].scope_type = current_type; current_val = doc.tape[location]; - current_type = (current_val >> 56); + current_type = uint8_t(current_val >> 56); return true; } return false; } void dom::parser::Iterator::to_start_scope() { location = depth_index[depth].start_of_scope; current_val = doc.tape[location]; - current_type = (current_val >> 56); + current_type = uint8_t(current_val >> 56); } bool dom::parser::Iterator::next() { size_t npos; if ((current_type == '[') || (current_type == '{')) { // we need to jump - npos = (current_val & internal::JSON_VALUE_MASK); + npos = uint32_t(current_val); } else { npos = location + (is_number() ? 
2 : 1); } uint64_t next_val = doc.tape[npos]; - uint8_t next_type = (next_val >> 56); + uint8_t next_type = uint8_t(next_val >> 56); if ((next_type == ']') || (next_type == '}')) { return false; // we reached the end of the scope } location = npos; current_val = next_val; current_type = next_type; return true; } - dom::parser::Iterator::Iterator(const dom::parser &pj) noexcept(false) - : doc(pj.doc), depth(0), location(0), tape_length(0) { + : doc(pj.doc) +{ #if SIMDJSON_EXCEPTIONS if (!pj.valid) { throw simdjson_error(pj.error); } #else - if (!pj.valid) { abort(); } + if (!pj.valid) { return; } // abort() usage is forbidden in the library #endif max_depth = pj.max_depth(); depth_index = new scopeindex_t[max_depth + 1]; depth_index[0].start_of_scope = location; current_val = doc.tape[location++]; - current_type = (current_val >> 56); + current_type = uint8_t(current_val >> 56); depth_index[0].scope_type = current_type; - tape_length = current_val & internal::JSON_VALUE_MASK; + tape_length = size_t(current_val & internal::JSON_VALUE_MASK); if (location < tape_length) { // If we make it here, then depth_capacity must >=2, but the compiler // may not know this. 
current_val = doc.tape[location]; - current_type = (current_val >> 56); + current_type = uint8_t(current_val >> 56); depth++; assert(depth < max_depth); depth_index[depth].start_of_scope = location; depth_index[depth].scope_type = current_type; } } - dom::parser::Iterator::Iterator( const dom::parser::Iterator &o) noexcept - : doc(o.doc), max_depth(o.depth), depth(o.depth), location(o.location), - tape_length(o.tape_length), current_type(o.current_type), - current_val(o.current_val) { + : doc(o.doc), + max_depth(o.depth), + depth(o.depth), + location(o.location), + tape_length(o.tape_length), + current_type(o.current_type), + current_val(o.current_val) +{ depth_index = new scopeindex_t[max_depth+1]; memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0])); } dom::parser::Iterator::~Iterator() noexcept { @@ -4902,11 +7210,11 @@ break; case '{': // we have an object case '}': // we end an object case '[': // we start an array case ']': // we end an array - os << static_cast<char>(current_type); + os << char(current_type); break; default: return false; } return true; @@ -4929,11 +7237,11 @@ if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) { // escaping the character new_pointer[new_length] = '\\'; new_length++; } - new_pointer[new_length] = fragment; + new_pointer[new_length] = char(fragment); i += 3; #if __cpp_exceptions } catch (std::invalid_argument &) { delete[] new_pointer; return false; // the fragment is invalid @@ -5042,11 +7350,11 @@ key_or_index += pointer[offset]; } bool found = false; if (is_object()) { - if (move_to_key(key_or_index.c_str(), key_or_index.length())) { + if (move_to_key(key_or_index.c_str(), uint32_t(key_or_index.length()))) { found = relative_move_to(pointer + offset, length - offset); } } else if (is_array()) { if (key_or_index == "-") { // handling "-" case first if (down()) { @@ -5054,18 +7362,18 @@ ; // moving to the end of the array // moving to the nonexistent value right after... 
size_t npos; if ((current_type == '[') || (current_type == '{')) { // we need to jump - npos = (current_val & internal::JSON_VALUE_MASK); + npos = uint32_t(current_val); } else { npos = location + ((current_type == 'd' || current_type == 'l') ? 2 : 1); } location = npos; current_val = doc.tape[npos]; - current_type = (current_val >> 56); + current_type = uint8_t(current_val >> 56); return true; // how could it fail ? } } else { // regular numeric index // The index can't have a leading '0' if (key_or_index[0] == '0' && key_or_index.length() > 1) { @@ -5084,12 +7392,325 @@ } return found; } +SIMDJSON_POP_DISABLE_WARNINGS + } // namespace simdjson #endif // SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H /* end file include/simdjson/inline/parsedjson_iterator.h */ +/* begin file include/simdjson/inline/parser.h */ +#ifndef SIMDJSON_INLINE_PARSER_H +#define SIMDJSON_INLINE_PARSER_H +#include <cstdio> +#include <climits> + +namespace simdjson { +namespace dom { + +// +// parser inline implementation +// +#if defined(_MSC_VER) && _MSC_VER < 1910 +// older versions of Visual Studio lack proper support for unique_ptr. +really_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity}, + loaded_bytes(nullptr) { +} +#else +really_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity}, + loaded_bytes(nullptr, &aligned_free_char) { +} +#endif +really_inline parser::parser(parser &&other) noexcept = default; +really_inline parser &parser::operator=(parser &&other) noexcept = default; + +inline bool parser::is_valid() const noexcept { return valid; } +inline int parser::get_error_code() const noexcept { return error; } +inline std::string parser::get_error_message() const noexcept { return error_message(error); } +inline bool parser::print_json(std::ostream &os) const noexcept { + if (!valid) { return false; } + os << doc.root(); + return true; +} +inline bool parser::dump_raw_tape(std::ostream &os) const noexcept { + return valid ? 
doc.dump_raw_tape(os) : false; +} + +inline simdjson_result<size_t> parser::read_file(const std::string &path) noexcept { + // Open the file + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + std::FILE *fp = std::fopen(path.c_str(), "rb"); + SIMDJSON_POP_DISABLE_WARNINGS + + if (fp == nullptr) { + return IO_ERROR; + } + + // Get the file size + if(std::fseek(fp, 0, SEEK_END) < 0) { + std::fclose(fp); + return IO_ERROR; + } + long len = std::ftell(fp); + if((len < 0) || (len == LONG_MAX)) { + std::fclose(fp); + return IO_ERROR; + } + + // Make sure we have enough capacity to load the file + if (_loaded_bytes_capacity < size_t(len)) { + loaded_bytes.reset( internal::allocate_padded_buffer(len) ); + if (!loaded_bytes) { + std::fclose(fp); + return MEMALLOC; + } + _loaded_bytes_capacity = len; + } + + // Read the string + std::rewind(fp); + size_t bytes_read = std::fread(loaded_bytes.get(), 1, len, fp); + if (std::fclose(fp) != 0 || bytes_read != size_t(len)) { + return IO_ERROR; + } + + return bytes_read; +} + +inline simdjson_result<element> parser::load(const std::string &path) & noexcept { + size_t len; + auto _error = read_file(path).get(len); + if (_error) { return _error; } + return parse(loaded_bytes.get(), len, false); +} + +inline simdjson_result<document_stream> parser::load_many(const std::string &path, size_t batch_size) noexcept { + size_t len; + auto _error = read_file(path).get(len); + if (_error) { return _error; } + return document_stream(*this, (const uint8_t*)loaded_bytes.get(), len, batch_size); +} + +inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { + error_code _error = ensure_capacity(len); + if (_error) { return _error; } + + if (realloc_if_needed) { + const uint8_t *tmp_buf = buf; + buf = (uint8_t *)internal::allocate_padded_buffer(len); + if (buf == nullptr) + return MEMALLOC; + 
memcpy((void *)buf, tmp_buf, len); + } + + _error = implementation->parse(buf, len, doc); + if (realloc_if_needed) { + aligned_free((void *)buf); // must free before we exit + } + if (_error) { return _error; } + + return doc.root(); +} +really_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { + return parse((const uint8_t *)buf, len, realloc_if_needed); +} +really_inline simdjson_result<element> parser::parse(const std::string &s) & noexcept { + return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); +} +really_inline simdjson_result<element> parser::parse(const padded_string &s) & noexcept { + return parse(s.data(), s.length(), false); +} + +inline simdjson_result<document_stream> parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { + return document_stream(*this, buf, len, batch_size); +} +inline simdjson_result<document_stream> parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept { + return parse_many((const uint8_t *)buf, len, batch_size); +} +inline simdjson_result<document_stream> parser::parse_many(const std::string &s, size_t batch_size) noexcept { + return parse_many(s.data(), s.length(), batch_size); +} +inline simdjson_result<document_stream> parser::parse_many(const padded_string &s, size_t batch_size) noexcept { + return parse_many(s.data(), s.length(), batch_size); +} + +really_inline size_t parser::capacity() const noexcept { + return implementation ? implementation->capacity() : 0; +} +really_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +really_inline size_t parser::max_depth() const noexcept { + return implementation ? 
implementation->max_depth() : DEFAULT_MAX_DEPTH; +} + +WARN_UNUSED +inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { + // + // Reallocate implementation and document if needed + // + error_code err; + // + // It is possible that we change max_depth without touching capacity, in + // which case, we do not want to reallocate the document buffers. + // + bool need_doc_allocation{false}; + if (implementation) { + need_doc_allocation = implementation->capacity() != capacity || !doc.tape; + err = implementation->allocate(capacity, max_depth); + } else { + need_doc_allocation = true; + err = simdjson::active_implementation->create_dom_parser_implementation(capacity, max_depth, implementation); + } + if (err) { return err; } + if (need_doc_allocation) { + err = doc.allocate(capacity); + if (err) { return err; } + } + return SUCCESS; +} + +WARN_UNUSED +inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { + return !allocate(capacity, max_depth); +} + +inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept { + // If we don't have enough capacity, (try to) automatically bump it. + // If the document was taken, reallocate that too. + // Both in one if statement to minimize unlikely branching. 
+ if (unlikely(capacity() < desired_capacity || !doc.tape)) { + if (desired_capacity > max_capacity()) { + return error = CAPACITY; + } + return allocate(desired_capacity, max_depth()); + } + + return SUCCESS; +} + +really_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + _max_capacity = max_capacity; +} + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_INLINE_PARSER_H +/* end file include/simdjson/inline/parser.h */ +/* begin file include/simdjson/inline/tape_ref.h */ +#ifndef SIMDJSON_INLINE_TAPE_REF_H +#define SIMDJSON_INLINE_TAPE_REF_H + +#include <cstring> + +namespace simdjson { +namespace internal { + +// +// tape_ref inline implementation +// +really_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {} +really_inline tape_ref::tape_ref(const dom::document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {} + +// Some value types have a specific on-tape word value. It can be faster +// to check the type by doing a word-to-word comparison instead of extracting the +// most significant 8 bits. 
+ +really_inline bool tape_ref::is_double() const noexcept { + constexpr uint64_t tape_double = uint64_t(tape_type::DOUBLE)<<56; + return doc->tape[json_index] == tape_double; +} +really_inline bool tape_ref::is_int64() const noexcept { + constexpr uint64_t tape_int64 = uint64_t(tape_type::INT64)<<56; + return doc->tape[json_index] == tape_int64; +} +really_inline bool tape_ref::is_uint64() const noexcept { + constexpr uint64_t tape_uint64 = uint64_t(tape_type::UINT64)<<56; + return doc->tape[json_index] == tape_uint64; +} +really_inline bool tape_ref::is_false() const noexcept { + constexpr uint64_t tape_false = uint64_t(tape_type::FALSE_VALUE)<<56; + return doc->tape[json_index] == tape_false; +} +really_inline bool tape_ref::is_true() const noexcept { + constexpr uint64_t tape_true = uint64_t(tape_type::TRUE_VALUE)<<56; + return doc->tape[json_index] == tape_true; +} +really_inline bool tape_ref::is_null_on_tape() const noexcept { + constexpr uint64_t tape_null = uint64_t(tape_type::NULL_VALUE)<<56; + return doc->tape[json_index] == tape_null; +} + +inline size_t tape_ref::after_element() const noexcept { + switch (tape_ref_type()) { + case tape_type::START_ARRAY: + case tape_type::START_OBJECT: + return matching_brace_index(); + case tape_type::UINT64: + case tape_type::INT64: + case tape_type::DOUBLE: + return json_index + 2; + default: + return json_index + 1; + } +} +really_inline tape_type tape_ref::tape_ref_type() const noexcept { + return static_cast<tape_type>(doc->tape[json_index] >> 56); +} +really_inline uint64_t internal::tape_ref::tape_value() const noexcept { + return doc->tape[json_index] & internal::JSON_VALUE_MASK; +} +really_inline uint32_t internal::tape_ref::matching_brace_index() const noexcept { + return uint32_t(doc->tape[json_index]); +} +really_inline uint32_t internal::tape_ref::scope_count() const noexcept { + return uint32_t((doc->tape[json_index] >> 32) & internal::JSON_COUNT_MASK); +} + +template<typename T> +really_inline T 
tape_ref::next_tape_value() const noexcept { + static_assert(sizeof(T) == sizeof(uint64_t), "next_tape_value() template parameter must be 64-bit"); + // Though the following is tempting... + // return *reinterpret_cast<const T*>(&doc->tape[json_index + 1]); + // It is not generally safe. It is safer, and often faster to rely + // on memcpy. Yes, it is uglier, but it is also encapsulated. + T x; + memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t)); + return x; +} + +really_inline uint32_t internal::tape_ref::get_string_length() const noexcept { + size_t string_buf_index = size_t(tape_value()); + uint32_t len; + memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); + return len; +} + +really_inline const char * internal::tape_ref::get_c_str() const noexcept { + size_t string_buf_index = size_t(tape_value()); + return reinterpret_cast<const char *>(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); +} + +inline std::string_view internal::tape_ref::get_string_view() const noexcept { + return std::string_view( + get_c_str(), + get_string_length() + ); +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INLINE_TAPE_REF_H +/* end file include/simdjson/inline/tape_ref.h */ + +SIMDJSON_POP_DISABLE_WARNINGS + #endif // SIMDJSON_H -/* end file include/simdjson/inline/parsedjson_iterator.h */ +/* end file include/simdjson/inline/tape_ref.h */ \ No newline at end of file