/* saj.c
 * Copyright (c) 2012, Peter Ohler
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 *  - Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 
 *  - Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 
 *  - Neither the name of Peter Ohler nor the names of its contributors may be
 *    used to endorse or promote products derived from this software without
 *    specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#if !IS_WINDOWS
#include <sys/resource.h>  /* for getrlimit() on linux */
#endif
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <sys/types.h>
#include <unistd.h>

// Workaround in case INFINITY is not defined in math.h or if the OS is CentOS
#define OJ_INFINITY (1.0/0.0)

#include "oj.h"
#include "encode.h"

typedef struct _ParseInfo {
    char	*str;		/* buffer being read from */
    char	*s;		/* current position in buffer */
    void	*stack_min;
    VALUE	handler;
    int		has_hash_start;
    int		has_hash_end;
    int		has_array_start;
    int		has_array_end;
    int		has_add_value;
    int		has_error;
} *ParseInfo;

static void	read_next(ParseInfo pi, const char *key);
static void	read_hash(ParseInfo pi, const char *key);
static void	read_array(ParseInfo pi, const char *key);
static void	read_str(ParseInfo pi, const char *key);
static void	read_num(ParseInfo pi, const char *key);
static void	read_true(ParseInfo pi, const char *key);
static void	read_false(ParseInfo pi, const char *key);
static void	read_nil(ParseInfo pi, const char *key);
static void	next_non_white(ParseInfo pi);
static char*	read_quoted_value(ParseInfo pi);
static void	skip_comment(ParseInfo pi);

/* This JSON parser is a single pass, destructive, callback parser. It is a
 * single pass parse since it only make one pass over the characters in the
 * JSON document string. It is destructive because it re-uses the content of
 * the string for values in the callback and places \0 characters at various
 * places to mark the end of tokens and strings. It is a callback parser like
 * a SAX parser because it uses callback when document elements are
 * encountered.
 *
 * Parsing is very tolerant. Lack of headers and even mispelled element
 * endings are passed over without raising an error. A best attempt is made in
 * all cases to parse the string.
 */

inline static void
call_error(const char *msg, ParseInfo pi, const char* file, int line) {
    char	buf[128];
    const char	*s = pi->s;
    int		jline = 1;
    int		col = 1;

    for (; pi->str < s && '\n' != *s; s--) {
	col++;
    }
    for (; pi->str < s; s--) {
	if ('\n' == *s) {
	    jline++;
	}
    }
    sprintf(buf, "%s at line %d, column %d [%s:%d]", msg, jline, col, file, line);
    rb_funcall(pi->handler, oj_error_id, 3, rb_str_new2(buf), LONG2NUM(jline), LONG2NUM(col));
}

inline static void
next_non_white(ParseInfo pi) {
    for (; 1; pi->s++) {
	switch(*pi->s) {
	case ' ':
	case '\t':
	case '\f':
	case '\n':
	case '\r':
	    break;
	case '/':
	    skip_comment(pi);
	    break;
	default:
	    return;
	}
    }
}

inline static void
call_add_value(VALUE handler, VALUE value, const char *key) {
    volatile VALUE	k;

    if (0 == key) {
	k = Qnil;
    } else {
	k = rb_str_new2(key);
	k = oj_encode(k);
    }
    rb_funcall(handler, oj_add_value_id, 2, value, k);
}

inline static void
call_no_value(VALUE handler, ID method, const char *key) {
    volatile VALUE	k;

    if (0 == key) {
	k = Qnil;
    } else {
	k = rb_str_new2(key);
	k = oj_encode(k);
    }
    rb_funcall(handler, method, 1, k);
}

static void
skip_comment(ParseInfo pi) {
    pi->s++; /* skip first / */
    if ('*' == *pi->s) {
	pi->s++;
	for (; '\0' != *pi->s; pi->s++) {
	    if ('*' == *pi->s && '/' == *(pi->s + 1)) {
		pi->s++;
		return;
	    } else if ('\0' == *pi->s) {
		if (pi->has_error) {
		    call_error("comment not terminated", pi, __FILE__, __LINE__);
		} else {
		    raise_error("comment not terminated", pi->str, pi->s);
		}
	    }
	}
    } else if ('/' == *pi->s) {
	for (; 1; pi->s++) {
	    switch (*pi->s) {
	    case '\n':
	    case '\r':
	    case '\f':
	    case '\0':
		return;
	    default:
		break;
	    }
	}
    } else {
	if (pi->has_error) {
	    call_error("invalid comment", pi, __FILE__, __LINE__);
	} else {
	    raise_error("invalid comment", pi->str, pi->s);
	}
    }
}

static void
read_next(ParseInfo pi, const char *key) {
    VALUE	obj;

    if ((void*)&obj < pi->stack_min) {
	rb_raise(rb_eSysStackError, "JSON is too deeply nested");
    }
    next_non_white(pi);	/* skip white space */
    switch (*pi->s) {
    case '{':
	read_hash(pi, key);
	break;
    case '[':
	read_array(pi, key);
	break;
    case '"':
	read_str(pi, key);
	break;
    case '+':
    case '-':
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
	read_num(pi, key);
	break;
    case 'I':
	read_num(pi, key);
	break;
    case 't':
	read_true(pi, key);
	break;
    case 'f':
	read_false(pi, key);
	break;
    case 'n':
	read_nil(pi, key);
	break;
    case '\0':
	return;
    default:
	return;
    }
}

static void
read_hash(ParseInfo pi, const char *key) {
    const char	*ks;
    
    if (pi->has_hash_start) {
	call_no_value(pi->handler, oj_hash_start_id, key);
    }
    pi->s++;
    next_non_white(pi);
    if ('}' == *pi->s) {
	pi->s++;
    } else {
	while (1) {
	    next_non_white(pi);
	    ks = read_quoted_value(pi);
	    next_non_white(pi);
	    if (':' == *pi->s) {
		pi->s++;
	    } else {
		if (pi->has_error) {
		    call_error("invalid format, expected :", pi, __FILE__, __LINE__);
		}
		raise_error("invalid format, expected :", pi->str, pi->s);
	    }
	    read_next(pi, ks);
	    next_non_white(pi);
	    if ('}' == *pi->s) {
		pi->s++;
		break;
	    } else if (',' == *pi->s) {
		pi->s++;
	    } else {
		if (pi->has_error) {
		    call_error("invalid format, expected , or } while in an object", pi, __FILE__, __LINE__);
		}
		raise_error("invalid format, expected , or } while in an object", pi->str, pi->s);
	    }
	}
    }
    if (pi->has_hash_end) {
	call_no_value(pi->handler, oj_hash_end_id, key);
    }
}

static void
read_array(ParseInfo pi, const char *key) {
    if (pi->has_array_start) {
	call_no_value(pi->handler, oj_array_start_id, key);
    }
    pi->s++;
    next_non_white(pi);
    if (']' == *pi->s) {
	pi->s++;
    } else {
	while (1) {
	    read_next(pi, 0);
	    next_non_white(pi);
	    if (',' == *pi->s) {
		pi->s++;
	    } else if (']' == *pi->s) {
		pi->s++;
		break;
	    } else {
		if (pi->has_error) {
		    call_error("invalid format, expected , or ] while in an array", pi, __FILE__, __LINE__);
		}
		raise_error("invalid format, expected , or ] while in an array", pi->str, pi->s);
	    }
	}
    }
    if (pi->has_array_end) {
	call_no_value(pi->handler, oj_array_end_id, key);
    }
}

static void
read_str(ParseInfo pi, const char *key) {
    char	*text;

    text = read_quoted_value(pi);
    if (pi->has_add_value) {
	VALUE	s = rb_str_new2(text);

	s = oj_encode(s);
	call_add_value(pi->handler, s, key);
    }
}

#ifdef RUBINIUS_RUBY
#define NUM_MAX 0x07FFFFFF
#else
#define NUM_MAX (FIXNUM_MAX >> 8)
#endif

static void
read_num(ParseInfo pi, const char *key) {
    char	*start = pi->s;
    int64_t	n = 0;
    long	a = 0;
    long	div = 1;
    long	e = 0;
    int		neg = 0;
    int		eneg = 0;
    int		big = 0;

    if ('-' == *pi->s) {
	pi->s++;
	neg = 1;
    } else if ('+' == *pi->s) {
	pi->s++;
    }
    if ('I' == *pi->s) {
	if (0 != strncmp("Infinity", pi->s, 8)) {
	    if (pi->has_error) {
		call_error("number or other value", pi, __FILE__, __LINE__);
	    }
	    raise_error("number or other value", pi->str, pi->s);
	}
	pi->s += 8;
	if (neg) {
	    if (pi->has_add_value) {
		call_add_value(pi->handler, rb_float_new(-OJ_INFINITY), key);
	    }
	} else {
	    if (pi->has_add_value) {
		call_add_value(pi->handler, rb_float_new(OJ_INFINITY), key);
	    }
	}
	return;
    }
    for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
	if (big) {
	    big++;
	} else {
	    n = n * 10 + (*pi->s - '0');
	    if (NUM_MAX <= n) {
		big = 1;
	    }
	}
    }
    if ('.' == *pi->s) {
	pi->s++;
	for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
	    a = a * 10 + (*pi->s - '0');
	    div *= 10;
	    if (NUM_MAX <= div) {
		big = 1;
	    }
	}
    }
    if ('e' == *pi->s || 'E' == *pi->s) {
	pi->s++;
	if ('-' == *pi->s) {
	    pi->s++;
	    eneg = 1;
	} else if ('+' == *pi->s) {
	    pi->s++;
	}
	for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
	    e = e * 10 + (*pi->s - '0');
	    if (NUM_MAX <= e) {
		big = 1;
	    }
	}
    }
    if (0 == e && 0 == a && 1 == div) {
	if (big) {
	    char	c = *pi->s;
	
	    *pi->s = '\0';
	    if (pi->has_add_value) {
		call_add_value(pi->handler, rb_funcall(oj_bigdecimal_class, oj_new_id, 1, rb_str_new2(start)), key);
	    }
	    *pi->s = c;
	} else {
	    if (neg) {
		n = -n;
	    }
	    if (pi->has_add_value) {
		call_add_value(pi->handler, LONG2NUM(n), key);
	    }
	}
	return;
    } else { /* decimal */
	if (big) {
	    char	c = *pi->s;
	
	    *pi->s = '\0';
	    if (pi->has_add_value) {
		call_add_value(pi->handler, rb_funcall(oj_bigdecimal_class, oj_new_id, 1, rb_str_new2(start)), key);
	    }
	    *pi->s = c;
	} else {
	    double	d = (double)n + (double)a / (double)div;

	    if (neg) {
		d = -d;
	    }
	    if (1 < big) {
		e += big - 1;
	    }
	    if (0 != e) {
		if (eneg) {
		    e = -e;
		}
		d *= pow(10.0, e);
	    }
	    if (pi->has_add_value) {
		call_add_value(pi->handler, rb_float_new(d), key);
	    }
	}
    }
}

static void
read_true(ParseInfo pi, const char *key) {
    pi->s++;
    if ('r' != *pi->s || 'u' != *(pi->s + 1) || 'e' != *(pi->s + 2)) {
	if (pi->has_error) {
	    call_error("invalid format, expected 'true'", pi, __FILE__, __LINE__);
	}
	raise_error("invalid format, expected 'true'", pi->str, pi->s);
    }
    pi->s += 3;
    if (pi->has_add_value) {
	call_add_value(pi->handler, Qtrue, key);
    }
}

static void
read_false(ParseInfo pi, const char *key) {
    pi->s++;
    if ('a' != *pi->s || 'l' != *(pi->s + 1) || 's' != *(pi->s + 2) || 'e' != *(pi->s + 3)) {
	if (pi->has_error) {
	    call_error("invalid format, expected 'false'", pi, __FILE__, __LINE__);
	}
	raise_error("invalid format, expected 'false'", pi->str, pi->s);
    }
    pi->s += 4;
    if (pi->has_add_value) {
	call_add_value(pi->handler, Qfalse, key);
    }
}

static void
read_nil(ParseInfo pi, const char *key) {
    pi->s++;
    if ('u' != *pi->s || 'l' != *(pi->s + 1) || 'l' != *(pi->s + 2)) {
	if (pi->has_error) {
	    call_error("invalid format, expected 'null'", pi, __FILE__, __LINE__);
	}
	raise_error("invalid format, expected 'null'", pi->str, pi->s);
    }
    pi->s += 3;
    if (pi->has_add_value) {
	call_add_value(pi->handler, Qnil, key);
    }
}

static uint32_t
read_hex(ParseInfo pi, char *h) {
    uint32_t	b = 0;
    int		i;

    /* TBD this can be made faster with a table */
    for (i = 0; i < 4; i++, h++) {
	b = b << 4;
	if ('0' <= *h && *h <= '9') {
	    b += *h - '0';
	} else if ('A' <= *h && *h <= 'F') {
	    b += *h - 'A' + 10;
	} else if ('a' <= *h && *h <= 'f') {
	    b += *h - 'a' + 10;
	} else {
	    pi->s = h;
	    if (pi->has_error) {
		call_error("invalid hex character", pi, __FILE__, __LINE__);
	    }
	    raise_error("invalid hex character", pi->str, pi->s);
	}
    }
    return b;
}

static char*
unicode_to_chars(ParseInfo pi, char *t, uint32_t code) {
    if (0x0000007F >= code) {
	*t = (char)code;
    } else if (0x000007FF >= code) {
	*t++ = 0xC0 | (code >> 6);
	*t = 0x80 | (0x3F & code);
    } else if (0x0000FFFF >= code) {
	*t++ = 0xE0 | (code >> 12);
	*t++ = 0x80 | ((code >> 6) & 0x3F);
	*t = 0x80 | (0x3F & code);
    } else if (0x001FFFFF >= code) {
	*t++ = 0xF0 | (code >> 18);
	*t++ = 0x80 | ((code >> 12) & 0x3F);
	*t++ = 0x80 | ((code >> 6) & 0x3F);
	*t = 0x80 | (0x3F & code);
    } else if (0x03FFFFFF >= code) {
	*t++ = 0xF8 | (code >> 24);
	*t++ = 0x80 | ((code >> 18) & 0x3F);
	*t++ = 0x80 | ((code >> 12) & 0x3F);
	*t++ = 0x80 | ((code >> 6) & 0x3F);
	*t = 0x80 | (0x3F & code);
    } else if (0x7FFFFFFF >= code) {
	*t++ = 0xFC | (code >> 30);
	*t++ = 0x80 | ((code >> 24) & 0x3F);
	*t++ = 0x80 | ((code >> 18) & 0x3F);
	*t++ = 0x80 | ((code >> 12) & 0x3F);
	*t++ = 0x80 | ((code >> 6) & 0x3F);
	*t = 0x80 | (0x3F & code);
    } else {
	if (pi->has_error) {
	    call_error("invalid Unicode", pi, __FILE__, __LINE__);
	}
	raise_error("invalid Unicode", pi->str, pi->s);
    }
    return t;
}

/* Assume the value starts immediately and goes until the quote character is
 * reached again. Do not read the character after the terminating quote.
 */
static char*
read_quoted_value(ParseInfo pi) {
    char	*value = 0;
    char	*h = pi->s; /* head */
    char	*t = h;	    /* tail */
    uint32_t	code;
    
    h++;	/* skip quote character */
    t++;
    value = h;
    for (; '"' != *h; h++, t++) {
	if ('\0' == *h) {
	    pi->s = h;
	    raise_error("quoted string not terminated", pi->str, pi->s);
	} else if ('\\' == *h) {
	    h++;
	    switch (*h) {
	    case 'n':	*t = '\n';	break;
	    case 'r':	*t = '\r';	break;
	    case 't':	*t = '\t';	break;
	    case 'f':	*t = '\f';	break;
	    case 'b':	*t = '\b';	break;
	    case '"':	*t = '"';	break;
	    case '/':	*t = '/';	break;
	    case '\\':	*t = '\\';	break;
	    case 'u':
		h++;
		code = read_hex(pi, h);
		h += 3;
		if (0x0000D800 <= code && code <= 0x0000DFFF) {
		    uint32_t	c1 = (code - 0x0000D800) & 0x000003FF;
		    uint32_t	c2;

		    h++;
		    if ('\\' != *h || 'u' != *(h + 1)) {
			pi->s = h;
			if (pi->has_error) {
			    call_error("invalid escaped character", pi, __FILE__, __LINE__);
			}
			raise_error("invalid escaped character", pi->str, pi->s);
		    }
		    h += 2;
		    c2 = read_hex(pi, h);
		    h += 3;
		    c2 = (c2 - 0x0000DC00) & 0x000003FF;
		    code = ((c1 << 10) | c2) + 0x00010000;
		}
		t = unicode_to_chars(pi, t, code);
		break;
	    default:
		pi->s = h;
		if (pi->has_error) {
		    call_error("invalid escaped character", pi, __FILE__, __LINE__);
		}
		raise_error("invalid escaped character", pi->str, pi->s);
		break;
	    }
	} else if (t != h) {
	    *t = *h;
	}
    }
    *t = '\0'; /* terminate value */
    pi->s = h + 1;

    return value;
}

static void
saj_parse(VALUE handler, char *json) {
    volatile VALUE	obj = Qnil;
    struct _ParseInfo	pi;

    if (0 == json) {
	if (pi.has_error) {
	    call_error("Invalid arg, xml string can not be null", &pi, __FILE__, __LINE__);
	}
	raise_error("Invalid arg, xml string can not be null", json, 0);
    }
    /* skip UTF-8 BOM if present */
    if (0xEF == (uint8_t)*json && 0xBB == (uint8_t)json[1] && 0xBF == (uint8_t)json[2]) {
	json += 3;
    }
    /* initialize parse info */
    pi.str = json;
    pi.s = json;
#if IS_WINDOWS
    pi.stack_min = (void*)((char*)&obj - (512 * 1024)); /* assume a 1M stack and give half to ruby */
#else
    {
	struct rlimit	lim;

	if (0 == getrlimit(RLIMIT_STACK, &lim)) {
	    pi.stack_min = (void*)((char*)&obj - (lim.rlim_cur / 4 * 3)); /* let 3/4ths of the stack be used only */
	} else {
	    pi.stack_min = 0; /* indicates not to check stack limit */
	}
    }
#endif
    pi.handler = handler;
    pi.has_hash_start = rb_respond_to(handler, oj_hash_start_id);
    pi.has_hash_end = rb_respond_to(handler, oj_hash_end_id);
    pi.has_array_start = rb_respond_to(handler, oj_array_start_id);
    pi.has_array_end = rb_respond_to(handler, oj_array_end_id);
    pi.has_add_value = rb_respond_to(handler, oj_add_value_id);
    pi.has_error = rb_respond_to(handler, oj_error_id);
    read_next(&pi, 0);
    next_non_white(&pi);
    if ('\0' != *pi.s) {
	if (pi.has_error) {
	    call_error("invalid format, extra characters", &pi, __FILE__, __LINE__);
	} else {
	    raise_error("invalid format, extra characters", pi.str, pi.s);
	}
    }
}

/* call-seq: saj_parse(handler, io)
 *
 * Parses an IO stream or file containing an JSON document. Raises an exception
 * if the JSON is malformed.
 * @param [Oj::Saj] handler Saj (responds to Oj::Saj methods) like handler
 * @param [IO|String] io IO Object to read from
 */
VALUE
oj_saj_parse(int argc, VALUE *argv, VALUE self) {
    char	*json = 0;
    size_t	len = 0;
    VALUE	input = argv[1];

    if (argc < 2) {
	rb_raise(rb_eArgError, "Wrong number of arguments to saj_parse.\n");
    }
    if (rb_type(input) == T_STRING) {
	// the json string gets modified so make a copy of it
	len = RSTRING_LEN(input) + 1;
	json = ALLOC_N(char, len);
	strcpy(json, StringValuePtr(input));
    } else {
	VALUE		clas = rb_obj_class(input);
	volatile VALUE	s;
	
	if (oj_stringio_class == clas) {
	    s = rb_funcall2(input, oj_string_id, 0, 0);
	    len = RSTRING_LEN(s) + 1;
	    json = ALLOC_N(char, len);
	    strcpy(json, rb_string_value_cstr((VALUE*)&s));
#if !IS_WINDOWS
	} else if (rb_cFile == clas && 0 == FIX2INT(rb_funcall(input, oj_pos_id, 0))) {
	    int		fd = FIX2INT(rb_funcall(input, oj_fileno_id, 0));
	    ssize_t	cnt;

	    len = lseek(fd, 0, SEEK_END);
	    lseek(fd, 0, SEEK_SET);
	    json = ALLOC_N(char, len + 1);
	    if (0 >= (cnt = read(fd, json, len)) || cnt != (ssize_t)len) {
		rb_raise(rb_eIOError, "failed to read from IO Object.");
	    }
	    json[len] = '\0';
#endif
	} else if (rb_respond_to(input, oj_read_id)) {
	    s = rb_funcall2(input, oj_read_id, 0, 0);
	    len = RSTRING_LEN(s) + 1;
	    json = ALLOC_N(char, len);
	    strcpy(json, rb_string_value_cstr((VALUE*)&s));
	} else {
	    rb_raise(rb_eArgError, "saj_parse() expected a String or IO Object.");
	}
    }
    saj_parse(*argv, json);
    xfree(json);

    return Qnil;
}