/* load.c
 * Copyright (c) 2012, Peter Ohler
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 *  - Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 
 *  - Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 
 *  - Neither the name of Peter Ohler nor the names of its contributors may be
 *    used to endorse or promote products derived from this software without
 *    specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "ruby.h"
#include "oj.h"

/* Extra hint value passed to read_next() alongside the Ruby T_* type codes.
 * read_obj() passes it for the "^t" key and read_next() then reads a numeric
 * value with read_time() instead of read_num().
 * NOTE(review): 0x0100 is presumably chosen to stay clear of the T_* values;
 * confirm against ruby.h.
 */
enum {
    TIME_HINT = 0x0100,
};

/* State shared by all of the read_xxx() functions during one oj_parse() call. */
typedef struct _ParseInfo {
    char	*str;		/* buffer being read from */
    char	*s;		/* current position in buffer */
#ifdef HAVE_RUBY_ENCODING_H
    rb_encoding	*encoding;	/* encoding associated with created Strings, 0 for none */
#else
    void	*encoding;	/* always set to 0 by oj_parse() in this configuration */
#endif
    Options	options;	/* caller supplied options (mode, auto_define, encoding are read) */
} *ParseInfo;

/* Forward declarations. The readers call each other recursively: read_next()
 * dispatches to the type specific readers, and read_obj() and read_array()
 * call back into read_next() for nested values.
 */
static VALUE	classname2class(const char *name, ParseInfo pi);
static VALUE	read_next(ParseInfo pi, int hint);
static VALUE	read_obj(ParseInfo pi);
static VALUE	read_array(ParseInfo pi);
static VALUE	read_str(ParseInfo pi, int hint);
static VALUE	read_num(ParseInfo pi);
static VALUE	read_time(ParseInfo pi);
static VALUE	read_true(ParseInfo pi);
static VALUE	read_false(ParseInfo pi);
static VALUE	read_nil(ParseInfo pi);
static void	next_non_white(ParseInfo pi);
static char*	read_quoted_value(ParseInfo pi);


/* This JSON parser is a single pass, destructive parser. It is single pass
 * since it makes only one pass over the characters in the JSON document
 * string. It is destructive because it re-uses the content of the string for
 * values and places \0 characters at various places to mark the end of
 * tokens and strings. Ruby objects are created directly as document elements
 * are encountered.
 *
 * Malformed input is reported by calling raise_error() with the start of the
 * buffer and the position at which parsing stopped.
 */

/* Advance pi->s past any run of space, tab, form feed, newline, or carriage
 * return characters. Stops on the first other character, which includes the
 * terminating \0.
 */
inline static void
next_non_white(ParseInfo pi) {
    while (' ' == *pi->s || '\t' == *pi->s || '\f' == *pi->s ||
           '\n' == *pi->s || '\r' == *pi->s) {
        pi->s++;
    }
}

/* Advance pi->s until it rests on a space, tab, form feed, newline, carriage
 * return, or the terminating \0. Does not move if it already is on one.
 */
inline static void
next_white(ParseInfo pi) {
    while ('\0' != *pi->s &&
           ' ' != *pi->s && '\t' != *pi->s && '\f' != *pi->s &&
           '\n' != *pi->s && '\r' != *pi->s) {
        pi->s++;
    }
}

/* Look up the constant class_name directly under mod.
 *
 * When the constant is not defined and auto_define is non-zero a new class
 * with that name is defined under mod with oj_bag_class as its superclass and
 * returned. In every other case the result of rb_const_get_at() is returned,
 * which leaves a missing constant for rb_const_get_at() to deal with.
 */
inline static VALUE
resolve_classname(VALUE mod, const char *class_name, int auto_define) {
    ID  const_id = rb_intern(class_name);

    if (!rb_const_defined_at(mod, const_id) && auto_define) {
        return rb_define_class_under(mod, class_name, oj_bag_class);
    }
    return rb_const_get_at(mod, const_id);
}

/* Allocate an instance of the class named by name using rb_obj_alloc().
 * Returns Qnil when classname2class() reports Qundef for the name.
 */
inline static VALUE
classname2obj(const char *name, ParseInfo pi) {
    VALUE   found = classname2class(name, pi);

    return (Qundef == found) ? Qnil : rb_obj_alloc(found);
}

/* Convert a "::" separated class path such as "Outer::Inner" to a class.
 *
 * The class cache (oj_class_cache) is consulted first. On a miss the path is
 * walked one segment at a time starting from rb_cObject, each segment being
 * resolved with resolve_classname(), and the final result is stored in the
 * cache slot.
 *
 * name - \0 terminated class path taken from the document.
 * pi   - parse state, used for the auto_define option and error positions.
 *
 * Returns the class, or Qundef if a segment resolves to Qundef. Raises
 * through raise_error() on a single ':' or on a path segment that does not
 * fit in the local buffer.
 */
static VALUE
classname2class(const char *name, ParseInfo pi) {
    VALUE       clas;
    VALUE       *slot;
    int         auto_define = (Yes == pi->options->auto_define);

    if (Qundef == (clas = oj_cache_get(oj_class_cache, name, &slot))) {
        char            class_name[1024];
        char            *end = class_name + sizeof(class_name) - 1; // room for the \0
        char            *s;
        const char      *n = name;

        clas = rb_cObject;
        for (s = class_name; '\0' != *n; n++) {
            if (':' == *n) {
                *s = '\0';
                n++;
                if (':' != *n) {
                    raise_error("Invalid classname, expected another ':'", pi->str, pi->s);
                }
                if (Qundef == (clas = resolve_classname(clas, class_name, auto_define))) {
                    return Qundef;
                }
                s = class_name;
            } else {
                // The name comes straight from the document so it must not
                // be allowed to run past the end of the stack buffer.
                if (end <= s) {
                    raise_error("Invalid classname, name is too long", pi->str, pi->s);
                }
                *s++ = *n;
            }
        }
        *s = '\0';
        if (Qundef != (clas = resolve_classname(clas, class_name, auto_define))) {
            *slot = clas;
        }
    }
    return clas;
}

/* Parse a JSON document into Ruby objects.
 *
 * json    - \0 terminated, writable buffer holding the document. It is
 *           modified in place while strings are read.
 * options - parse options; mode and auto_define are consulted while reading
 *           and, when HAVE_RUBY_ENCODING_H is defined, a non-empty encoding
 *           name is looked up with rb_enc_find().
 *
 * Returns the value read. Raises through raise_error() when json is null,
 * when no value can be read, or when anything other than white space follows
 * the value.
 */
VALUE
oj_parse(char *json, Options options) {
    VALUE               obj;
    struct _ParseInfo   pi;

    if (0 == json) {
        raise_error("Invalid arg, json string can not be null", json, 0);
    }
    /* initialize parse info */
    pi.str = json;
    pi.s = json;
#ifdef HAVE_RUBY_ENCODING_H
    pi.encoding = ('\0' == *options->encoding) ? 0 : rb_enc_find(options->encoding);
#else
    pi.encoding = 0;
#endif
    pi.options = options;
    if (Qundef == (obj = read_next(&pi, 0))) {
        raise_error("no object read", pi.str, pi.s);
    }
    next_non_white(&pi); // only white space may follow the value
    if ('\0' != *pi.s) {
        raise_error("invalid format, extra characters", pi.str, pi.s);
    }
    return obj;
}

/* Skip white space and read the next value, choosing the reader from the
 * first character of the value.
 *
 * hint is forwarded to read_str() for quoted values. For values that start
 * with a sign or a digit a hint of TIME_HINT selects read_time(), any other
 * hint selects read_num().
 *
 * Returns the value read or Qundef if the character at pi->s, which includes
 * the terminating \0, does not start a value.
 */
static VALUE
read_next(ParseInfo pi, int hint) {
    next_non_white(pi);
    switch (*pi->s) {
    case '{':
        return read_obj(pi);
    case '[':
        return read_array(pi);
    case '"':
        return read_str(pi, hint);
    case 't':
        return read_true(pi);
    case 'f':
        return read_false(pi);
    case 'n':
        return read_nil(pi);
    case '+': case '-':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        return (TIME_HINT == hint) ? read_time(pi) : read_num(pi);
    default:
        break;
    }
    return Qundef;
}

/* Read a JSON object. pi->s must be on the opening '{'; on return it is just
 * past the matching '}'.
 *
 * Outside of ObjectMode the result is always a Hash. In ObjectMode, as long
 * as nothing has been created yet, a two character key starting with '^'
 * selects what is created from the value that follows:
 *   "^t" Time, "^c" Class, "^s" String, "^m" Symbol, "^o" Object instance.
 * "^i" is reserved and currently treated like any other key. After "^o" the
 * remaining entries are set as instance variables; a key starting with '~'
 * is used as the variable name without the '~', any other key gets an '@'
 * prefix. In ObjectMode a key starting with "^#" whose value is a two
 * element Array is stored in a Hash as a key/value pair taken from that
 * Array.
 *
 * Returns the object built, which is Qundef if a directive value could not
 * be read and nothing else followed. Raises through raise_error() on
 * malformed input.
 */
static VALUE
read_obj(ParseInfo pi) {
    VALUE       obj = Qundef;
    VALUE       key = Qundef;
    VALUE       val = Qundef;
    const char  *ks;
    int         obj_type = T_NONE;

    pi->s++; // skip the '{'
    next_non_white(pi);
    if ('}' == *pi->s) {
        pi->s++;
        return rb_hash_new();
    }
    while (1) {
        next_non_white(pi);
        ks = 0;
        key = Qundef;
        val = Qundef;
        if ('"' != *pi->s || Qundef == (key = read_str(pi, 0))) {
            raise_error("unexpected character", pi->str, pi->s);
        }
        next_non_white(pi);
        if (':' == *pi->s) {
            pi->s++;
        } else {
            raise_error("invalid format, expected :", pi->str, pi->s);
        }
        // read_str() may have produced a Symbol, only Strings have text to inspect
        if (T_STRING == rb_type(key)) {
            ks = StringValuePtr(key);
        } else {
            ks = 0;
        }
        if (0 != ks && Qundef == obj && ObjectMode == pi->options->mode) {
            // ks[1] is checked first so a key of just "^" is never read
            // past its terminating \0.
            if ('^' == *ks && '\0' != ks[1] && '\0' == ks[2]) { // special directions
                switch (ks[1]) {
                case 't': // Time
                    obj = read_next(pi, TIME_HINT);
                    key = Qundef;
                    break;
                case 'c': // Class
                    obj = read_next(pi, T_CLASS);
                    key = Qundef;
                    break;
                case 's': // String
                    obj = read_next(pi, T_STRING);
                    key = Qundef;
                    break;
                case 'm': // Symbol
                    obj = read_next(pi, T_SYMBOL);
                    key = Qundef;
                    break;
                case 'o': // Object
                    obj = read_next(pi, T_OBJECT);
                    obj_type = T_OBJECT;
                    key = Qundef;
                    break;
                case 'i': // Id for circular reference
                    // TBD
                    /* fallthrough */
                default:
                    // handle later
                    break;
                }
            }
        }
        if (Qundef != key) { // a regular entry, not a directive
            if (Qundef == (val = read_next(pi, 0))) {
                raise_error("unexpected character", pi->str, pi->s);
            }
            if (ObjectMode == pi->options->mode &&
                0 != ks && '^' == *ks && '#' == ks[1] &&
                (T_NONE == obj_type || T_HASH == obj_type) &&
                rb_type(val) == T_ARRAY && 2 == RARRAY_LEN(val)) {  // Hash entry
                VALUE   *np = RARRAY_PTR(val);

                key = *np;
                val = *(np + 1);
            }
            if (Qundef == obj) {
                obj = rb_hash_new();
                obj_type = T_HASH;
            }
            if (T_OBJECT == obj_type) {
                VALUE       *slot;
                ID          var_id;

                // A Symbol key leaves ks null and there is no text to build
                // an attribute name from.
                if (0 == ks) {
                    raise_error("invalid Object format, attribute names must be Strings", pi->str, pi->s);
                }
                if (Qundef == (var_id = oj_cache_get(oj_attr_cache, ks, &slot))) {
                    char        attr[1024];

                    if ('~' == *ks) {
                        strncpy(attr, ks + 1, sizeof(attr) - 1);
                    } else {
                        *attr = '@';
                        strncpy(attr + 1, ks, sizeof(attr) - 2);
                    }
                    attr[sizeof(attr) - 1] = '\0'; // strncpy() does not terminate on truncation
                    var_id = rb_intern(attr);
                    *slot = var_id;
                }
                rb_ivar_set(obj, var_id, val);
            } else if (T_HASH == obj_type) {
                rb_hash_aset(obj, key, val);
            } else {
                raise_error("invalid Object format, too many Hash entries.", pi->str, pi->s);
            }
        }
        next_non_white(pi);
        if ('}' == *pi->s) {
            pi->s++;
            break;
        } else if (',' == *pi->s) {
            pi->s++;
        } else {
            raise_error("invalid format, expected , or } while in an object", pi->str, pi->s);
        }
    }
    return obj;
}

/* Read a JSON array into a new Ruby Array. pi->s must be on the opening '[';
 * on return it is just past the matching ']'. Raises through raise_error()
 * when an element can not be read or when an element is followed by
 * anything other than ',' or ']'.
 */
static VALUE
read_array(ParseInfo pi) {
    VALUE       ary = rb_ary_new();
    VALUE       elem;

    pi->s++; // skip the '['
    next_non_white(pi);
    if (']' == *pi->s) {
        pi->s++;
        return ary;
    }
    for (;;) {
        elem = read_next(pi, 0);
        if (Qundef == elem) {
            raise_error("unexpected character", pi->str, pi->s);
        }
        rb_ary_push(ary, elem);
        next_non_white(pi);
        if (']' == *pi->s) {
            pi->s++;
            break;
        }
        if (',' == *pi->s) {
            pi->s++;
        } else {
            raise_error("invalid format, expected , or ] while in an array", pi->str, pi->s);
        }
    }
    return ary;
}

/* Create a Ruby String from text and, when an encoding was selected for the
 * parse, associate that encoding with it.
 */
static VALUE
str_from_text(ParseInfo pi, const char *text) {
    VALUE       rstr = rb_str_new2(text);

#ifdef HAVE_RUBY_ENCODING_H
    if (0 != pi->encoding) {
        rb_enc_associate(rstr, pi->encoding);
    }
#endif
    return rstr;
}

/* Create a Ruby Symbol from text. With an encoding selected the Symbol is
 * made by sending oj_to_sym_id to an encoded String, otherwise the text is
 * interned directly.
 */
static VALUE
sym_from_text(ParseInfo pi, const char *text) {
#ifdef HAVE_RUBY_ENCODING_H
    if (0 != pi->encoding) {
        return rb_funcall(str_from_text(pi, text), oj_to_sym_id, 0);
    }
#endif
    return ID2SYM(rb_intern(text));
}

/* Read a quoted string and convert it according to hint.
 *
 * Outside of ObjectMode the hint is ignored and a String is always created.
 * In ObjectMode T_CLASS yields the named class, T_OBJECT a new instance of
 * the named class, T_STRING a String, and T_SYMBOL a Symbol. With any other
 * hint text starting with a single ':' becomes a Symbol of the remaining
 * text, text starting with "::" becomes a String with the first ':' removed,
 * and all other text becomes a String.
 */
static VALUE
read_str(ParseInfo pi, int hint) {
    char        *text = read_quoted_value(pi);

    if (ObjectMode != pi->options->mode) {
        hint = T_STRING;
    }
    switch (hint) {
    case T_CLASS:
        return classname2class(text, pi);
    case T_OBJECT:
        return classname2obj(text, pi);
    case T_STRING:
        return str_from_text(pi, text);
    case T_SYMBOL:
        return sym_from_text(pi, text);
    case 0:
    default:
        break;
    }
    if (':' != *text) {
        return str_from_text(pi, text);
    }
    if (':' == text[1]) { // escaped :, it is a String
        return str_from_text(pi, text + 1);
    }
    return sym_from_text(pi, text + 1);
}

/* Threshold used by read_num(): once the integer part being accumulated
 * reaches this value the number is converted with rb_cstr_to_inum() instead
 * of being accumulated by hand.
 */
#ifdef RUBINIUS
#define NUM_MAX 0x07FFFFFF
#else
#define NUM_MAX (FIXNUM_MAX >> 8)
#endif

/* Read a number starting at pi->s and leave pi->s on the first character
 * that is not part of it.
 *
 * An optional sign is followed by integer digits, an optional '.' with
 * fraction digits, and an optional exponent. When the integer part reaches
 * NUM_MAX the sign and integer digits are handed to rb_cstr_to_inum() and
 * that result is returned without looking at a fraction or exponent. When no
 * fraction digits were read and the exponent is zero or absent an Integer is
 * returned, otherwise a Float.
 *
 * All accumulators are guarded so that arbitrarily long digit runs can not
 * cause signed overflow. Fraction digits beyond what fits in a long are
 * skipped, which truncates the value at that digit.
 */
static VALUE
read_num(ParseInfo pi) {
    char        *start = pi->s;
    int64_t     n = 0;
    long        a = 0;
    long        div = 1;
    long        e = 0;
    int         neg = 0;
    int         eneg = 0;
    int         big = 0;

    if ('-' == *pi->s) {
        pi->s++;
        neg = 1;
    } else if ('+' == *pi->s) {
        pi->s++;
    }
    for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
        // Once big is set n is no longer used so stop accumulating, any
        // further digits would only push n toward overflow.
        if (!big) {
            n = n * 10 + (*pi->s - '0');
            if (NUM_MAX <= n) {
                big = 1;
            }
        }
    }
    if (big) {
        char    c = *pi->s;
        VALUE   num;

        // temporarily terminate the digits so they can be converted in place
        *pi->s = '\0';
        num = rb_cstr_to_inum(start, 10, 0);
        *pi->s = c;

        return num;
    }
    if ('.' == *pi->s) {
        pi->s++;
        for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
            // a is always less than div so guarding div guards both
            if (div <= LONG_MAX / 10) {
                a = a * 10 + (*pi->s - '0');
                div *= 10;
            }
        }
    }
    if ('e' == *pi->s || 'E' == *pi->s) {
        pi->s++;
        if ('-' == *pi->s) {
            pi->s++;
            eneg = 1;
        } else if ('+' == *pi->s) {
            pi->s++;
        }
        for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
            // saturate instead of overflowing, pow() deals with the rest
            if (e <= (LONG_MAX - 9) / 10) {
                e = e * 10 + (*pi->s - '0');
            }
        }
    }
    if (0 == e && 0 == a && 1 == div) {
        if (neg) {
            n = -n;
        }
        return LONG2NUM(n);
    } else {
        double  d = (double)n + (double)a / (double)div;

        if (neg) {
            d = -d;
        }
        if (0 != e) {
            if (eneg) {
                e = -e;
            }
            d *= pow(10.0, e);
        }
        return DBL2NUM(d);
    }
}

/* Read digits, optionally followed by '.' and more digits, and call
 * oj_at_id on oj_time_class with the two values as arguments. The digits
 * after the '.' are passed as a plain integer, they are not scaled.
 *
 * NOTE(review): read_next() also routes values starting with '+' or '-' here
 * but a sign is not consumed, so such a value yields a time built from 0, 0
 * with pi->s left on the sign - confirm whether negative times are expected.
 * NOTE(review): the accumulators are not guarded against overflow on very
 * long digit runs.
 */
static VALUE
read_time(ParseInfo pi) {
    long        whole = 0;
    long        frac = 0;
    VALUE       at_args[2];

    while ('0' <= *pi->s && *pi->s <= '9') {
        whole = whole * 10 + (*pi->s - '0');
        pi->s++;
    }
    if ('.' == *pi->s) {
        pi->s++;
        while ('0' <= *pi->s && *pi->s <= '9') {
            frac = frac * 10 + (*pi->s - '0');
            pi->s++;
        }
    }
    at_args[0] = LONG2NUM(whole);
    at_args[1] = LONG2NUM(frac);

    return rb_funcall2(oj_time_class, oj_at_id, 2, at_args);
}

/* Read the literal true. pi->s must be on the 't'; on return it is just past
 * the 'e'. Raises through raise_error() if the 't' is not followed by "rue".
 */
static VALUE
read_true(ParseInfo pi) {
    pi->s++; // the 't' was checked by the caller
    // strncmp() stops at a \0 so a short buffer is never read past its end
    if (0 != strncmp(pi->s, "rue", 3)) {
        raise_error("invalid format, expected 'true'", pi->str, pi->s);
    }
    pi->s += 3;

    return Qtrue;
}

/* Read the literal false. pi->s must be on the 'f'; on return it is just
 * past the 'e'. Raises through raise_error() if the 'f' is not followed by
 * "alse".
 */
static VALUE
read_false(ParseInfo pi) {
    pi->s++; // the 'f' was checked by the caller
    // strncmp() stops at a \0 so a short buffer is never read past its end
    if (0 != strncmp(pi->s, "alse", 4)) {
        raise_error("invalid format, expected 'false'", pi->str, pi->s);
    }
    pi->s += 4;

    return Qfalse;
}

/* Read the literal null and return Qnil. pi->s must be on the 'n'; on return
 * it is just past the second 'l'. Raises through raise_error() if the 'n' is
 * not followed by "ull".
 */
static VALUE
read_nil(ParseInfo pi) {
    pi->s++; // the 'n' was checked by the caller
    // the || chain short circuits so a \0 is never read past
    if ('u' != *pi->s || 'l' != *(pi->s + 1) || 'l' != *(pi->s + 2)) {
        // the token being matched is the JSON literal null, not Ruby's nil
        raise_error("invalid format, expected 'null'", pi->str, pi->s);
    }
    pi->s += 3;

    return Qnil;
}

/* Return the value of the hex digit c, or -1 if c is not a hex digit. */
static int
hex_digit_value(char c) {
    if ('0' <= c && c <= '9') {
        return c - '0';
    }
    if ('A' <= c && c <= 'F') {
        return c - 'A' + 10;
    }
    if ('a' <= c && c <= 'f') {
        return c - 'a' + 10;
    }
    return -1;
}

/* Convert the two hex digits at h into one byte, the first digit being the
 * high nibble. On an invalid digit pi->s is set to that character and an
 * error is raised through raise_error().
 */
static char
read_hex(ParseInfo pi, char *h) {
    uint8_t     b = 0;
    int         i;

    for (i = 0; i < 2; i++, h++) {
        int     v = hex_digit_value(*h);

        b = (uint8_t)(b << 4);
        if (0 <= v) {
            b = (uint8_t)(b + v);
        } else {
            pi->s = h;
            raise_error("invalid hex character", pi->str, pi->s);
        }
    }
    return (char)b;
}

/* Read a double-quoted string in place. pi->s must be on the opening quote.
 * The value is taken to start immediately after it and to run until the next
 * unescaped quote character.
 *
 * Escape sequences are decoded by copying characters from the head pointer
 * (h) down to the tail pointer (t) inside the same buffer. The decoded text
 * is \0 terminated and pi->s is left on the character just past the closing
 * quote; that character itself is not read. Returns a pointer to the start
 * of the decoded text, which lives inside the parse buffer.
 *
 * Raises through raise_error() on an unterminated string, an unknown escape
 * character, or an invalid hex digit in a \u escape.
 *
 * NOTE(review): a \uXXXX escape is stored as the two raw bytes XX XX (with a
 * leading 00 byte dropped), not as the UTF-8 encoding of the code point -
 * confirm this matches what the dumper emits.
 */
static char*
read_quoted_value(ParseInfo pi) {
    char	*value = 0;
    char	*h = pi->s; // head, the read position
    char	*t = h;     // tail, the write position, never ahead of h
    
    h++;	// skip quote character
    t++;
    value = h;
    // TBD can whole string be read in and then eval-ed by ruby of there is a special character
    for (; '"' != *h; h++, t++) {
	if ('\0' == *h) {
	    pi->s = h;
	    raise_error("quoted string not terminated", pi->str, pi->s);
	} else if ('\\' == *h) {
	    // step onto the escape selector, the decoded character goes to *t
	    h++;
	    switch (*h) {
	    case 'n':	*t = '\n';	break;
	    case 'r':	*t = '\r';	break;
	    case 't':	*t = '\t';	break;
	    case 'f':	*t = '\f';	break;
	    case 'b':	*t = '\b';	break;
	    case '"':	*t = '"';	break;
	    case '/':	*t = '/';	break;
	    case '\\':	*t = '\\';	break;
	    case 'u':
		// TBD if first character is 00 then skip it
		// h moves onto the first of the four hex digits
		h++;
		*t = read_hex(pi, h);
		// h moves onto the third hex digit
		h += 2;
		// keep the first byte only if it is not zero
		if ('\0' != *t) {
		    t++;
		}
		*t = read_hex(pi, h);
		// leave h on the fourth hex digit, the loop increment steps past it
		h++;
		break;
	    default:
		pi->s = h;
		raise_error("invalid escaped character", pi->str, pi->s);
		break;
	    }
	} else if (t != h) {
	    // only copy once an escape has made the decoded text shorter
	    *t = *h;
	}
    }
    *t = '\0'; // terminate value
    pi->s = h + 1; // h is on the closing quote

    return value;
}