#include #include "sql_parser.h" #define DEBUG 0 enum status_t { start, value, first_zero, leading_zero, number, null_n, null_u, null_l1, null_l2, hex_blob, hex_blob_number, first_char, following_char, utf8_2_firstbyte, utf8_3_firstbyte, utf8_3_secondbyte, utf8_3_err_secondbyte, utf8_4_firstbyte, utf8_4_secondbyte, utf8_4_err_secondbyte, utf8_4_thirdbyte, utf8_4_err_thirdbyte, escape, error }; long parse_insert_query(const char* sql, long len, ParserCallbackHandler& ch) { const char* current = sql; long pos = 0; status_t status = start; const char* mark = 0; const char* temp_mark = 0; int value_len = 0; long ret = -1; // success for (int itor = 0; itor <= len; itor++) { // the loop iterates through the terminate character current = itor < len ? sql + itor : "\0"; pos++; value_len++; switch (status) { case start: if (*current == '(') { status = value; } else { status = start; } break; case value: if (DEBUG) std::cout << "value[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if (*current == '\'') { status = first_char; } else if (*current == 'N') { status = null_n; } else if (*current == '0') { mark = current; value_len = 1;//mark status = first_zero; } else if ( (*current >= '1' && *current <= '9') || *current == '-' || *current == '+' || *current == '.') { mark = current; value_len = 1;//mark status = number; } else if (*current == ',') { status = value; } else if (*current == ')') { ch.row_end_callback();//row end callback status = start; } else { status = error; } break; case first_zero: if (DEBUG) std::cout << "first_zero[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if (*current == ',') { ch.value_callback(mark, value_len - 1, true);//value_callback :end_value mark = 0; value_len = 0; //mark_reset status = value; } else if (*current == ')') { ch.value_callback(mark, value_len - 1, true);//value_callback :end_value mark = 0; value_len = 0; //mark_reset ch.row_end_callback();//row end callback status = start; } else if (*current == '.') { status = number; } else if (*current == 'x' || *current == 'X') { status = hex_blob; } else if (*current == '0') { mark = current; value_len = 1; //mark status = leading_zero; } else if (*current >= '1' && *current <= '9') { mark = current; value_len = 1; //mark status = number; } else { status = error; } break; case leading_zero: if (DEBUG) std::cout << "leading_zero[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if (*current == ',') { ch.value_callback(mark, value_len - 1, true);//value_callback :end_value mark = 0; value_len = 0; //mark_reset status = value; } else if (*current == ')') { ch.value_callback(mark, value_len - 1, true);//value_callback :end_value mark = 0; value_len = 0; //mark_reset ch.row_end_callback();//row end callback status = start; } else if (*current == '.') { status = number; } else if (*current == '0') { mark = current; value_len = 1; //mark status = leading_zero; } else if (*current >= '1' && *current <= '9') { mark = current; value_len = 1; //mark status = number; } else { status = error; } break; case number: if (DEBUG) std::cout << "number[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if ((*current >= '0' && *current <= '9') || *current == '.' || *current == 'e' || *current == 'E' || *current == '-') { status = number; } else if (*current == ',') { ch.value_callback(mark, value_len - 1, true);//value_callback :end_value mark = 0; value_len = 0; //mark_reset status = value; } else if (*current == ')') { ch.value_callback(mark, value_len - 1, true);//value_callback :end_value mark = 0; value_len = 0; //mark_reset ch.row_end_callback();//row end callback status = start; } else { status = error; } break; case null_n: if (DEBUG) std::cout << "null_n[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if (*current == 'U') { status = null_u; } else { status = error; } break; case null_u: if (DEBUG) std::cout << "null_u[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if (*current == 'L') { status = null_l1; } else { status = error; } break; case null_l1: if (DEBUG) std::cout << "null_l1[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if (*current == 'L') { status = null_l2; } else { status = error; } break; case null_l2: if (DEBUG) std::cout << "null_l2[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if (*current == ',') { //# 0 is NULL pointer ch.value_callback(0, 0, true);//value_callback 0 :end_value mark = 0; value_len = 0; status = value; } else if (*current == ')') { //# 0 is NULL pointer ch.value_callback(0, 0, true);//value_callback 0, :end_value mark = 0; value_len = 0; ch.row_end_callback();//row end callback status = start; } else { status = error; } break; case hex_blob: if (DEBUG) std::cout << "hex_blob[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if ((*current >= '0' && *current <= '9') || (*current >= 'A' && *current <= 'F') || (*current >= 'a' && *current <= 'f')) { status = hex_blob_number; } else { status = error; } break; case hex_blob_number: if (DEBUG) std::cout << "hex_blob_number[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if ((*current >= '0' && *current <= '9') || (*current >= 'A' && *current <= 'F') || (*current >= 'a' && *current <= 'f')) { status = hex_blob_number; } else if (*current == ',') { ch.value_callback(mark, value_len - 1, true);//value_callback, :end_value mark = 0; value_len = 0; //mark_reset status = value; } else if (*current == ')') { ch.value_callback(mark, value_len - 1, true);//value_callback, :end_value mark = 0; value_len = 0; //mark_reset ch.row_end_callback();//row_end_callback status = start; } else { status = error; } break; case first_char: if (DEBUG) std::cout << "first_char[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if (*current == '\'') { mark = current; value_len = 1; //mark ch.value_callback(mark, value_len - 1, true);//value_callback, :end_value mark = 0; value_len = 0; //mark_reset status = value; } else if (*current == '\\') { mark = current; value_len = 1; //mark status = escape; } else if ((*current & 0xe0) == 0xc0) { mark = current; value_len = 1; //mark temp_mark = current; //temp_mark status = utf8_2_firstbyte; } else if ((*current & 0xf0) == 0xe0) { mark = current; value_len = 1; //mark temp_mark = current; //temp_mark status = utf8_3_firstbyte; } else if ((*current & 0xf8) == 0xf0) { mark = current; value_len = 1; //mark temp_mark = current; //temp_mark status = utf8_4_firstbyte; } else { mark = current; value_len = 1; //mark status = following_char; } break; case utf8_2_firstbyte: if (DEBUG) std::cout << "utf8_2_firstbyte[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if ((*current & 0xc0) == 0x80) { temp_mark = 0; //temp_mark reset) status = following_char; } else { ch.value_callback(mark, temp_mark - mark, false);//value_callback mark...temp_mark mark = 0; value_len = 0; //mark_reset ch.value_callback("\xef\xbf\xbd\xef\xbf\xbd", 6, false); //value_callback "\xef\xbf\xbd\xef\xbf\xbd" temp_mark = 0; //temp_mark reset status = first_char; } break; case utf8_3_firstbyte: if (DEBUG) std::cout << "utf8_3_firstbyte[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if ((*current & 0xc0) == 0x80) { status = utf8_3_secondbyte; } else { status = utf8_3_err_secondbyte; } break; case utf8_3_secondbyte: if (DEBUG) std::cout << "utf8_3_secondbyte[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if ((*current & 0xc0) == 0x80) { temp_mark = 0; //temp_mark reset) status = following_char; } else { ch.value_callback(mark, temp_mark - mark, false);//value_callback mark...temp_mark mark = 0; value_len = 0; //mark_reset ch.value_callback("\xef\xbf\xbd\xef\xbf\xbd\xef\xbf\xbd", 9, false); //value_callback "\xef\xbf\xbd\xef\xbf\xbd\xef\xbf\xbd" temp_mark = 0; //temp_mark reset status = first_char; } break; case utf8_3_err_secondbyte: if (DEBUG) std::cout << "utf8_3_err_secondbyte[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; ch.value_callback(mark, temp_mark - mark, false);//value_callback mark...temp_mark mark = 0; value_len = 0; //mark_reset ch.value_callback("\xef\xbf\xbd\xef\xbf\xbd\xef\xbf\xbd", 9, false); //value_callback "\xef\xbf\xbd\xef\xbf\xbd\xef\xbf\xbd" temp_mark = 0; //temp_mark reset status = first_char; break; case utf8_4_firstbyte: if (DEBUG) std::cout << "utf8_4_firstbyte[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if ((*current & 0xc0) == 0x80) { status = utf8_4_secondbyte; } else { status = utf8_4_err_secondbyte; } break; case utf8_4_secondbyte: if (DEBUG) std::cout << "utf8_4_secondbyte[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if ((*current & 0xc0) == 0x80) { status = utf8_4_thirdbyte; } else { status = utf8_4_err_thirdbyte; } break; case utf8_4_err_secondbyte: if (DEBUG) std::cout << "utf8_4_err_secondbyte[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; status = utf8_4_err_thirdbyte; break; case utf8_4_thirdbyte: if (DEBUG) std::cout << "utf8_4_thirdbyte[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if ((*current & 0xc0) == 0x80) { temp_mark = 0; //temp_mark reset) status = following_char; } else { ch.value_callback(mark, temp_mark - mark, false);//value_callback mark...temp_mark mark = 0; value_len = 0; //mark_reset ch.value_callback("\xef\xbf\xbd\xef\xbf\xbd\xef\xbf\xbd\xef\xbf\xbd", 12, false); //value_callback "\xef\xbf\xbd\xef\xbf\xbd\xef\xbf\xbd\xef\xbf\xbd" temp_mark = 0; //temp_mark reset status = first_char; } break; case utf8_4_err_thirdbyte: if (DEBUG) std::cout << "utf8_4_err_thirdbyte[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; ch.value_callback(mark, temp_mark - mark, false);//value_callback mark...temp_mark mark = 0; value_len = 0; //mark_reset ch.value_callback("\xef\xbf\xbd\xef\xbf\xbd\xef\xbf\xbd\xef\xbf\xbd", 12, false); //value_callback "\xef\xbf\xbd\xef\xbf\xbd\xef\xbf\xbd\xef\xbf\xbd" temp_mark = 0; //temp_mark reset status = first_char; break; case following_char: if (DEBUG) std::cout << "following_char[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if (*current == '\'') { ch.value_callback(mark, value_len - 1, true);//value_callback, :end_value mark = 0; value_len = 0; //mark_reset status = value; } else if (*current == '\\') { ch.value_callback(mark, value_len - 1, false);//value_callback mark = current; value_len = 1; status = escape; } else if ((*current & 0xe0) == 0xc0) { temp_mark = current; //temp_mark status = utf8_2_firstbyte; } else if ((*current & 0xf0) == 0xe0) { temp_mark = current; //temp_mark status = utf8_3_firstbyte; } else if ((*current & 0xf8) == 0xf0) { temp_mark = current; //temp_mark status = utf8_4_firstbyte; } else { status = following_char; } break; case escape: if (DEBUG) std::cout << "escape[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; if (*current == '\"') { ch.value_callback("\"", 1, false);//value_callback '\"' mark = 0; value_len = 0; status = first_char; } else if (*current == '\'') { ch.value_callback("\'", 1, false);//value_callback '\'' mark = 0; value_len = 0; status = first_char; } else if (*current == 'n') { ch.value_callback("\n", 1, false);//value_callback '\n' mark = 0; value_len = 0; status = first_char; } else if (*current == '\\') { ch.value_callback("\\", 1, false);//value_callback '\\' mark = 0; value_len = 0; status = first_char; } else if (*current == 'r') { ch.value_callback("\r", 1, false);//value_callback '\r' mark = 0; value_len = 0; status = first_char; } else if (*current == 'Z') { ch.value_callback("\x1a", 1, false);//value_callback '\Z' mark = 0; value_len = 0; status = first_char; // Disabled handling of \0 for now in order to keep it compatible with the old // implementation. // } else if (*current == '0') { // ch.value_callback("\0", 1, false);//value_callback '\0' // mark = 0; value_len = 0; // status = first_char; } else { status = following_char; } break; case error: if (DEBUG) std::cout << "error[" << *current << std::endl << std::flush; ret = pos - 1; break; default: if (DEBUG) std::cout << "default[" << *current << "(0x" << std::hex << (*current & 0xff) << ")]: " << std::endl << std::flush; break; }//switch if (ret != -1) break; }//for return ret; }