# frozen_string_literal: true
require 'masking/errors'
require 'masking/insert_statement/sql_builder'
module Masking
class InsertStatement
attr_reader :raw_statement, :table
def initialize(raw_statement)
@raw_statement = raw_statement
PARSE_REGEXP.match(raw_statement).tap do |match_data|
raise Error::InsertStatementParseError if match_data.nil?
@table = match_data[:table]
@columns_section = match_data[:columns_section]
@values_section = match_data[:values_section]
end
end
def columns
# NOTE: define and extract to ColumnSet class?
@columns ||= columns_section.scan(COLUMNS_REGEXP).flatten.map(&:to_sym)
end
def values
# NOTE: define and extract to ValueSet class?
@values ||= values_section.split(VALUE_ROW_SPLITTER)
.tap { |rows| rows.each_with_index { |_, i| recursive_pattern_value_concat(rows, i) } }
.flat_map { |row| row.scan(values_regexp) }
end
def sql
SQLBuilder.build(table: table, columns: columns, values: values)
end
private
attr_reader :columns_section, :values_section
VALUE_ROW_SPLITTER = '),('
PARSE_REGEXP = /INSERT INTO `(?
.+)` \((?.+)\) VALUES (?.+);/.freeze
COLUMNS_REGEXP = /`(.*?)`/.freeze
# NOTE: in mysqldump,
# integer/float/NULL type has dumped without single quote. e.g. -123 / 2.4 / NULL
# string/time type has dumped with single quote. e.g. 'string' / '2018-08-22 13:27:34'
# binary/blob type has dumped with _binary prefix. e.g. _binary 'binarydata'
# if there is single quote inside of value, it will dumped with escape. e.g. 'chikahiro\'s item'
# in number, there could be include Scientific notation e.g. 1.2E3 / -1.2E-3 / 1e+030 / 9.71726e-17
# refs: https://dev.mysql.com/doc/refman/5.7/en/precision-math-numbers.html
NUMBER_REGEXP = '[+eE0-9.-]+'
NULL_REGEXP = 'NULL'
STRING_TIME_REGEXP = "'.*?'"
BINARY_REGEXP = "_binary '.*?'"
VALUE_REGEXP = "(#{NUMBER_REGEXP}|#{NULL_REGEXP}|#{STRING_TIME_REGEXP}|#{BINARY_REGEXP})"
def values_regexp
@values_regexp ||= /^\(?#{([VALUE_REGEXP] * columns.count).join(?,)}\)?$/
end
# Check single quote count on each value, and just continue if it's even number.
# if it's odd, concat with next row (it means a value contains "),(" pattern)
# e.g. INSERT ... VALUES (123,'string ),( abc'),(456,'ab');
# refs: implementation of parsing CSV on ruby standard library FasterCSV (ja): https://www.clear-code.com/blog/2018/12/25.html
def recursive_pattern_value_concat(value_rows, index)
return if value_rows[index].gsub(/\\\\/, '').gsub(/\\'/, '').count(?').even?
value_rows[index] += VALUE_ROW_SPLITTER + value_rows.delete_at(index + 1)
recursive_pattern_value_concat(value_rows, index)
end
end
end