/*
* pg_record_coder.c - PG::Coder class extension
*
*/
#include "pg.h"
/* Ruby class objects of PG::RecordCoder, PG::RecordEncoder and PG::RecordDecoder.
 * They are assigned in init_pg_recordcoder(). */
VALUE rb_cPG_RecordCoder;
VALUE rb_cPG_RecordEncoder;
VALUE rb_cPG_RecordDecoder;
/*
 * Per-object state of a record coder (PG::RecordEncoder / PG::RecordDecoder).
 */
typedef struct {
/* Generic coder state. It is the first member, because the encode and decode
 * functions of this file cast the t_pg_coder pointer they receive to a
 * t_pg_recordcoder pointer. */
t_pg_coder comp;
/* Type map object that selects the coder of each single column.
 * It is reported to the GC by pg_recordcoder_mark(). */
VALUE typemap;
} t_pg_recordcoder;
/*
 * GC mark callback of record coder objects.
 *
 * Reports the type map referenced by the coder struct to the garbage collector.
 */
static void
pg_recordcoder_mark( t_pg_recordcoder *this )
{
	VALUE referenced_map = this->typemap;

	rb_gc_mark( referenced_map );
}
/*
 * Allocation function of PG::RecordEncoder.
 *
 * Wraps a new t_pg_recordcoder struct into an object of +klass+, initializes it
 * through pg_coder_init_encoder() and installs pg_typemap_all_strings as the
 * initial type map.
 *
 * Returns the new object.
 */
static VALUE
pg_recordcoder_encoder_allocate( VALUE klass )
{
	t_pg_recordcoder *coder;
	VALUE obj;

	/* -1 is passed as the free function argument of Data_Make_Struct. */
	obj = Data_Make_Struct( klass, t_pg_recordcoder, pg_recordcoder_mark, -1, coder );
	pg_coder_init_encoder( obj );
	coder->typemap = pg_typemap_all_strings;

	return obj;
}
/*
 * Allocation function of PG::RecordDecoder.
 *
 * Wraps a new t_pg_recordcoder struct into an object of +klass+, initializes it
 * through pg_coder_init_decoder() and installs pg_typemap_all_strings as the
 * initial type map.
 *
 * Returns the new object.
 */
static VALUE
pg_recordcoder_decoder_allocate( VALUE klass )
{
	t_pg_recordcoder *coder;
	VALUE obj;

	/* -1 is passed as the free function argument of Data_Make_Struct. */
	obj = Data_Make_Struct( klass, t_pg_recordcoder, pg_recordcoder_mark, -1, coder );
	pg_coder_init_decoder( obj );
	coder->typemap = pg_typemap_all_strings;

	return obj;
}
/*
 * call-seq:
 *    coder.type_map = map
 *
 * Assigns the type map that defines how the single columns are encoded or decoded.
 * +map+ must be a kind of PG::TypeMap , otherwise a TypeError is raised.
 *
 * The default is a PG::TypeMapAllStrings , so that PG::TextEncoder::String respectively
 * PG::TextDecoder::String is used for encoding/decoding of each column.
 *
 */
static VALUE
pg_recordcoder_type_map_set(VALUE self, VALUE type_map)
{
	t_pg_recordcoder *coder;

	/* Reject anything that is not a type map before touching the coder. */
	if ( rb_obj_is_kind_of(type_map, rb_cTypeMap) == Qfalse ){
		rb_raise( rb_eTypeError, "wrong elements type %s (expected some kind of PG::TypeMap)",
				rb_obj_classname( type_map ) );
	}

	coder = DATA_PTR( self );
	coder->typemap = type_map;

	return type_map;
}
/*
 * call-seq:
 *    coder.type_map -> PG::TypeMap
 *
 * Returns the PG::TypeMap that is used for encoding and decoding of columns.
 */
static VALUE
pg_recordcoder_type_map_get(VALUE self)
{
	return ((t_pg_recordcoder *)DATA_PTR( self ))->typemap;
}
/*
* Document-class: PG::TextEncoder::Record < PG::RecordEncoder
*
* This class encodes one record of columns for transmission as query parameter in text format.
* See PostgreSQL {Composite Types}[https://www.postgresql.org/docs/current/rowtypes.html] for a description of the format and how it can be used.
*
* PostgreSQL allows composite types to be used in many of the same ways that simple types can be used.
* For example, a column of a table can be declared to be of a composite type.
*
* The encoder expects the record columns as array of values.
* The single values are encoded as defined in the assigned #type_map.
* If no type_map was assigned, all values are converted to strings by PG::TextEncoder::String.
*
* It is possible to manually assign a type encoder for each column per PG::TypeMapByColumn,
* or to make use of PG::BasicTypeMapBasedOnResult to assign them based on the table OIDs.
*
* Encode a record from an <code>Array<String></code> to a +String+ in PostgreSQL Composite Type format (uses default type map TypeMapAllStrings):
* PG::TextEncoder::Record.new.encode([1, 2]) # => "(\"1\",\"2\")"
*
* Encode a record from <code>Array<Float></code> to +String+ :
* # Build a type map for two Floats
* tm = PG::TypeMapByColumn.new([PG::TextEncoder::Float.new]*2)
* # Use this type map to encode the record:
* PG::TextEncoder::Record.new(type_map: tm).encode([1,2])
* # => "(\"1.0000000000000000E+00\",\"2.0000000000000000E+00\")"
*
* Records can also be encoded and decoded directly to and from the database.
* This avoids intermediate string allocations and is very fast.
* Take the following type and table definitions:
* conn.exec("CREATE TYPE complex AS (r float, i float) ")
* conn.exec("CREATE TABLE my_table (v1 complex, v2 complex) ")
*
* A record can be encoded by adding a type map to Connection#exec_params and siblings:
* # Build a type map for the two floats "r" and "i" as in our "complex" type
* tm = PG::TypeMapByColumn.new([PG::TextEncoder::Float.new]*2)
* # Build a record encoder to encode this type as a record:
* enco = PG::TextEncoder::Record.new(type_map: tm)
* # Insert table data and use the encoder to cast the complex value "v1" from ruby array:
* conn.exec_params("INSERT INTO my_table VALUES ($1) RETURNING v1", [[1,2]], 0, PG::TypeMapByColumn.new([enco])).to_a
* # => [{"v1"=>"(1,2)"}]
*
* Alternatively the typemap can be built based on database OIDs rather than manually assigning encoders.
* # Fetch a NULL record of our type to retrieve the OIDs of the two fields "r" and "i"
* oids = conn.exec( "SELECT (NULL::complex).*" )
* # Build a type map (PG::TypeMapByColumn) for encoding the "complex" type
* etm = PG::BasicTypeMapBasedOnResult.new(conn).build_column_map( oids )
*
* It's also possible to use the BasicTypeMapForQueries to send records to the database server.
* In contrast to ORM libraries, PG doesn't have information regarding the type of data the server is expecting.
* So BasicTypeMapForQueries works based on the class of the values to be sent and it has to be instructed that a ruby array shall be cast to a record.
* # Retrieve OIDs of all basic types from the database
* etm = PG::BasicTypeMapForQueries.new(conn)
* etm.encode_array_as = :record
* # Apply the basic type registry to all values sent to the server
* conn.type_map_for_queries = etm
* # Send a complex number as an array of two integers
* conn.exec_params("INSERT INTO my_table VALUES ($1) RETURNING v1", [[1,2]]).to_a
* # => [{"v1"=>"(1,2)"}]
*
* Records can also be nested or further wrapped into other encoders like PG::TextEncoder::CopyRow.
*
* See also PG::TextDecoder::Record for the decoding direction.
*/
/*
 * Encoder function of PG::TextEncoder::Record.
 *
 * Builds a record literal in text format from the Ruby Array +value+:
 * the fields are enclosed in parentheses and separated by commas. A nil
 * entry is emitted as an empty field. Every other entry is encoded by the
 * coder that the type map returns for this column and is written enclosed in
 * double quotes, with each double quote and each backslash of the field
 * content doubled.
 *
 * conv          - the record coder, which is a t_pg_recordcoder.
 * value         - Ruby Array of column values. A TypeError is raised for
 *                 any other kind of object.
 * out           - not used by this function.
 * intermediate  - receives the newly built Ruby String with the record literal.
 * enc_idx       - encoding index that is set on the result string and passed
 *                 on to the column encoders.
 *
 * Always returns -1; the result is delivered as String in *intermediate.
 */
static int
pg_text_enc_record(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx)
{
	t_pg_recordcoder *this = (t_pg_recordcoder *)conv;
	t_pg_coder_enc_func enc_func;
	/* Automatic storage, so that the value can not be changed by a nested
	 * call of this function through a column encoder. */
	t_pg_coder *p_elem_coder;
	int i;
	t_typemap *p_typemap;
	char *current_out;
	char *end_capa_ptr;

	/* RARRAY_LEN() and rb_ary_entry() below are only valid on Arrays. */
	Check_Type( value, T_ARRAY );

	p_typemap = DATA_PTR( this->typemap );
	p_typemap->funcs.fit_to_query( this->typemap, value );

	/* Allocate a new string with embedded capacity and realloc exponential when needed. */
	PG_RB_STR_NEW( *intermediate, current_out, end_capa_ptr );
	PG_ENCODING_SET_NOCHECK(*intermediate, enc_idx);
	PG_RB_STR_ENSURE_CAPA( *intermediate, 1, current_out, end_capa_ptr );
	*current_out++ = '(';

	for( i=0; i<RARRAY_LEN(value); i++){
		char *ptr1;
		char *ptr2;
		long field_len;
		int backslashs;
		VALUE subint;
		VALUE entry;

		entry = rb_ary_entry(value, i);

		/* Separate this field from the previous one. */
		if( i > 0 ){
			PG_RB_STR_ENSURE_CAPA( *intermediate, 1, current_out, end_capa_ptr );
			*current_out++ = ',';
		}

		switch(TYPE(entry)){
			case T_NIL:
				/* emit nothing... */
				break;
			default:
				p_elem_coder = p_typemap->funcs.typecast_query_param(p_typemap, entry, i);
				enc_func = pg_coder_enc_func(p_elem_coder);

				/* 1st pass for retrieving the required memory space */
				field_len = enc_func(p_elem_coder, entry, NULL, &subint, enc_idx);

				if( field_len == -1 ){
					/* we can directly use String value in subint */
					field_len = RSTRING_LEN(subint);

					/* size of string assuming the worst case, that every character must be escaped. */
					PG_RB_STR_ENSURE_CAPA( *intermediate, field_len * 2 + 2, current_out, end_capa_ptr );

					*current_out++ = '"';
					/* Copy the string from subint and double each quote and backslash */
					for(ptr1 = RSTRING_PTR(subint); ptr1 < RSTRING_PTR(subint) + field_len; ptr1++) {
						if (*ptr1 == '"' || *ptr1 == '\\') {
							*current_out++ = *ptr1;
						}
						*current_out++ = *ptr1;
					}
					*current_out++ = '"';
				} else {
					/* 2nd pass for writing the data to prepared buffer */
					/* size of string assuming the worst case, that every character must be escaped. */
					PG_RB_STR_ENSURE_CAPA( *intermediate, field_len * 2 + 2, current_out, end_capa_ptr );

					*current_out++ = '"';

					/* Place the unescaped string at current output position. */
					field_len = enc_func(p_elem_coder, entry, current_out, &subint, enc_idx);

					ptr1 = current_out;
					ptr2 = current_out + field_len;

					/* Count the characters that must be doubled: double quote and backslash. */
					for(backslashs = 0; ptr1 != ptr2; ptr1++) {
						if(*ptr1 == '"' || *ptr1 == '\\'){
							backslashs++;
						}
					}

					ptr1 = current_out + field_len;
					ptr2 = current_out + field_len + backslashs;
					current_out = ptr2;

					/* Then store the escaped string on the final position, walking
					 * right to left, until all escape characters are placed. */
					while( ptr1 != ptr2 ) {
						*--ptr2 = *--ptr1;
						if(*ptr1 == '"' || *ptr1 == '\\'){
							*--ptr2 = *ptr1;
						}
					}
					*current_out++ = '"';
				}
		}
	}

	PG_RB_STR_ENSURE_CAPA( *intermediate, 1, current_out, end_capa_ptr );
	*current_out++ = ')';

	rb_str_set_len( *intermediate, current_out - RSTRING_PTR(*intermediate) );

	return -1;
}
/*
 * record_isspace() --- a non-locale-dependent isspace()
 *
 * isspace() was used for parsing array values in the past, but its result
 * depends on the locale setting, so that a value might be silently
 * interpreted differently. Therefore the traditional ASCII definition of
 * whitespace is hard-wired here.
 *
 * Returns 1 for space, tab, newline, carriage return, vertical tab and
 * form feed, 0 for any other character.
 */
static int
record_isspace(char ch)
{
	switch (ch) {
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\v':
		case '\f':
			return 1;
		default:
			return 0;
	}
}
/*
* Document-class: PG::TextDecoder::Record < PG::RecordDecoder
*
* This class decodes one record of values received from a composite type column in text format.
* See PostgreSQL {Composite Types}[https://www.postgresql.org/docs/current/rowtypes.html] for a description of the format and how it can be used.
*
* PostgreSQL allows composite types to be used in many of the same ways that simple types can be used.
* For example, a column of a table can be declared to be of a composite type.
*
* The columns are returned from the decoder as array of values.
* The single values are decoded as defined in the assigned #type_map.
* If no type_map was assigned, all values are converted to strings by PG::TextDecoder::String.
*
* Decode a record in Composite Type format from +String+ to <code>Array<String></code> (uses default type map TypeMapAllStrings):
* PG::TextDecoder::Record.new.decode("(1,2)") # => ["1", "2"]
*
* Decode a record from +String+ to <code>Array<Float></code> :
* # Build a type map for two Floats
* tm = PG::TypeMapByColumn.new([PG::TextDecoder::Float.new]*2)
* # Use this type map to decode the record:
* PG::TextDecoder::Record.new(type_map: tm).decode("(1,2)")
* # => [1.0, 2.0]
*
* Records can also be encoded and decoded directly to and from the database.
* This avoids intermediate String allocations and is very fast.
* Take the following type and table definitions:
* conn.exec("CREATE TYPE complex AS (r float, i float) ")
* conn.exec("CREATE TABLE my_table (v1 complex, v2 complex) ")
* conn.exec("INSERT INTO my_table VALUES((2,3), (4,5)), ((6,7), (8,9)) ")
*
* The record can be decoded by applying a type map to the PG::Result object:
* # Build a type map for two floats "r" and "i"
* tm = PG::TypeMapByColumn.new([PG::TextDecoder::Float.new]*2)
* # Build a record decoder to decode this two-value type:
* deco = PG::TextDecoder::Record.new(type_map: tm)
* # Fetch table data and use the decoder to cast the two complex values "v1" and "v2":
* conn.exec("SELECT * FROM my_table").map_types!(PG::TypeMapByColumn.new([deco]*2)).to_a
* # => [{"v1"=>[2.0, 3.0], "v2"=>[4.0, 5.0]}, {"v1"=>[6.0, 7.0], "v2"=>[8.0, 9.0]}]
*
* It's more convenient to use the PG::BasicTypeRegistry, which is based on database OIDs.
* # Fetch a NULL record of our type to retrieve the OIDs of the two fields "r" and "i"
* oids = conn.exec( "SELECT (NULL::complex).*" )
* # Build a type map (PG::TypeMapByColumn) for decoding the "complex" type
* dtm = PG::BasicTypeMapForResults.new(conn).build_column_map( oids )
* # Register a record decoder for decoding our type "complex"
* PG::BasicTypeRegistry.register_coder(PG::TextDecoder::Record.new(type_map: dtm, name: "complex"))
* # Apply the basic type registry to all results retrieved from the server
* conn.type_map_for_results = PG::BasicTypeMapForResults.new(conn)
* # Now queries decode the "complex" type (and many basic types) automatically
* conn.exec("SELECT * FROM my_table").to_a
* # => [{"v1"=>[2.0, 3.0], "v2"=>[4.0, 5.0]}, {"v1"=>[6.0, 7.0], "v2"=>[8.0, 9.0]}]
*
* Records can also be nested or further wrapped into other decoders like PG::TextDecoder::CopyRow.
*
* See also PG::TextEncoder::Record for the encoding direction (data sent to the server).
*/
/*
 * Decoder function of PG::TextDecoder::Record.
 *
 * Splits the record literal in +input_line+ into its fields, performing
 * de-escaping as needed, and returns them as a ruby Array. An empty field
 * is returned as nil. Every other field is de-escaped into a ruby String and
 * handed to the typecast function of the type map together with its field
 * number. When the typecast returns this String object itself, a new buffer
 * String is allocated for the following field; otherwise the buffer is
 * reused.
 *
 * The input is scanned up to its terminating NUL character. An ArgumentError
 * is raised if the literal is malformed.
 *
 * The parser is thankfully borrowed from the PostgreSQL sources:
 *   src/backend/utils/adt/rowtypes.c
 */
static VALUE
pg_text_dec_record(t_pg_coder *conv, char *input_line, int len, int _tuple, int _field, int enc_idx)
{
	t_pg_recordcoder *this = (t_pg_recordcoder *)conv;
	t_typemap *p_typemap = DATA_PTR( this->typemap );
	/* Return value: one entry per field */
	VALUE columns;
	/* Buffer string for the de-escaped data of the current field */
	VALUE buf_str;
	char *scan = input_line;
	char *write_ptr;
	char *buf_limit;
	int column_idx;
	int column_hint;

	/* The value delivered by the type map is used as initial capacity of the result array. */
	column_hint = p_typemap->funcs.fit_to_copy_get( this->typemap );
	columns = rb_ary_new2(column_hint);

	/* Allocate a new string with embedded capacity and realloc later with
	 * exponential growing size when needed. */
	PG_RB_STR_NEW( buf_str, write_ptr, buf_limit );

	/* Allow leading whitespace */
	for ( ; *scan && record_isspace(*scan); scan++ )
		;

	if (*scan != '(')
		rb_raise( rb_eArgError, "malformed record literal: \"%s\" - Missing left parenthesis.", input_line );
	scan++;

	column_idx = 0;
	for (;;)
	{
		if (*scan != ',' && *scan != ')')
		{
			/* Extract string for this column */
			int in_quotes = 0;
			VALUE column_value;

			for (;;)
			{
				char ch;

				/* An unquoted delimiter terminates the field. */
				if (!in_quotes && (*scan == ',' || *scan == ')'))
					break;

				ch = *scan++;
				switch (ch)
				{
					case '\0':
						rb_raise( rb_eArgError, "malformed record literal: \"%s\" - Unexpected end of input.", input_line );
						break;

					case '\\':
						/* The character following a backslash is taken literally. */
						if (*scan == '\0')
							rb_raise( rb_eArgError, "malformed record literal: \"%s\" - Unexpected end of input.", input_line );
						PG_RB_STR_ENSURE_CAPA( buf_str, 1, write_ptr, buf_limit );
						*write_ptr++ = *scan++;
						break;

					case '"':
						if (in_quotes && *scan == '"')
						{
							/* doubled quote within quote sequence */
							PG_RB_STR_ENSURE_CAPA( buf_str, 1, write_ptr, buf_limit );
							*write_ptr++ = *scan++;
						}
						else
						{
							/* Opening or closing quote */
							in_quotes = !in_quotes;
						}
						break;

					default:
						/* Add ch to output string */
						PG_RB_STR_ENSURE_CAPA( buf_str, 1, write_ptr, buf_limit );
						*write_ptr++ = ch;
						break;
				}
			}

			/* Convert the column value */
			rb_str_set_len( buf_str, write_ptr - RSTRING_PTR(buf_str) );
			column_value = p_typemap->funcs.typecast_copy_get( p_typemap, buf_str, column_idx, 0, enc_idx );
			rb_ary_push(columns, column_value);

			if( column_value == buf_str ){
				/* The buffer string is handed out to the user, so it can not be
				 * reused for the next field. */
				PG_RB_STR_NEW( buf_str, write_ptr, buf_limit );
			}
			/* Reset the pointer to the start of the output/buffer string. */
			write_ptr = RSTRING_PTR(buf_str);
		}
		else
		{
			/* Check for null: completely empty input means null */
			rb_ary_push(columns, Qnil);
		}

		if (*scan == ')') {
			/* Done if we hit closing parenthesis */
			scan++;
			break;
		}
		if (*scan != ',')
			rb_raise( rb_eArgError, "malformed record literal: \"%s\" - Too few columns.", input_line );

		/* Skip comma that separates this field from the next one */
		scan++;
		column_idx++;
	}

	/* Allow trailing whitespace */
	for ( ; *scan && record_isspace(*scan); scan++ )
		;

	if (*scan)
		rb_raise( rb_eArgError, "malformed record literal: \"%s\" - Junk after right parenthesis.", input_line );

	return columns;
}
/*
 * Defines the classes PG::RecordCoder, PG::RecordEncoder and PG::RecordDecoder
 * with their methods and allocation functions and registers the "Record"
 * coders below rb_mPG_TextEncoder and rb_mPG_TextDecoder.
 */
void
init_pg_recordcoder()
{
/* Document-class: PG::RecordCoder < PG::Coder
*
* This is the base class for all type cast classes for record (composite type) data.
*/
rb_cPG_RecordCoder = rb_define_class_under( rb_mPG, "RecordCoder", rb_cPG_Coder );
rb_define_method( rb_cPG_RecordCoder, "type_map=", pg_recordcoder_type_map_set, 1 );
rb_define_method( rb_cPG_RecordCoder, "type_map", pg_recordcoder_type_map_get, 0 );
/* Document-class: PG::RecordEncoder < PG::RecordCoder */
rb_cPG_RecordEncoder = rb_define_class_under( rb_mPG, "RecordEncoder", rb_cPG_RecordCoder );
rb_define_alloc_func( rb_cPG_RecordEncoder, pg_recordcoder_encoder_allocate );
/* Document-class: PG::RecordDecoder < PG::RecordCoder */
rb_cPG_RecordDecoder = rb_define_class_under( rb_mPG, "RecordDecoder", rb_cPG_RecordCoder );
rb_define_alloc_func( rb_cPG_RecordDecoder, pg_recordcoder_decoder_allocate );
/* Make RDoc aware of the encoder classes... */
/* rb_mPG_TextEncoder = rb_define_module_under( rb_mPG, "TextEncoder" ); */
/* dummy = rb_define_class_under( rb_mPG_TextEncoder, "Record", rb_cPG_RecordEncoder ); */
pg_define_coder( "Record", pg_text_enc_record, rb_cPG_RecordEncoder, rb_mPG_TextEncoder );
/* rb_mPG_TextDecoder = rb_define_module_under( rb_mPG, "TextDecoder" ); */
/* dummy = rb_define_class_under( rb_mPG_TextDecoder, "Record", rb_cPG_RecordDecoder ); */
pg_define_coder( "Record", pg_text_dec_record, rb_cPG_RecordDecoder, rb_mPG_TextDecoder );
}