ext/trie/trie.c in tyler-trie-0.3.1 vs ext/trie/trie.c in tyler-trie-0.3.3

- old
+ new

@@ -4,73 +4,103 @@ #include <stdio.h> #include <string.h> VALUE cTrie, cTrieNode; +/* + * Document-class: Trie + * + * A key-value data structure for string keys which is efficient memory usage and fast retrieval time. + * + */ + static VALUE rb_trie_alloc(VALUE klass) { VALUE obj; obj = Data_Wrap_Struct(klass, 0, trie_free, trie_new()); return obj; } +/* + * call-seq: + * has_key?(key) -> true/false + * + * Determines whether or not a key exists in the Trie. Use this if you don't care about the value, as it + * is marginally faster than Trie#get. + * + */ static VALUE rb_trie_has_key(VALUE self, VALUE key) { Trie *trie; Data_Get_Struct(self, Trie, trie); - if(trie_retrieve(trie, (TrieChar*)RSTRING(key)->ptr, NULL)) + if(trie_has_key(trie, (TrieChar*)RSTRING(key)->ptr)) return Qtrue; else return Qnil; } +/* + * call-seq: + * get(key) -> value + * [key] -> value + * + * Retrieves the value for a particular key (or nil) from the Trie. + * + */ static VALUE rb_trie_get(VALUE self, VALUE key) { Trie *trie; Data_Get_Struct(self, Trie, trie); TrieData data; if(trie_retrieve(trie, (TrieChar*)RSTRING(key)->ptr, &data)) - return INT2FIX(data); + return (VALUE)data; else return Qnil; } +/* + * call-seq: + * add(key) + * add(key,value) + * + * Add a key, or a key and value to the Trie. If you add a key without a value it assumes true for the value. + * + */ static VALUE rb_trie_add(VALUE self, VALUE args) { Trie *trie; Data_Get_Struct(self, Trie, trie); int size = RARRAY(args)->len; if(size < 1 || size > 2) return Qnil; VALUE key; key = RARRAY(args)->ptr[0]; - TrieData value = size == 2 ? NUM2INT(RARRAY(args)->ptr[1]) : TRIE_DATA_ERROR; + TrieData value = size == 2 ? RARRAY(args)->ptr[1] : TRIE_DATA_ERROR; if(trie_store(trie, (TrieChar*)RSTRING(key)->ptr, value)) return Qtrue; else return Qnil; } +/* + * call-seq: + * delete(key) + * + * Delete a key from the Trie. Returns true if it deleted a key, nil otherwise. + * + */ static VALUE rb_trie_delete(VALUE self, VALUE key) { Trie *trie; Data_Get_Struct(self, Trie, trie); if(trie_delete(trie, (TrieChar*)RSTRING(key)->ptr)) return Qtrue; else return Qnil; } -char* append_char(char* existing, int size, char c) { - char *new = (char*) malloc(size + 2); - memcpy(new, existing, size); - new[size] = c; - new[size + 1] = 0; - return new; -} - static VALUE walk_all_paths(Trie *trie, VALUE children, TrieState *state, char *prefix, int prefix_size) { int c; for(c = 1; c < 256; c++) { if(trie_state_is_walkable(state,c)) { TrieState *next_state = trie_state_clone(state); @@ -91,10 +121,17 @@ trie_state_free(next_state); } } } +/* + * call-seq: + * children(prefix) -> [ key, ... ] + * + * Finds all keys in the Trie beginning with the given prefix. + * + */ static VALUE rb_trie_children(VALUE self, VALUE prefix) { if(NIL_P(prefix)) return rb_ary_new(); Trie *trie; @@ -146,11 +183,11 @@ VALUE tuple = rb_ary_new(); rb_ary_push(tuple, rb_str_new2(word)); TrieData trie_data = trie_state_get_data(end_state); - rb_ary_push(tuple, INT2FIX(trie_data)); + rb_ary_push(tuple, (VALUE)trie_data); rb_ary_push(children, tuple); trie_state_free(end_state); } @@ -160,13 +197,17 @@ trie_state_free(next_state); } } } - - - +/* + * call-seq: + * children_with_values(key) -> [ [key,value], ... ] + * + * Finds all keys with their respective values in the Trie beginning with the given prefix. + * + */ static VALUE rb_trie_children_with_values(VALUE self, VALUE prefix) { if(NIL_P(prefix)) return rb_ary_new(); Trie *trie; @@ -192,11 +233,11 @@ trie_state_walk(end_state, '\0'); VALUE tuple = rb_ary_new(); rb_ary_push(tuple, prefix); TrieData trie_data = trie_state_get_data(end_state); - rb_ary_push(tuple, INT2FIX(trie_data)); + rb_ary_push(tuple, (VALUE)trie_data); rb_ary_push(children, tuple); trie_state_free(end_state); } @@ -208,16 +249,19 @@ trie_state_free(state); return children; } -static VALUE rb_trie_node_alloc(VALUE klass) { - VALUE obj; - obj = Data_Wrap_Struct(klass, 0, trie_state_free, NULL); - return obj; -} +static VALUE rb_trie_node_alloc(VALUE klass); +/* + * call-seq: + * root -> TrieNode + * + * Returns a TrieNode representing the root of the Trie. + * + */ static VALUE rb_trie_root(VALUE self) { Trie *trie; Data_Get_Struct(self, Trie, trie); VALUE trie_node = rb_trie_node_alloc(cTrieNode); @@ -228,17 +272,65 @@ rb_iv_set(trie_node, "@state", Qnil); rb_iv_set(trie_node, "@full_state", rb_str_new2("")); return trie_node; } + +/* + * Document-class: TrieNode + * + * Represents a single node in the Trie. It can be used as a cursor to walk around the Trie. + * You can grab a TrieNode for the root of the Trie by using Trie#root. + * + */ + +static VALUE rb_trie_node_alloc(VALUE klass) { + VALUE obj; + obj = Data_Wrap_Struct(klass, 0, trie_state_free, NULL); + return obj; +} + +/* nodoc */ +static VALUE rb_trie_node_initialize_copy(VALUE self, VALUE from) { + RDATA(self)->data = trie_state_clone(RDATA(from)->data); + + rb_iv_set(self, "@state", rb_iv_get(from, "@state")); + rb_iv_set(self, "@full_state", rb_iv_get(from, "@full_state")); + + return self; +} + +/* + * call-seq: + * state -> single character + * + * Returns the letter that the TrieNode instance points to. So, if the node is pointing at the "e" in "monkeys", the state is "e". + * + */ static VALUE rb_trie_node_get_state(VALUE self) { return rb_iv_get(self, "@state"); } + +/* + * call-seq: + * full_state -> string + * + * Returns the full string from the root of the Trie up to this node. So if the node pointing at the "e" in "monkeys", + * the full_state is "monke". + * + */ static VALUE rb_trie_node_get_full_state(VALUE self) { return rb_iv_get(self, "@full_state"); } +/* + * call-seq: + * walk!(letter) -> TrieNode + * + * Tries to walk down a particular branch of the Trie. It modifies the node it is called on. + * + */ static VALUE rb_trie_node_walk_bang(VALUE self, VALUE rchar) { TrieState *state; Data_Get_Struct(self, TrieState, state); if(RSTRING(rchar)->len != 1) @@ -254,10 +346,47 @@ return self; } else return Qnil; } +/* + * call-seq: + * walk(letter) -> TrieNode + * + * Tries to walk down a particular branch of the Trie. It clones the node it is called on and + * walks with that one, leaving the original unchanged. + * + */ +static VALUE rb_trie_node_walk(VALUE self, VALUE rchar) { + VALUE new_node = rb_funcall(self, rb_intern("dup"), 0); + + TrieState *state; + Data_Get_Struct(new_node, TrieState, state); + + if(RSTRING(rchar)->len != 1) + return Qnil; + + Bool result = trie_state_walk(state, *RSTRING(rchar)->ptr); + + if(result) { + rb_iv_set(new_node, "@state", rchar); + VALUE full_state = rb_iv_get(new_node, "@full_state"); + rb_str_append(full_state, rchar); + rb_iv_set(new_node, "@full_state", full_state); + return self; + } else + return Qnil; +} + +/* + * call-seq: + * value + * + * Attempts to get the value at this node of the Trie. This only works if the node is a terminal + * (i.e. end of a key), otherwise it returns nil. + * + */ static VALUE rb_trie_node_value(VALUE self) { TrieState *state; TrieState *dup; Data_Get_Struct(self, TrieState, state); @@ -265,63 +394,59 @@ trie_state_walk(dup, 0); TrieData trie_data = trie_state_get_data(dup); trie_state_free(dup); - return TRIE_DATA_ERROR == trie_data ? Qnil : INT2FIX(trie_data); + return TRIE_DATA_ERROR == trie_data ? Qnil : (VALUE)trie_data; } +/* + * call-seq: + * terminal? -> true/false + * + * Returns true if this node is at the end of a key. So if you have two keys in your Trie, "he" and + * "hello", and you walk all the way to the end of "hello", the "e" and the "o" will return true for terminal?. + * + */ static VALUE rb_trie_node_terminal(VALUE self) { TrieState *state; Data_Get_Struct(self, TrieState, state); return trie_state_is_terminal(state) ? Qtrue : Qnil; } +/* + * call-seq: + * leaf? -> true/false + * + * Returns true if there are no branches at this node. + */ static VALUE rb_trie_node_leaf(VALUE self) { TrieState *state; Data_Get_Struct(self, TrieState, state); return trie_state_is_leaf(state) ? Qtrue : Qnil; } -static VALUE rb_trie_node_clone(VALUE self) { - TrieState *state; - Data_Get_Struct(self, TrieState, state); - - VALUE new_node = rb_trie_node_alloc(cTrieNode); - - TrieState *new_state = trie_state_clone(state); - - RDATA(new_node)->data = new_state; - - rb_iv_set(new_node, "@state", rb_iv_get(self, "@state")); - rb_iv_set(new_node, "@full_state", rb_iv_get(self, "@full_state")); - - return new_node; -} - void Init_trie() { cTrie = rb_define_class("Trie", rb_cObject); rb_define_alloc_func(cTrie, rb_trie_alloc); - //rb_define_method(cTrie, "initialize", rb_trie_initialize, -2); - //rb_define_method(cTrie, "path", rb_trie_get_path, 0); rb_define_method(cTrie, "has_key?", rb_trie_has_key, 1); rb_define_method(cTrie, "get", rb_trie_get, 1); rb_define_method(cTrie, "add", rb_trie_add, -2); rb_define_method(cTrie, "delete", rb_trie_delete, 1); rb_define_method(cTrie, "children", rb_trie_children, 1); rb_define_method(cTrie, "children_with_values", rb_trie_children_with_values, 1); rb_define_method(cTrie, "root", rb_trie_root, 0); - //rb_define_method(cTrie, "save", rb_trie_save, 0); cTrieNode = rb_define_class("TrieNode", rb_cObject); rb_define_alloc_func(cTrieNode, rb_trie_node_alloc); + rb_define_method(cTrieNode, "initialize_copy", rb_trie_node_initialize_copy, 1); rb_define_method(cTrieNode, "state", rb_trie_node_get_state, 0); rb_define_method(cTrieNode, "full_state", rb_trie_node_get_full_state, 0); rb_define_method(cTrieNode, "walk!", rb_trie_node_walk_bang, 1); + rb_define_method(cTrieNode, "walk", rb_trie_node_walk, 1); rb_define_method(cTrieNode, "value", rb_trie_node_value, 0); rb_define_method(cTrieNode, "terminal?", rb_trie_node_terminal, 0); rb_define_method(cTrieNode, "leaf?", rb_trie_node_leaf, 0); - rb_define_method(cTrieNode, "clone", rb_trie_node_clone, 0); }