ext/trie/trie.c in tyler-trie-0.3.1 vs ext/trie/trie.c in tyler-trie-0.3.3
- old
+ new
@@ -4,73 +4,103 @@
#include <stdio.h>
#include <string.h>
VALUE cTrie, cTrieNode;
+/*
+ * Document-class: Trie
+ *
+ * A key-value data structure for string keys which is efficient memory usage and fast retrieval time.
+ *
+ */
+
static VALUE rb_trie_alloc(VALUE klass) {
VALUE obj;
obj = Data_Wrap_Struct(klass, 0, trie_free, trie_new());
return obj;
}
+/*
+ * call-seq:
+ * has_key?(key) -> true/false
+ *
+ * Determines whether or not a key exists in the Trie. Use this if you don't care about the value, as it
+ * is marginally faster than Trie#get.
+ *
+ */
static VALUE rb_trie_has_key(VALUE self, VALUE key) {
Trie *trie;
Data_Get_Struct(self, Trie, trie);
- if(trie_retrieve(trie, (TrieChar*)RSTRING(key)->ptr, NULL))
+ if(trie_has_key(trie, (TrieChar*)RSTRING(key)->ptr))
return Qtrue;
else
return Qnil;
}
+/*
+ * call-seq:
+ * get(key) -> value
+ * [key] -> value
+ *
+ * Retrieves the value for a particular key (or nil) from the Trie.
+ *
+ */
static VALUE rb_trie_get(VALUE self, VALUE key) {
Trie *trie;
Data_Get_Struct(self, Trie, trie);
TrieData data;
if(trie_retrieve(trie, (TrieChar*)RSTRING(key)->ptr, &data))
- return INT2FIX(data);
+ return (VALUE)data;
else
return Qnil;
}
+/*
+ * call-seq:
+ * add(key)
+ * add(key,value)
+ *
+ * Add a key, or a key and value to the Trie. If you add a key without a value it assumes true for the value.
+ *
+ */
static VALUE rb_trie_add(VALUE self, VALUE args) {
Trie *trie;
Data_Get_Struct(self, Trie, trie);
int size = RARRAY(args)->len;
if(size < 1 || size > 2)
return Qnil;
VALUE key;
key = RARRAY(args)->ptr[0];
- TrieData value = size == 2 ? NUM2INT(RARRAY(args)->ptr[1]) : TRIE_DATA_ERROR;
+ TrieData value = size == 2 ? RARRAY(args)->ptr[1] : TRIE_DATA_ERROR;
if(trie_store(trie, (TrieChar*)RSTRING(key)->ptr, value))
return Qtrue;
else
return Qnil;
}
+/*
+ * call-seq:
+ * delete(key)
+ *
+ * Delete a key from the Trie. Returns true if it deleted a key, nil otherwise.
+ *
+ */
static VALUE rb_trie_delete(VALUE self, VALUE key) {
Trie *trie;
Data_Get_Struct(self, Trie, trie);
if(trie_delete(trie, (TrieChar*)RSTRING(key)->ptr))
return Qtrue;
else
return Qnil;
}
-char* append_char(char* existing, int size, char c) {
- char *new = (char*) malloc(size + 2);
- memcpy(new, existing, size);
- new[size] = c;
- new[size + 1] = 0;
- return new;
-}
-
static VALUE walk_all_paths(Trie *trie, VALUE children, TrieState *state, char *prefix, int prefix_size) {
int c;
for(c = 1; c < 256; c++) {
if(trie_state_is_walkable(state,c)) {
TrieState *next_state = trie_state_clone(state);
@@ -91,10 +121,17 @@
trie_state_free(next_state);
}
}
}
+/*
+ * call-seq:
+ * children(prefix) -> [ key, ... ]
+ *
+ * Finds all keys in the Trie beginning with the given prefix.
+ *
+ */
static VALUE rb_trie_children(VALUE self, VALUE prefix) {
if(NIL_P(prefix))
return rb_ary_new();
Trie *trie;
@@ -146,11 +183,11 @@
VALUE tuple = rb_ary_new();
rb_ary_push(tuple, rb_str_new2(word));
TrieData trie_data = trie_state_get_data(end_state);
- rb_ary_push(tuple, INT2FIX(trie_data));
+ rb_ary_push(tuple, (VALUE)trie_data);
rb_ary_push(children, tuple);
trie_state_free(end_state);
}
@@ -160,13 +197,17 @@
trie_state_free(next_state);
}
}
}
-
-
-
+/*
+ * call-seq:
+ * children_with_values(key) -> [ [key,value], ... ]
+ *
+ * Finds all keys with their respective values in the Trie beginning with the given prefix.
+ *
+ */
static VALUE rb_trie_children_with_values(VALUE self, VALUE prefix) {
if(NIL_P(prefix))
return rb_ary_new();
Trie *trie;
@@ -192,11 +233,11 @@
trie_state_walk(end_state, '\0');
VALUE tuple = rb_ary_new();
rb_ary_push(tuple, prefix);
TrieData trie_data = trie_state_get_data(end_state);
- rb_ary_push(tuple, INT2FIX(trie_data));
+ rb_ary_push(tuple, (VALUE)trie_data);
rb_ary_push(children, tuple);
trie_state_free(end_state);
}
@@ -208,16 +249,19 @@
trie_state_free(state);
return children;
}
-static VALUE rb_trie_node_alloc(VALUE klass) {
- VALUE obj;
- obj = Data_Wrap_Struct(klass, 0, trie_state_free, NULL);
- return obj;
-}
+static VALUE rb_trie_node_alloc(VALUE klass);
+/*
+ * call-seq:
+ * root -> TrieNode
+ *
+ * Returns a TrieNode representing the root of the Trie.
+ *
+ */
static VALUE rb_trie_root(VALUE self) {
Trie *trie;
Data_Get_Struct(self, Trie, trie);
VALUE trie_node = rb_trie_node_alloc(cTrieNode);
@@ -228,17 +272,65 @@
rb_iv_set(trie_node, "@state", Qnil);
rb_iv_set(trie_node, "@full_state", rb_str_new2(""));
return trie_node;
}
+
+/*
+ * Document-class: TrieNode
+ *
+ * Represents a single node in the Trie. It can be used as a cursor to walk around the Trie.
+ * You can grab a TrieNode for the root of the Trie by using Trie#root.
+ *
+ */
+
+static VALUE rb_trie_node_alloc(VALUE klass) {
+ VALUE obj;
+ obj = Data_Wrap_Struct(klass, 0, trie_state_free, NULL);
+ return obj;
+}
+
+/* nodoc */
+static VALUE rb_trie_node_initialize_copy(VALUE self, VALUE from) {
+ RDATA(self)->data = trie_state_clone(RDATA(from)->data);
+
+ rb_iv_set(self, "@state", rb_iv_get(from, "@state"));
+ rb_iv_set(self, "@full_state", rb_iv_get(from, "@full_state"));
+
+ return self;
+}
+
+/*
+ * call-seq:
+ * state -> single character
+ *
+ * Returns the letter that the TrieNode instance points to. So, if the node is pointing at the "e" in "monkeys", the state is "e".
+ *
+ */
static VALUE rb_trie_node_get_state(VALUE self) {
return rb_iv_get(self, "@state");
}
+
+/*
+ * call-seq:
+ * full_state -> string
+ *
+ * Returns the full string from the root of the Trie up to this node. So if the node pointing at the "e" in "monkeys",
+ * the full_state is "monke".
+ *
+ */
static VALUE rb_trie_node_get_full_state(VALUE self) {
return rb_iv_get(self, "@full_state");
}
+/*
+ * call-seq:
+ * walk!(letter) -> TrieNode
+ *
+ * Tries to walk down a particular branch of the Trie. It modifies the node it is called on.
+ *
+ */
static VALUE rb_trie_node_walk_bang(VALUE self, VALUE rchar) {
TrieState *state;
Data_Get_Struct(self, TrieState, state);
if(RSTRING(rchar)->len != 1)
@@ -254,10 +346,47 @@
return self;
} else
return Qnil;
}
+/*
+ * call-seq:
+ * walk(letter) -> TrieNode
+ *
+ * Tries to walk down a particular branch of the Trie. It clones the node it is called on and
+ * walks with that one, leaving the original unchanged.
+ *
+ */
+static VALUE rb_trie_node_walk(VALUE self, VALUE rchar) {
+ VALUE new_node = rb_funcall(self, rb_intern("dup"), 0);
+
+ TrieState *state;
+ Data_Get_Struct(new_node, TrieState, state);
+
+ if(RSTRING(rchar)->len != 1)
+ return Qnil;
+
+ Bool result = trie_state_walk(state, *RSTRING(rchar)->ptr);
+
+ if(result) {
+ rb_iv_set(new_node, "@state", rchar);
+ VALUE full_state = rb_iv_get(new_node, "@full_state");
+ rb_str_append(full_state, rchar);
+ rb_iv_set(new_node, "@full_state", full_state);
+ return self;
+ } else
+ return Qnil;
+}
+
+/*
+ * call-seq:
+ * value
+ *
+ * Attempts to get the value at this node of the Trie. This only works if the node is a terminal
+ * (i.e. end of a key), otherwise it returns nil.
+ *
+ */
static VALUE rb_trie_node_value(VALUE self) {
TrieState *state;
TrieState *dup;
Data_Get_Struct(self, TrieState, state);
@@ -265,63 +394,59 @@
trie_state_walk(dup, 0);
TrieData trie_data = trie_state_get_data(dup);
trie_state_free(dup);
- return TRIE_DATA_ERROR == trie_data ? Qnil : INT2FIX(trie_data);
+ return TRIE_DATA_ERROR == trie_data ? Qnil : (VALUE)trie_data;
}
+/*
+ * call-seq:
+ * terminal? -> true/false
+ *
+ * Returns true if this node is at the end of a key. So if you have two keys in your Trie, "he" and
+ * "hello", and you walk all the way to the end of "hello", the "e" and the "o" will return true for terminal?.
+ *
+ */
static VALUE rb_trie_node_terminal(VALUE self) {
TrieState *state;
Data_Get_Struct(self, TrieState, state);
return trie_state_is_terminal(state) ? Qtrue : Qnil;
}
+/*
+ * call-seq:
+ * leaf? -> true/false
+ *
+ * Returns true if there are no branches at this node.
+ */
static VALUE rb_trie_node_leaf(VALUE self) {
TrieState *state;
Data_Get_Struct(self, TrieState, state);
return trie_state_is_leaf(state) ? Qtrue : Qnil;
}
-static VALUE rb_trie_node_clone(VALUE self) {
- TrieState *state;
- Data_Get_Struct(self, TrieState, state);
-
- VALUE new_node = rb_trie_node_alloc(cTrieNode);
-
- TrieState *new_state = trie_state_clone(state);
-
- RDATA(new_node)->data = new_state;
-
- rb_iv_set(new_node, "@state", rb_iv_get(self, "@state"));
- rb_iv_set(new_node, "@full_state", rb_iv_get(self, "@full_state"));
-
- return new_node;
-}
-
void Init_trie() {
cTrie = rb_define_class("Trie", rb_cObject);
rb_define_alloc_func(cTrie, rb_trie_alloc);
- //rb_define_method(cTrie, "initialize", rb_trie_initialize, -2);
- //rb_define_method(cTrie, "path", rb_trie_get_path, 0);
rb_define_method(cTrie, "has_key?", rb_trie_has_key, 1);
rb_define_method(cTrie, "get", rb_trie_get, 1);
rb_define_method(cTrie, "add", rb_trie_add, -2);
rb_define_method(cTrie, "delete", rb_trie_delete, 1);
rb_define_method(cTrie, "children", rb_trie_children, 1);
rb_define_method(cTrie, "children_with_values", rb_trie_children_with_values, 1);
rb_define_method(cTrie, "root", rb_trie_root, 0);
- //rb_define_method(cTrie, "save", rb_trie_save, 0);
cTrieNode = rb_define_class("TrieNode", rb_cObject);
rb_define_alloc_func(cTrieNode, rb_trie_node_alloc);
+ rb_define_method(cTrieNode, "initialize_copy", rb_trie_node_initialize_copy, 1);
rb_define_method(cTrieNode, "state", rb_trie_node_get_state, 0);
rb_define_method(cTrieNode, "full_state", rb_trie_node_get_full_state, 0);
rb_define_method(cTrieNode, "walk!", rb_trie_node_walk_bang, 1);
+ rb_define_method(cTrieNode, "walk", rb_trie_node_walk, 1);
rb_define_method(cTrieNode, "value", rb_trie_node_value, 0);
rb_define_method(cTrieNode, "terminal?", rb_trie_node_terminal, 0);
rb_define_method(cTrieNode, "leaf?", rb_trie_node_leaf, 0);
- rb_define_method(cTrieNode, "clone", rb_trie_node_clone, 0);
}