lib/quandl/format/dataset/load.rb in quandl_format-0.1.7 vs lib/quandl/format/dataset/load.rb in quandl_format-0.1.8

- old
+ new

@@ -1,103 +1,135 @@ class Quandl::Format::Dataset::Load - SECTION_DELIMITER = '-' + SYNTAX = { + comment: '#', + data: '-', + attribute: /^([a-z0-9_]+): (.+)/, + } class << self + def each_in_file(path, &block) + each_line( File.open(path, "r"), &block ) + end + + def each_line(interface, &block) + node = new_node + # for each file line + interface.each_line do |line| + # process line + node = process_line(line, node, &block) + end + process_tail(node, &block) + end + def file(path) - string(File.read(path).strip) + string( File.read(path) ) end - + def string(input) - nodes = parse_string(input) - nodes = parse_yaml_and_csv(nodes) - nodes = nodes_to_datasets(nodes) - nodes + # prepare to collect all datasets + datasets = [] + # initialize blank node + node = new_node + # for each line + input.each_line do |line| + # process each line when encountering dataset append it to datasets + node = process_line( line, node ){|d| datasets << d } + end + # signify end + process_tail(node){|d| datasets << d } + # return datasets + datasets end - protected + def new_node(line=0) + { line: line, section: :attributes, data: '', attributes: '', data_line: 0 } + end - def parse_string(input) - nodes = [] - section_type = :data - line_index = 0 - input.each_line do |rline| - # track current line index - line_index += 1 - # strip whitespace - line = rline.strip.rstrip - # ignore comments and blank lines - next if line[0] == '#' || line.blank? - - # are we looking at an attribute? - if line =~ attribute_format - # if we are leaving the data section - # then this is the start of a new node - nodes << { attributes: '', data: '', line: line_index } if section_type == :data - # update the section to attributes - section_type = :attributes - - # have we reached the end of the attributes? - elsif line == '-' - # update the section to data - nodes[-1][:data_line] = line_index + 1 - section_type = :data - # skip to the next line - next + def process_tail(node, &block) + # signify end + process_line('-', node, &block) + process_line('tail: end', node, &block) + end + + def process_line(rline, node, &block) + # increment node line + node[:line] += 1 + # strip whitespace + line = rline.strip.rstrip + # skip comments and blank lines + return node if line[0] == SYNTAX[:comment] || line.blank? + # looking at an attribute? + if line =~ SYNTAX[:attribute] + # exiting data section? + if node[:section] == :data + # we've reached the end of a node + # send it to the server + process_node(node, &block) + # start a new node while retaining current line line + node = new_node( node[:line] ) end - # add the line to it's section in the current node. - # YAML must include whitespace - nodes[-1][section_type] += (section_type == :data) ? "#{line}\n" : rline + # update the node's section + node[:section] = :attributes + # entering the data section? + elsif line[0] == SYNTAX[:data] + # update the node + node[:data_line] = node[:line] + 1 + node[:section] = :data + # skip to the next line + return node end - nodes + # append the line to the requested section + node[ node[:section] ] += ( node[:section] == :data ) ? "#{line}\n" : rline + # return the updated node + node end - def parse_yaml_and_csv(nodes) - output = [] - nodes.each do |node| - # parse attrs as yaml - node[:attributes] = parse_yaml_attributes(node) - # we cant continue unless attributes are present - next if node[:attributes].blank? - # parse data as csv - node[:attributes][:data] = Quandl::Data::Format.csv_to_array(node[:data]) - # onwards - output << node - end - output + def process_node(node, &block) + node = parse_node(node) + # fail on errored node + return false if node == false + # convert node to dataset + dataset = convert_node_to_dataset(node) + # do whatever we need to do with the node + block.call( dataset ) unless dataset.nil? + # success + true end - def nodes_to_datasets(nodes) - datasets = [] - nodes.each do |node| - dataset = node_to_dataset(node) - datasets << dataset if dataset - end - datasets + def parse_node(node) + # parse attrs as yaml + node[:attributes] = parse_yaml_attributes(node) + # we cant continue unless attributes are present + return false if node[:attributes].blank? + # parse data as csv + node[:data] = Quandl::Data::Format.csv_to_array(node[:data]) + node end + + protected def parse_yaml_attributes(node) YAML.load( node[:attributes] ).symbolize_keys! rescue => err log_yaml_parse_error(node, err) nil end - def node_to_dataset(node) - Quandl::Format::Dataset.new( node[:attributes] ) + def convert_node_to_dataset(node) + dataset = Quandl::Format::Dataset.new( node[:attributes] ) + dataset.data = node[:data] + dataset rescue => err log_dataset_error(node, err) + nil end - def attribute_format - /^([a-z0-9_]+): (.+)/ - end - def log_yaml_parse_error(node, err) message = "Attribute parse error at line #{ node[:line] + err.line } column #{err.column}. #{err.problem} (#{err.class})\n" - message += "Did you forget to delimit the meta data section from the data section with a one or more dashes ('-')?\n" unless node[:attributes] =~ /^-/ + message += "Did you forget to delimit the meta data section from the data section with a one or more dashes ('#{SYNTAX[:data]}')?\n" unless node[:attributes] =~ /^-/ message += "--" Quandl::Logger.error(message) end def log_dataset_error( node, err ) @@ -112,10 +144,9 @@ end # include original error message += "#{$!} (#{err.class})\n" message += "--" Quandl::Logger.error(message) - nil end end end \ No newline at end of file