Sha256: a3c6e6dc58fe2f64305dc0d4088d688ffb3fcddf789680a28454513fb98f9b4e
Contents?: true
Size: 1.42 KB
Versions: 3
Compression:
Stored size: 1.42 KB
Contents
module PubliSci
  module Readers
    # Reader that parses Weka ARFF text (the @relation / @attribute / @data
    # sections) and hands the parsed pieces to +generate+.
    #
    # +generate+ is not defined in this class; presumably it is provided by
    # the included PubliSci::Dataset::DataCube module (TODO confirm).
    #
    # Known limitation (carried over from the original implementation):
    # quoted names/values that contain whitespace or commas are not supported.
    class ARFF
      include PubliSci::Dataset::DataCube

      # Matches one attribute declaration (already stripped of surrounding
      # whitespace): the attribute name, then the rest of the line (a type
      # keyword or a `{a,b,c}` nominal specification).
      ATTRIBUTE_LINE = /\A@attribute\s+(?<name>[^\s{]+)\s*(?<spec>.*)\z/i

      # Matches a nominal specification and captures the text between the
      # first `{` and the last `}`.
      NOMINAL_SPEC = /\{(.*)\}/

      # Parses an ARFF document and passes the result to +generate+.
      #
      # @param arff [String] either the path of an ARFF file or the ARFF text
      #   itself; when it names an existing regular file the file is read
      # @param options [Hash] options forwarded to +generate+; +:no_labels+
      #   is always forced to +true+. The caller's hash is not modified.
      # @return whatever +generate+ returns
      # @raise [ArgumentError] if the input declares no attributes, has no
      #   @data section, or has no @relation declaration
      def generate_n3(arff, options={})
        # File.read (not IO.read) so a leading "|" can never be treated as a
        # command to run; File.file? so a directory path is not "read".
        arff = File.read(arff) if File.file?(arff)

        # Labels are unconditionally disabled, whatever the caller passed.
        # Work on a copy so the caller's hash is left untouched.
        options = options.merge(no_labels: true)
        @options = options

        comps = components(arff)
        raise ArgumentError, 'no @attribute declarations found in ARFF input' if comps.empty?

        obs = data(arff, comps.keys)
        coded, uncoded = comps.keys.partition { |name| comps[name][:codes] }

        # One sequential label (1..n) per observation row.
        labels = (1..obs.first[1].size).to_a

        # The coded attribute names are deliberately passed twice (second and
        # third argument), exactly as before; a separate copy is passed so the
        # two arguments stay independent objects.
        # NOTE(review): confirm argument meaning against the generate signature.
        generate(uncoded, coded, coded.dup, obs, labels, relation(arff), options)
      end

      # Extracts the relation name: the last whitespace-separated token of
      # the first line that starts with an @relation declaration.
      #
      # @param arff [String] ARFF text
      # @return [String] the relation name
      # @raise [ArgumentError] if there is no @relation declaration
      def relation(arff)
        # Anchored to the start of a line so "@relation" appearing inside a
        # "%" comment is not mistaken for the declaration.
        declaration = arff[/^[ \t]*@relation[ \t]+\S.*/i]
        raise ArgumentError, 'no @relation declaration found in ARFF input' unless declaration

        declaration.split.last
      end

      # Collects the attribute declarations of an ARFF document.
      #
      # Nominal attributes (`{a,b,c}`) are stored as
      # `{type: :coded, codes: [...]}`; every other attribute is stored as
      # `{type: <first token after the name, or nil>}`.
      #
      # Still needs support for quoted strings with whitespace.
      #
      # @param arff [String] ARFF text
      # @return [Hash{String => Hash}] attribute name => description, in
      #   declaration order
      def components(arff)
        arff.each_line.each_with_object({}) do |raw, attrs|
          declaration = ATTRIBUTE_LINE.match(raw.strip)
          next unless declaration

          name = declaration[:name]
          spec = declaration[:spec]
          nominal = NOMINAL_SPEC.match(spec)

          attrs[name] =
            if nominal
              # Strip each code so "{a, b}" yields "b", not " b".
              { type: :coded, codes: nominal[1].split(',').map(&:strip) }
            else
              { type: spec.split.first }
            end
        end
      end

      # Reads the @data section into one column (array of string values) per
      # attribute.
      #
      # Blank lines and "%" comment lines are skipped. Values are stripped of
      # surrounding whitespace (including the "\r" of CRLF files). A row with
      # fewer values than attributes contributes +nil+ for the missing ones.
      #
      # @param arff [String] ARFF text
      # @param attributes [Array<String>] attribute names, in column order
      # @return [Hash{String => Array<String, nil>}] attribute name => values
      # @raise [ArgumentError] if there is no @data section
      def data(arff, attributes)
        lines = arff.lines.map(&:strip)
        marker = lines.index { |line| line =~ /\A@data\b/i }
        raise ArgumentError, 'no @data section found in ARFF input' unless marker

        columns = attributes.each_with_object({}) { |name, acc| acc[name] = [] }

        lines.drop(marker + 1).each do |line|
          next if line.empty? || line.start_with?('%')

          values = line.split(',').map(&:strip)
          attributes.each_with_index { |name, i| columns[name] << values[i] }
        end

        columns
      end
    end
  end
end
Version data entries
3 entries across 3 versions & 2 rubygems
Version | Path |
---|---|
publisci-0.1.3 | lib/publisci/readers/arff.rb |
publisci-0.1.2 | lib/bio-publisci/readers/arff.rb |
bio-publisci-0.1.0 | lib/bio-publisci/readers/arff.rb |