Sha256: a3c6e6dc58fe2f64305dc0d4088d688ffb3fcddf789680a28454513fb98f9b4e

Contents?: true

Size: 1.42 KB

Versions: 3

Compression:

Stored size: 1.42 KB

Contents

module PubliSci
		module Readers
		class ARFF
			include PubliSci::Dataset::DataCube

			def generate_n3(arff, options={})
				arff = IO.read(arff) if File.exist? arff
				options[:no_labels] = true # unless options[:no_labels] == nil
				@options = options
				comps =  components(arff)
				obs = data(arff, comps.keys)
				generate(comps.reject{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, obs, (1..obs.first[1].size).to_a, relation(arff), options)
			end

			def relation(arff)
				arff.match(/@relation.+/i).to_a.first.split.last
			end

			def components(arff)
				#still needs support for quoted strings with whitespace
				h ={}
				arff.split("\n").select{|lin| lin =~ /^@ATTRIBUTE/i}.map{|line|
					if line =~ /\{.*}/
						name = line.match(/\s.*/).to_a.first.strip.split.first
						type = :coded
						codes = line.match(/\{.*}/).to_a.first[1..-2].split(',')
						h[name] = {type: type, codes: codes}
					else
						name = line.split[1]
						type = line.split[2]
						h[name] = {type: type}
					end
				}
				h
			end

			def data(arff, attributes)
				lines = arff.split("\n")
				data_lines = lines[lines.index(lines.select{|line| line =~ /^@DATA/i}.first)+1..-1]
				h=attributes.inject({}){|ha,attrib| ha[attrib] = []; ha}
				data_lines.map{|line|
					line = line.split ','
					attributes.each_with_index{|a,i| h[a] << line[i]}
				}
				h
			end
		end
	end
end

Version data entries

3 entries across 3 versions & 2 rubygems

Version Path
publisci-0.1.3 lib/publisci/readers/arff.rb
publisci-0.1.2 lib/bio-publisci/readers/arff.rb
bio-publisci-0.1.0 lib/bio-publisci/readers/arff.rb