lib/biomart/dataset.rb in biomart-0.1.1 vs lib/biomart/dataset.rb in biomart-0.1.2
- old
+ new
@@ -173,11 +173,12 @@
end
# Utility function to transform the tab-separated data retrieved
# from the Biomart search query into a ruby object.
def process_tsv( args, tsv )
- headers = []
+ headers = []
+ parsed_data = []
if args[:attributes]
args[:attributes].each do |attribute|
headers.push(attribute)
end
@@ -187,15 +188,59 @@
headers.push(name)
end
end
end
+ begin
+ parsed_data = CSV.parse( tsv, "\t" )
+ rescue CSV::IllegalFormatError => e
+ parsed_data = parse_tsv_line_by_line( headers.size, tsv )
+ end
+
return {
:headers => headers,
- :data => CSV.parse( tsv, "\t" )
+ :data => parsed_data
}
end
-
+
+ # Utility function to process TSV formatted data that raises errors. (Biomart
+ # has a habit of serving out this...) First attempts to use the CSV modules
+ # 'parse_line' function to read in the data, if that fails, tries to use split
+ # to recover the data.
+ def parse_tsv_line_by_line( expected_row_size, tsv )
+ parsed_data = []
+
+ data_by_line = tsv.split("\n")
+ data_by_line.each do |line|
+ elements = CSV::parse_line( line, "\t" )
+
+ if elements.size == 0
+ # This is a bad line (causing the above Exception), try and use split to recover.
+ # Alse add an empty value as split will miss the final value...
+ elements = line.split("\t")
+ elements.push(nil)
+
+ # Substitute blank strings for nils
+ elements.map! do |elem|
+ if elem === ""
+ nil
+ else
+ elem
+ end
+ end
+
+ # Add a safety clause...
+ if elements.size === expected_row_size
+ parsed_data.push(elements)
+ end
+ else
+ parsed_data.push(elements)
+ end
+ end
+
+ return parsed_data
+ end
+
# Utility function to quickly convert a search result into an array of hashes
# (keyed by the attribute name) for easier processing - this is not done by
# default on all searches as this can cause a large overhead on big data returns.
def conv_results_to_a_of_h( search_results )
result_objects = []
\ No newline at end of file