README.rdoc in remote_table-1.1.1 vs README.rdoc in remote_table-1.1.2
- old
+ new
@@ -2,26 +2,142 @@
Open local or remote XLSX, XLS, ODS, CSV and fixed-width files.
==Real-life usage
-Used by data_miner (http://github.com/seamusabshere/data_miner)
+Used by http://data.brighterplanet.com and the data_miner gem (http://github.com/seamusabshere/data_miner)
==Example
-Taken from <tt>#{GEMDIR}/test/test_remote_table.rb</tt>:
-
- should "open an XLSX" do
- t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
- assert_equal "Secure encryption of all data", t[5]["Requirements"]
- end
-
-or on the console
-
?> t = RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', 'filename' => '98guide6.csv'
=> #<RemoteTable:0x359da50 [...]>
?> t[0]
=> {"cyl"=>"6", "eng dscr"=>"DOHC VTEC", "trans dscr"=>"2MODE CLKUP", "trans"=>"Auto(L4)", "cmb"=>"20", "2pv"=>nil, "carline name"=>"NSX", "displ"=>"3.0", "ucmb"=>"23.5311", "hpv"=>nil, "4pv"=>nil, "Class"=>"TWO SEATERS", "Manufacturer"=>"ACURA", "fl"=>"P", "2lv"=>nil, "G"=>nil, "hlv"=>nil, "drv"=>"R", "cty"=>"18", "ucty"=>"19.8733", "S"=>nil, "4lv"=>nil, "fcost"=>"1050", "T"=>nil, "hwy"=>"24", "uhwy"=>"30.3612"}
+
+More examples:
+
+ RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil
+
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.csv'
+
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.ods'
+
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls'
+
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.csv'
+
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.ods'
+
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.xls'
+
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}'
+
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}', :keep_blank_rows => true
+
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA&single=true&gid=0'
+
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA'
+
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false
+
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
+
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :headers => %w{ col1 col2 col3 }
+
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=tujrgUOwDSLWb-P4KCt1qBg'
+
+ RemoteTable.new 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls', :transform => { :class => FuelOilParser }
+
+ RemoteTable.new 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
+
+ RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls'
+
+ RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
+
+ RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
+
+ RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv'
+
+ RemoteTable.new 'http://www.worldmapper.org/data/opendoc/2_worldmapper_data.ods', :sheet => 'Data', :keep_blank_rows => true
+
+ RemoteTable.new 'https://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA'
+
+ RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
+
+ RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => %w{foo bar baz}
+
+ RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => false
+
+ RemoteTable.new 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0', :form_data => 'UserTableName=T_100_Segment__All_Carriers&[...]', :compression => :zip, :glob => '/*.csv'
+
+ RemoteTable.new "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-E.htm",
+ :encoding => 'US-ASCII',
+ :row_xpath => '//table/tr[2]/td/table/tr',
+ :column_xpath => 'td'
+
+ RemoteTable.new "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
+ :encoding => 'windows-1252',
+ :row_xpath => '//table/tr[2]/td/table/tr',
+ :column_xpath => 'td',
+ :errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
+ :responder => AircraftGuru.new)
+
+ RemoteTable.new "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
+ :encoding => 'windows-1252',
+ :row_xpath => '//table/tr[2]/td/table/tr',
+ :column_xpath => 'td',
+ :errata => { :url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
+ :responder => AircraftGuru.new }
+
+ RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
+ :filename => 'Gd6-dsc.txt',
+ :format => :fixed_width,
+ :crop => 21..26, # inclusive
+ :cut => '2-',
+ :select => lambda { |row| /\A[A-Z]/.match row['code'] },
+ :schema => [[ 'code', 2, { :type => :string } ],
+ [ 'spacer', 2 ],
+ [ 'name', 52, { :type => :string } ]]
+
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
+ :format => :fixed_width,
+ :skip => 1,
+ :schema => [[ 'header4', 10, { :type => :string } ],
+ [ 'spacer', 1 ],
+ [ 'header5', 10, { :type => :string } ],
+ [ 'spacer', 12 ],
+ [ 'header6', 10, { :type => :string } ]]
+
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
+ :format => :fixed_width,
+ :keep_blank_rows => true,
+ :skip => 1,
+ :schema => [[ 'header4', 10, { :type => :string } ],
+ [ 'spacer', 1 ],
+ [ 'header5', 10, { :type => :string } ],
+ [ 'spacer', 12 ],
+ [ 'header6', 10, { :type => :string } ]]
+
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.fixed_width.txt',
+ :format => :fixed_width,
+ :skip => 1,
+ :schema => [[ 'header1', 10, { :type => :string } ],
+ [ 'spacer', 1 ],
+ [ 'header2', 10, { :type => :string } ],
+ [ 'spacer', 12 ],
+ [ 'header3', 10, { :type => :string } ]]
+
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.fixed_width.txt',
+ :format => :fixed_width,
+ :skip => 1,
+ :schema => [[ 'spacer', 11 ],
+ [ 'header2', 10, { :type => :string } ],
+ [ 'spacer', 1 ],
+ [ 'header3', 10, { :type => :string } ],
+ [ 'spacer', 1 ],
+ [ 'header1', 10, { :type => :string } ]]
+
+==Custom parsers
See the test file and also data_miner examples of custom parsers.
==Wishlist