spec/io/io_spec.rb in daru-0.1.5 vs spec/io/io_spec.rb in daru-0.1.6
- old
+ new
@@ -1,8 +1,16 @@
describe Daru::IO do
describe Daru::DataFrame do
context ".from_csv" do
# Hook for the ".from_csv" examples: registers WebMock stubs so that a GET to
# "http://dummy-remote-url/NAME.csv" answers with status 200 and the contents
# of the local fixture spec/fixtures/NAME.csv, for each of the four listed names.
+ before do
+ %w[matrix_test repeated_fields scientific_notation sales-funnel].each do |file|
+ WebMock
+ .stub_request(:get,"http://dummy-remote-url/#{file}.csv")
+ .to_return(status: 200, body: File.read("spec/fixtures/#{file}.csv"))
+ end
+ end
+
it "loads from a CSV file" do
df = Daru::DataFrame.from_csv('spec/fixtures/matrix_test.csv',
col_sep: ' ', headers: true)
df.vectors = [:image_resolution, :mls, :true_transform].to_index
@@ -30,10 +38,28 @@
# Loads spec/fixtures/sales-funnel.csv with no extra options and expects the
# frame's vector names, converted to an array, to equal the eight names below
# in exactly this order.
it "follows the order of columns given in CSV" do
df = Daru::DataFrame.from_csv 'spec/fixtures/sales-funnel.csv'
expect(df.vectors.to_a).to eq(%W[Account Name Rep Manager Product Quantity Price Status])
end
+
# Loads spec/fixtures/empty_rows_test.csv and expects the frame to report 13 rows.
# NOTE(review): presumably the fixture contains blank lines that must not be
# counted as rows; the fixture is not visible here -- confirm.
+ it "handles empty rows in the CSV" do
+ df = Daru::DataFrame.from_csv 'spec/fixtures/empty_rows_test.csv'
+ expect(df.nrows).to eq(13)
+ end
+
# Loads spec/fixtures/boolean_converter_test.csv with converters: [:boolean]
# and expects every value of the 'Domestic' vector to satisfy be_boolean.
# The be_boolean matcher is defined outside this excerpt.
+ it "uses the custom boolean converter correctly" do
+ df = Daru::DataFrame.from_csv 'spec/fixtures/boolean_converter_test.csv', converters: [:boolean]
+ expect(df['Domestic'].to_a).to all be_boolean
+ end
+
# Parses each of the four fixtures twice -- once from the local path and once
# from the "http://dummy-remote-url/..." address stubbed in this context's
# before hook -- and expects the two resulting data frames to be equal.
+ it "checks for equal parsing of local CSV files and remote CSV files" do
+ %w[matrix_test repeated_fields scientific_notation sales-funnel].each do |file|
+ df_local = Daru::DataFrame.from_csv("spec/fixtures/#{file}.csv")
+ df_remote = Daru::DataFrame.from_csv("http://dummy-remote-url/#{file}.csv")
+ expect(df_local).to eq(df_remote)
+ end
+ end
end
context "#write_csv" do
before do
@df = Daru::DataFrame.new({
@@ -265,9 +291,184 @@
# Round-trip check: saves @data_frame (assigned outside this excerpt) to the
# path of a Tempfile, reads it back with Daru::IO.load, and expects the loaded
# object to equal the original frame.
it "saves df to a file" do
outfile = Tempfile.new('dataframe.df')
@data_frame.save(outfile.path)
a = Daru::IO.load(outfile.path)
expect(a).to eq(@data_frame)
+ end
+ end
+
+ context "#from_html" do
+ context "in wiki info table" do
+ let(:path) { "file://#{Dir.pwd}/spec/fixtures/wiki_table_info.html" }
+ let(:order) { ["FName", "LName", "Age"] }
+ let(:index) { ["One", "Two", "Three", "Four", "Five", "Six", "Seven"] }
+ let(:name) { "Wikipedia Information Table" }
+
+ context "returns default dataframe" do
+ subject { Daru::DataFrame.from_html(path) }
+
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
+ its(:first) { is_expected.to eq (Daru::DataFrame.new(
+ [["Tinu", "Blaszczyk", "Lily", "Olatunkboh", "Adrienne", "Axelia", "Jon-Kabat"],
+ ["Elejogun", "Kostrzewski", "McGarrett", "Chijiaku", "Anthoula", "Athanasios", "Zinn"],
+ ["14", "25", "16", "22", "22", "22", "22"]],
+ order: ["First name","Last name","Age"]
+ )
+ )
+ }
+ end
+
+ context "returns user-modified dataframe" do
+ subject { Daru::DataFrame.from_html(path, order: order, index: index, name: name) }
+
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
+ its(:first) { is_expected.to eq(Daru::DataFrame.new(
+ [["Tinu", "Blaszczyk", "Lily", "Olatunkboh", "Adrienne", "Axelia", "Jon-Kabat"],
+ ["Elejogun", "Kostrzewski", "McGarrett", "Chijiaku", "Anthoula", "Athanasios", "Zinn"],
+ ["14", "25", "16", "22", "22", "22", "22"]],
+ order: ["FName","LName", "Age"],
+ index: ["One", "Two", "Three", "Four", "Five", "Six", "Seven"],
+ name: "Wikipedia Information Table"
+ )
+ )
+ }
+ end
+ end
+
# Examples for from_html against the fixture spec/fixtures/wiki_climate.html,
# addressed through a file:// URL built from the current working directory.
+ context "in wiki climate data" do
+ let(:path) { "file://#{Dir.pwd}/spec/fixtures/wiki_climate.html" }
+
# Only the path is given: the result must be an Array of data frames, and the
# index of the first frame must equal the ten row labels listed below.
+ context "returns default dataframe" do
+ subject { Daru::DataFrame.from_html(path) }
+
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
+ its('first.index') { is_expected.to eq(Daru::Index.new(
+ ["Record high °C (°F)", "Average high °C (°F)", "Daily mean °C (°F)", "Average low °C (°F)", "Record low °C (°F)", "Average rainfall mm (inches)", "Average rainy days", "Average relative humidity (%)", "Mean monthly sunshine hours", "Mean daily sunshine hours"]
+ )
+ )
+ }
+
+ end
+ end
+
# Examples for from_html against the fixture spec/fixtures/valid_markup.html,
# addressed through a file:// URL built from the current working directory.
+ context "with valid html table markups" do
+ let(:path) { "file://#{Dir.pwd}/spec/fixtures/valid_markup.html" }
+ let(:index) { ["W","X","Y","Z"] }
+ let(:name) { "Small HTML table with index" }
+
# index and name are supplied by the caller: the first returned frame must
# have columns "a" and "b", the four given row labels and the given name,
# with every cell kept as a string.
+ context "returns user-modified dataframe" do
+ subject { Daru::DataFrame.from_html(path, index: index, name: name) }
+
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
+ its(:first) { is_expected.to eq(Daru::DataFrame.new(
+ [["6", "4","9","7"],["7","0","4","0"]],
+ order: ["a","b"],
+ index: ["W","X","Y","Z"],
+ name: "Small HTML table with index"
+ )
+ )
+ }
+ end
+ end
+
# Examples for from_html against the fixture spec/fixtures/macau.html,
# addressed through a file:// URL built from the current working directory.
+ context "in year-wise passengers figure" do
+ let(:path) { "file://#{Dir.pwd}/spec/fixtures/macau.html" }
+ let(:match) { "2001" }
+ let(:name) { "Year-wise Passengers Figure" }
+
# Called with match: "2001" and a name.
# NOTE(review): match presumably restricts the result to tables containing
# that text; the matching logic is not visible here -- confirm.
+ context "returns matching dataframes with index" do
+ subject { Daru::DataFrame.from_html(path, match: match, name: name) }
+
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
# No index option is passed, yet the first frame is expected to be indexed by
# the twelve month names plus "Total".
+ its('first.index') { is_expected.to eq(Daru::Index.new(
+ ["January","February","March","April","May","June","July","August","September","October","November","December","Total"]
+ )
+ )
+ }
# The expected data is written as 13 rows of 6 cells and transposed into the
# 6 column arrays that are labelled by `order` (years 2001 down to 1996).
+ its(:first) { is_expected.to eq(Daru::DataFrame.new(
+ [
+ ["265,603","184,381","161,264","161,432","117,984",""],
+ ["249,259","264,066","209,569","168,777","150,772",""],
+ ["312,319","226,483","186,965","172,060","149,795",""],
+ ["351,793","296,541","237,449","180,241","179,049",""],
+ ["338,692","288,949","230,691","172,391","189,925",""],
+ ["332,630","271,181","231,328","157,519","175,402",""],
+ ["344,658","304,276","243,534","205,595","173,103",""],
+ ["360,899","300,418","257,616","241,140","178,118",""],
+ ["291,817","280,803","210,885","183,954","163,385",""],
+ ["327,232","298,873","231,251","205,726","176,879",""],
+ ["315,538","265,528","228,637","181,677","146,804",""],
+ ["314,866","257,929","210,922","183,975","151,362",""],
+ ["3,805,306","3,239,428","2,640,111","2,214,487","1,952,578","0"]
+ ].transpose,
+ order: ["2001","2000","1999","1998","1997","1996"],
+ index: ["January","February","March","April","May","June","July","August","September","October","November","December","Total"],
+ name: "Year-wise Passengers Figure"
+ )
+ )
+ }
+ end
+ end
+
# Examples for from_html against the fixture spec/fixtures/moneycontrol.html,
# addressed through a file:// URL built from the current working directory.
+ context "in share market data" do
+ let(:path) { "file://#{Dir.pwd}/spec/fixtures/moneycontrol.html" }
+ let(:match) { "Sun Pharma" }
+ let(:index) { ["Alpha", "Beta", "Gamma", "Delta", "Misc"] }
+ let(:name) { "Share Market Analysis" }
+
# Called with match: "Sun Pharma" only. The expected data is written as
# 5 rows of 4 cells and transposed into the 4 column arrays named by `order`.
+ context "returns matching dataframes" do
+ subject { Daru::DataFrame.from_html(path, match: match) }
+
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
+ its(:first) { is_expected.to eq(Daru::DataFrame.new(
+ [
+ ["Sun Pharma","502.60","-65.05","2,117.87"],
+ ["Reliance","1356.90","19.60","745.10"],
+ ["Tech Mahindra","379.45","-49.70","650.22"],
+ ["ITC","315.85","6.75","621.12"],
+ ["HDFC","1598.85","50.95","553.91"]
+ ].transpose,
+ order: ["Company","Price","Change","Value (Rs Cr.)"]
+ )
+ )
+ }
+ end
+
# Same match, plus caller-supplied index and name; the same cell data is
# expected with the five given row labels and the given frame name.
# NOTE(review): this example inspects :last while the sibling above inspects
# :first -- presumably the same table is both first and last among the
# matches; confirm against the fixture.
+ context "returns user-modified matching dataframes" do
+ subject { Daru::DataFrame.from_html(path, match: match, index: index, name: name) }
+
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
+ its(:last) { is_expected.to eq(Daru::DataFrame.new(
+ [
+ ["Sun Pharma","502.60","-65.05","2,117.87"],
+ ["Reliance","1356.90","19.60","745.10"],
+ ["Tech Mahindra","379.45","-49.70","650.22"],
+ ["ITC","315.85","6.75","621.12"],
+ ["HDFC","1598.85","50.95","553.91"]
+ ].transpose,
+ order: ["Company","Price","Change","Value (Rs Cr.)"],
+ index: ["Alpha", "Beta", "Gamma", "Delta", "Misc"],
+ name: "Share Market Analysis"
+ )
+ )
+ }
+ end
+
+ end
+
# Examples for from_html against the fixture spec/fixtures/eciresults.html,
# addressed through a file:// URL built from the current working directory.
+ context "in election results data" do
+ let(:path) { "file://#{Dir.pwd}/spec/fixtures/eciresults.html" }
+
# Only the path is given: the result must be an Array of data frames, and the
# vectors of the first frame must be exactly "PartyName" and "Votes Wise(%)".
+ context "returns default dataframes" do
+ subject { Daru::DataFrame.from_html(path) }
+
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
+ its('first.vectors') { is_expected.to eq(Daru::Index.new(
+ ["PartyName", "Votes Wise(%)"]
+ )
+ )
+ }
+ end
end
end
end
describe Daru::Vector do