spec/io/io_spec.rb in daru-0.1.5 vs spec/io/io_spec.rb in daru-0.1.6

- old
+ new

@@ -1,8 +1,16 @@ describe Daru::IO do describe Daru::DataFrame do context ".from_csv" do + before do + %w[matrix_test repeated_fields scientific_notation sales-funnel].each do |file| + WebMock + .stub_request(:get,"http://dummy-remote-url/#{file}.csv") + .to_return(status: 200, body: File.read("spec/fixtures/#{file}.csv")) + end + end + it "loads from a CSV file" do df = Daru::DataFrame.from_csv('spec/fixtures/matrix_test.csv', col_sep: ' ', headers: true) df.vectors = [:image_resolution, :mls, :true_transform].to_index @@ -30,10 +38,28 @@ it "follows the order of columns given in CSV" do df = Daru::DataFrame.from_csv 'spec/fixtures/sales-funnel.csv' expect(df.vectors.to_a).to eq(%W[Account Name Rep Manager Product Quantity Price Status]) end + + it "handles empty rows in the CSV" do + df = Daru::DataFrame.from_csv 'spec/fixtures/empty_rows_test.csv' + expect(df.nrows).to eq(13) + end + + it "uses the custom boolean converter correctly" do + df = Daru::DataFrame.from_csv 'spec/fixtures/boolean_converter_test.csv', converters: [:boolean] + expect(df['Domestic'].to_a).to all be_boolean + end + + it "checks for equal parsing of local CSV files and remote CSV files" do + %w[matrix_test repeated_fields scientific_notation sales-funnel].each do |file| + df_local = Daru::DataFrame.from_csv("spec/fixtures/#{file}.csv") + df_remote = Daru::DataFrame.from_csv("http://dummy-remote-url/#{file}.csv") + expect(df_local).to eq(df_remote) + end + end end context "#write_csv" do before do @df = Daru::DataFrame.new({ @@ -265,9 +291,184 @@ it "saves df to a file" do outfile = Tempfile.new('dataframe.df') @data_frame.save(outfile.path) a = Daru::IO.load(outfile.path) expect(a).to eq(@data_frame) + end + end + + context "#from_html" do + context "in wiki info table" do + let(:path) { "file://#{Dir.pwd}/spec/fixtures/wiki_table_info.html" } + let(:order) { ["FName", "LName", "Age"] } + let(:index) { ["One", "Two", "Three", "Four", "Five", "Six", "Seven"] } + let(:name) { "Wikipedia Information Table" } + + context "returns default dataframe" do + subject { Daru::DataFrame.from_html(path) } + + it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) } + its(:first) { is_expected.to eq (Daru::DataFrame.new( + [["Tinu", "Blaszczyk", "Lily", "Olatunkboh", "Adrienne", "Axelia", "Jon-Kabat"], + ["Elejogun", "Kostrzewski", "McGarrett", "Chijiaku", "Anthoula", "Athanasios", "Zinn"], + ["14", "25", "16", "22", "22", "22", "22"]], + order: ["First name","Last name","Age"] + ) + ) + } + end + + context "returns user-modified dataframe" do + subject { Daru::DataFrame.from_html(path, order: order, index: index, name: name) } + + it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) } + its(:first) { is_expected.to eq(Daru::DataFrame.new( + [["Tinu", "Blaszczyk", "Lily", "Olatunkboh", "Adrienne", "Axelia", "Jon-Kabat"], + ["Elejogun", "Kostrzewski", "McGarrett", "Chijiaku", "Anthoula", "Athanasios", "Zinn"], + ["14", "25", "16", "22", "22", "22", "22"]], + order: ["FName","LName", "Age"], + index: ["One", "Two", "Three", "Four", "Five", "Six", "Seven"], + name: "Wikipedia Information Table" + ) + ) + } + end + end + + context "in wiki climate data" do + let(:path) { "file://#{Dir.pwd}/spec/fixtures/wiki_climate.html" } + + context "returns default dataframe" do + subject { Daru::DataFrame.from_html(path) } + + it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) } + its('first.index') { is_expected.to eq(Daru::Index.new( + ["Record high °C (°F)", "Average high °C (°F)", "Daily mean °C (°F)", "Average low °C (°F)", "Record low °C (°F)", "Average rainfall mm (inches)", "Average rainy days", "Average relative humidity (%)", "Mean monthly sunshine hours", "Mean daily sunshine hours"] + ) + ) + } + + end + end + + context "with valid html table markups" do + let(:path) { "file://#{Dir.pwd}/spec/fixtures/valid_markup.html" } + let(:index) { ["W","X","Y","Z"] } + let(:name) { "Small HTML table with index" } + + context "returns user-modified dataframe" do + subject { Daru::DataFrame.from_html(path, index: index, name: name) } + + it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) } + its(:first) { is_expected.to eq(Daru::DataFrame.new( + [["6", "4","9","7"],["7","0","4","0"]], + order: ["a","b"], + index: ["W","X","Y","Z"], + name: "Small HTML table with index" + ) + ) + } + end + end + + context "in year-wise passengers figure" do + let(:path) { "file://#{Dir.pwd}/spec/fixtures/macau.html" } + let(:match) { "2001" } + let(:name) { "Year-wise Passengers Figure" } + + context "returns matching dataframes with index" do + subject { Daru::DataFrame.from_html(path, match: match, name: name) } + + it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) } + its('first.index') { is_expected.to eq(Daru::Index.new( + ["January","February","March","April","May","June","July","August","September","October","November","December","Total"] + ) + ) + } + its(:first) { is_expected.to eq(Daru::DataFrame.new( + [ + ["265,603","184,381","161,264","161,432","117,984",""], + ["249,259","264,066","209,569","168,777","150,772",""], + ["312,319","226,483","186,965","172,060","149,795",""], + ["351,793","296,541","237,449","180,241","179,049",""], + ["338,692","288,949","230,691","172,391","189,925",""], + ["332,630","271,181","231,328","157,519","175,402",""], + ["344,658","304,276","243,534","205,595","173,103",""], + ["360,899","300,418","257,616","241,140","178,118",""], + ["291,817","280,803","210,885","183,954","163,385",""], + ["327,232","298,873","231,251","205,726","176,879",""], + ["315,538","265,528","228,637","181,677","146,804",""], + ["314,866","257,929","210,922","183,975","151,362",""], + ["3,805,306","3,239,428","2,640,111","2,214,487","1,952,578","0"] + ].transpose, + order: ["2001","2000","1999","1998","1997","1996"], + index: ["January","February","March","April","May","June","July","August","September","October","November","December","Total"], + name: "Year-wise Passengers Figure" + ) + ) + } + end + end + + context "in share market data" do + let(:path) { "file://#{Dir.pwd}/spec/fixtures/moneycontrol.html" } + let(:match) { "Sun Pharma" } + let(:index) { ["Alpha", "Beta", "Gamma", "Delta", "Misc"] } + let(:name) { "Share Market Analysis" } + + context "returns matching dataframes" do + subject { Daru::DataFrame.from_html(path, match: match) } + + it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) } + its(:first) { is_expected.to eq(Daru::DataFrame.new( + [ + ["Sun Pharma","502.60","-65.05","2,117.87"], + ["Reliance","1356.90","19.60","745.10"], + ["Tech Mahindra","379.45","-49.70","650.22"], + ["ITC","315.85","6.75","621.12"], + ["HDFC","1598.85","50.95","553.91"] + ].transpose, + order: ["Company","Price","Change","Value (Rs Cr.)"] + ) + ) + } + end + + context "returns user-modified matching dataframes" do + subject { Daru::DataFrame.from_html(path, match: match, index: index, name: name) } + + it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) } + its(:last) { is_expected.to eq(Daru::DataFrame.new( + [ + ["Sun Pharma","502.60","-65.05","2,117.87"], + ["Reliance","1356.90","19.60","745.10"], + ["Tech Mahindra","379.45","-49.70","650.22"], + ["ITC","315.85","6.75","621.12"], + ["HDFC","1598.85","50.95","553.91"] + ].transpose, + order: ["Company","Price","Change","Value (Rs Cr.)"], + index: ["Alpha", "Beta", "Gamma", "Delta", "Misc"], + name: "Share Market Analysis" + ) + ) + } + end + + end + + context "in election results data" do + let(:path) { "file://#{Dir.pwd}/spec/fixtures/eciresults.html" } + + context "returns default dataframes" do + subject { Daru::DataFrame.from_html(path) } + + it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) } + its('first.vectors') { is_expected.to eq(Daru::Index.new( + ["PartyName", "Votes Wise(%)"] + ) + ) + } + end end end end describe Daru::Vector do