spec/models/data_grid_spec.rb in marty-13.0.2 vs spec/models/data_grid_spec.rb in marty-14.0.0

- old
+ new

@@ -1,6 +1,7 @@ require 'spec_helper' +require 'benchmark/ips' module Marty::DataGridSpec # rubocop:disable Metrics/ModuleLength describe DataGrid do G1 = <<EOS state\tstring\tv\t\t @@ -187,13 +188,53 @@ NOT (Admin Premium Services|Admin Services|Admin Services Plus)\t-1.0 Admin Services Plus\t-1.625 Investor Services Acadamy\t-0.5 EOS + G1_with_nulls = <<EOS +strict_null_mode +state\tstring\tv\t\t +ltv\tnumrange\tv\t\t +fico\tnumrange\th\t\t + +\t\t>=600<700\t>=700<750\t>=750 +CA\t<=80\t1.1\t2.2\t3.3 +TX|HI\t>80<=105\t4.4\t5.5\t6.6 +NM\t<=80\t1.2\t2.3\t3.4 +MA\t>80<=105\t4.5\t5.6\t +NULL\t<=80\t11\t22\t33 +EOS + + G1_with_bool_nulls = <<EOS +strict_null_mode +bool_state\tboolean\tv\t\t +ltv\tnumrange\tv\t\t +fico\tnumrange\th\t\t + +\t\t>=600<700\t>=700<750\t>=750 +f\t>80<=105\t4.5\t5.6\t +NULL\t<=80\t11\t22\t33 +EOS + + G1_with_integer_nulls = <<EOS +strict_null_mode +int_state\tinteger\tv\t\t +ltv\tnumrange\tv\t\t +fico\tnumrange\th\t\t + +\t\t>=600<700\t>=700<750\t>=750 +1\t<=80\t1.1\t2.2\t3.3 +2\t>80<=105\t4.4\t5.5\t6.6 +3\t<=80\t1.2\t2.3\t3.4 +4|5\t>80<=105\t4.5\t5.6\t +NULL\t<=80\t11\t22\t33 +EOS + before(:each) do # Mcfly.whodunnit = Marty::User.find_by_login('marty') marty_whodunnit + Rails.application.config.marty.data_grid_plpg_lookups = false end def lookup_grid_helper(pt, gridname, params, follow = false, distinct = true) dgh = Marty::DataGrid.lookup_h(pt, gridname) res = Marty::DataGrid.lookup_grid_distinct_entry_h(pt, params, dgh, nil, follow, @@ -211,10 +252,114 @@ it 'should not allow imports with last blank row' do expect do dg_from_import('Gh', Gh + "\t\t\n") end.to raise_error(RuntimeError) end + + it 'show not allow import NULL fields unless strict_null_mode is on' do + expect do + dg_from_import( + 'G1_with_nulls', + G1_with_nulls.gsub("strict_null_mode\n", '') + ) + end.to raise_error( + /NULL is not supported in grids without strict_null_mode/ + ) + end + + it 'should import wildcards' do + dg = dg_from_import('G1', G1) + state_attr = dg.metadata.find { |key| key['attr'] == 'state' } + expect(state_attr['keys'].last).to be nil + expect(state_attr['wildcards'].last).to be true + expect(state_attr['wildcards']).to eq [false, false, false, false, true] + end + + it 'allows to import NULL values in string fields' do + dg = dg_from_import('G1_with_nulls', G1_with_nulls) + state_attr = dg.metadata.find { |key| key['attr'] == 'state' } + expect(state_attr['keys'].last).to be nil + expect(state_attr['wildcards'].last).to be false + + # FIXME: do we actually need mixing nulls with values? + dg = dg_from_import( + 'G1_with_nulls2', + G1_with_nulls.sub('NULL', 'NY|NULL') + ) + state_attr = dg.metadata.find { |key| key['attr'] == 'state' } + expect(state_attr['keys'].last).to eq [nil, 'NY'] + expect(state_attr['wildcards'].last).to be false + + dg = dg_from_import( + 'G1_with_nulls3', + G1_with_nulls.sub('NULL', 'NOT (NULL)') + ) + + state_attr = dg.metadata.find { |key| key['attr'] == 'state' } + expect(state_attr['keys'].last).to be nil + expect(state_attr['wildcards'].last).to be false + expect(state_attr['nots'].last).to be true + + dg = dg_from_import( + 'G1_with_nulls4', + G1_with_nulls.sub('NULL', 'NOT (NY|NULL)') + ) + + state_attr = dg.metadata.find { |key| key['attr'] == 'state' } + expect(state_attr['keys'].last).to eq [nil, 'NY'] + expect(state_attr['wildcards'].last).to be false + expect(state_attr['nots'].last).to be true + end + + it 'allows to import NULL values in integer field' do + dg = dg_from_import('G1_with_integer_nulls', G1_with_integer_nulls) + state_attr = dg.metadata.find { |key| key['attr'] == 'int_state' } + expect(state_attr['keys'].last).to be nil + expect(state_attr['wildcards'].last).to be false + + dg = dg_from_import( + 'G1_with_integer_nulls2', + G1_with_integer_nulls.sub('NULL', '6|NULL') + ) + + state_attr = dg.metadata.find { |key| key['attr'] == 'int_state' } + expect(state_attr['keys'].last).to eq [nil, 6] + expect(state_attr['nots'].last).to be false + expect(state_attr['wildcards'].last).to be false + + dg = dg_from_import( + 'G1_with_integer_nulls3', + G1_with_integer_nulls.sub('NULL', 'NOT (NULL)') + ) + + state_attr = dg.metadata.find { |key| key['attr'] == 'int_state' } + expect(state_attr['keys'].last).to be nil + expect(state_attr['nots'].last).to be true + expect(state_attr['wildcards'].last).to be false + + dg = dg_from_import( + 'G1_with_integer_nulls4', + G1_with_integer_nulls.sub('NULL', 'NOT (6|NULL)') + ) + + state_attr = dg.metadata.find { |key| key['attr'] == 'int_state' } + expect(state_attr['keys'].last).to eq [nil, 6] + expect(state_attr['nots'].last).to be true + expect(state_attr['wildcards'].last).to be false + end + + it 'allows to import NULL values in boolean field' do + dg = dg_from_import('G1_with_bool_nulls', G1_with_bool_nulls) + state_attr = dg.metadata.find { |key| key['attr'] == 'bool_state' } + expect(state_attr['keys'].last).to be nil + expect(state_attr['wildcards'].last).to be false + + dg = dg_from_import('G1_with_bool_nulls2', G1_with_bool_nulls.sub('NULL', 'NOT (NULL)')) + state_attr = dg.metadata.find { |key| key['attr'] == 'bool_state' } + expect(state_attr['keys'].last).to be nil + expect(state_attr['nots'].last).to be true + end end describe 'validations' do it 'should not allow bad axis types' do expect do @@ -304,11 +449,11 @@ describe 'lookups for infinity' do let(:pt) { 'infinity' } before(:each) do %w[G1 G2 G3 G4 G5 G6 G7 G8 Ga Gb - Gc Gd Ge Gf Gg Gh Gj Gl].each do |g| + Gc Gd Ge Gf Gg Gh Gj Gl G1_with_nulls].each do |g| dg_from_import(g, "Marty::DataGridSpec::#{g}".constantize) end end context 'should handle NULL key values' do @@ -349,10 +494,28 @@ res = Marty::DataGrid.lookup_grid_h(pt, dgh, { 'i' => 13, 'n' => 15 }, true) expect(res).to eq('N') end end + it 'should cast types' do + res = Marty::DataGrid.lookup_grid_h(pt, 'Gf', { 'i' => 13, 'n' => 15 }, true) + expect(res).to eq('N') + + res = Marty::DataGrid.lookup_grid_h(pt, 'Gf', { 'i' => '13', 'n' => '15' }, true) + expect(res).to eq('N') + + res = Marty::DataGrid.lookup_grid_h(pt, 'Gf', { 'b' => 'true', 'i4' => '6' }, false) + expect(res).to eq('Y') + + res = Marty::DataGrid.lookup_grid_h(pt, 'Gg', { 'i1' => 2, 'i2' => 1 }, false) + expect(res).to eq(1) + + dg_from_import('G9', G9) + res = Marty::DataGrid.lookup_grid_h(pt, 'G9', { 'state' => 4, 'ltv' => 81 }, false) + expect(res).to eq(456) + end + it 'should handle ambiguous lookups' do h1 = { 'property_state' => 'NY', 'county_name' => 'R', } @@ -452,11 +615,11 @@ 'G2', 'fico' => 720, 'ltv' => 100, 'cltv' => 110.1, ) - end.to raise_error(RuntimeError) + end.to raise_error(RuntimeError, /matches > 1/) end it 'should return nil when matching data grid cell is nil' do res = lookup_grid_helper('infinity', 'G1', @@ -472,54 +635,138 @@ 'G1', 'fico' => 720, 'state' => 'GU', 'ltv' => 80, ) + expect(res).to eq [22, 'G1'] end + it 'should treat nil as missing attr' do + expect do + res = lookup_grid_helper('infinity', + 'G1', + 'fico' => 720, + 'state' => 'NM', + 'ltv' => 80, + ) + end.to raise_error(RuntimeError, /matches > 1/) + + expect do + res = lookup_grid_helper('infinity', + 'G1', + 'fico' => 720, + 'ltv' => 80, + ) + end.to raise_error(RuntimeError, /matches > 1/) + + expect do + res = lookup_grid_helper('infinity', + 'G1', + 'fico' => 720, + 'state' => nil, + 'ltv' => 80, + ) + end.to raise_error(RuntimeError, /matches > 1/) + end + + it 'should handle string NULLS' do + res = lookup_grid_helper('infinity', + 'G1_with_nulls', + 'fico' => 720, + 'state' => nil, + 'ltv' => 80, + ) + + expect(res).to eq [22, 'G1_with_nulls'] + + expect do + lookup_grid_helper('infinity', + 'G1_with_nulls', + 'fico' => 720, + 'state' => 'BLABLA', + 'ltv' => 80, + ) + end.to raise_error(/Data Grid lookup failed/) + + dg = dg_from_import( + 'G1_with_nulls2', + G1_with_nulls.sub('NULL', 'NY|NULL') + ) + + res = lookup_grid_helper('infinity', + dg.name, + 'fico' => 720, + 'state' => nil, + 'ltv' => 80, + ) + + expect(res).to eq [22, dg.name] + + res = lookup_grid_helper('infinity', + dg.name, + 'fico' => 720, + 'state' => 'NY', + 'ltv' => 80, + ) + + expect(res).to eq [22, dg.name] + end + it 'should handle matches which also have a wildcard match' do dg_from_import('G9', G9) expect do res = lookup_grid_helper('infinity', 'G9', 'state' => 'CA', 'ltv' => 81, ) - end.to raise_error(RuntimeError) + end.to raise_error(RuntimeError, /matches > 1/) res = lookup_grid_helper('infinity', 'G9', 'state' => 'GU', 'ltv' => 81, ) expect(res).to eq [456, 'G9'] end - it 'should raise on nil attr values' do + # it 'should raise on nil attr values' do + # next + # dg_from_import('G9', G9) + # + # expect do + # lookup_grid_helper('infinity', + # 'G9', + # 'ltv' => 81, + # ) + # end.to raise_error(/matches > 1/) + # + # err = /Data Grid lookup failed/ + # expect do + # lookup_grid_helper('infinity', + # 'G9', + # { 'state' => 'CA', 'ltv' => nil }, + # false, false) + # end.to raise_error(err) + # + # res = lookup_grid_helper('infinity', + # 'G9', + # { 'state' => nil, 'ltv' => 81 }, + # false, false) + # + # expect(res).to eq [456, 'G9'] + # end + + it 'should raise if nothing was found' do dg_from_import('G9', G9) expect do lookup_grid_helper('infinity', 'G9', - 'ltv' => 81, + 'ltv' => 80, ) - end.to raise_error(/matches > 1/) - - err = /Data Grid lookup failed/ - expect do - lookup_grid_helper('infinity', - 'G9', - { 'state' => 'CA', 'ltv' => nil }, - false, false) - end.to raise_error(err) - - res = lookup_grid_helper('infinity', - 'G9', - { 'state' => nil, 'ltv' => 81 }, - false, false) - - expect(res).to eq [456, 'G9'] + end.to raise_error(/Data Grid lookup failed/) end it 'should handle boolean keys' do res = lookup_grid_helper('infinity', 'G4', @@ -633,25 +880,29 @@ [4.5, 5.6, 6.7]] expected_metadata = [{ 'dir' => 'v', 'attr' => 'units', 'keys' => [[1, 2], [1, 2], [3, 4], [3, 4]], 'nots' => [false, false, false, false], + 'wildcards' => [false, false, false, false], 'type' => 'integer' }, { 'dir' => 'v', 'attr' => 'ltv', 'keys' => ['[,80]', '(80,105]', '[,80]', '(80,105]'], 'nots' => [false, false, false, false], + 'wildcards' => [false, false, false, false], 'type' => 'numrange' }, { 'dir' => 'h', 'attr' => 'cltv', 'keys' => ['[100,110)', '[110,120)', '[120,]'], 'nots' => [false, false, false], + 'wildcards' => [false, false, false], 'type' => 'numrange' }, { 'dir' => 'h', 'attr' => 'fico', 'keys' => ['[600,700)', '[700,750)', '[750,]'], 'nots' => [false, false, false], + 'wildcards' => [false, false, false], 'type' => 'numrange' }] dgh = Marty::DataGrid.lookup_h(pt, 'G2') res = Marty::DataGrid.lookup_grid_distinct_entry_h(pt, {}, dgh, nil, true, true) @@ -664,21 +915,24 @@ [4.5, 5.6, nil], [11.0, 22.0, 33.0]] expected_metadata = [{ 'dir' => 'v', 'attr' => 'state', 'keys' => [['CA'], ['HI', 'TX'], ['NM'], ['MA'], nil], 'nots' => [false, false, false, false, false], + 'wildcards' => [false, false, false, false, true], 'type' => 'string' }, { 'dir' => 'v', 'attr' => 'ltv', 'keys' => ['[,80]', '(80,105]', '[,80]', '(80,105]', '[,80]'], 'nots' => [false, false, false, false, false], + 'wildcards' => [false, false, false, false, false], 'type' => 'numrange' }, { 'dir' => 'h', 'attr' => 'fico', 'keys' => ['[600,700)', '[700,750)', '[750,]'], 'nots' => [false, false, false], + 'wildcards' => [false, false, false], 'type' => 'numrange' }] dgh = Marty::DataGrid.lookup_h(pt, 'G8') res = Marty::DataGrid.lookup_grid_distinct_entry_h(pt, { 'ltv' => 10, 'state' => 'RI' }, @@ -692,10 +946,11 @@ expected_data = [['G1'], ['G2'], ['G3']] expected_metadata = [{ 'dir' => 'v', 'attr' => 'ltv', 'keys' => ['[,115]', '(115,135]', '(135,140]'], 'nots' => [false, false, false], + 'wildcards' => [false, false, false], 'type' => 'numrange' }] dgh = Marty::DataGrid.lookup_h(pt, 'G8') res = Marty::DataGrid.lookup_grid_distinct_entry_h(pt, { 'ltv' => 10, 'state' => 'RI' }, @@ -943,9 +1198,68 @@ true\t1\t<10\t<10.0\t#{values3[0]} \t2\t\t\t#{values3[1]} false\t\t>10\t\t#{values3[2]} EOS end + + describe 'performance' do + before(:each) do + %w[G1 Gf Gl].each do |g| + dg_from_import(g, "Marty::DataGridSpec::#{g}".constantize) + end + end + + after do + Rails.application.config.marty.data_grid_plpg_lookups = false + end + + let(:pt) { 'infinity' } + + grid_data = { + 'Gf' => { 'b' => true }, + 'G1' => { + 'fico' => 600, + 'state' => 'RI', + 'ltv' => 10, + }, + 'Gl' => { + 'fha_203k_option2' => 'Not Existing Services' + } + } + + grid_data.each_with_index do |(grid, params), index| + it "ruby lookup is faster than plpgsql #{index}" do + bm = Benchmark.ips do |x| + x.report('postgres') do + Rails.application.config.marty.data_grid_plpg_lookups = true + res = Marty::DataGrid.lookup_grid_h(pt, grid, params, false) + end + + x.report('ruby') do + Rails.application.config.marty.data_grid_plpg_lookups = false + res = Marty::DataGrid.lookup_grid_h(pt, grid, params, false) + end + + x.compare! + end + + h = bm.entries.each_with_object({}) do |e, hh| + hh[e.label] = e.stats.central_tendency + end + + factor = h['ruby'] / h['postgres'] + + if ENV['CI'] == 'true' + # Performance drops down in CI, probably due to running postgres + # in a separate container utilizing it's own CPU core. + expect(factor).to be > 0.8 + else + expect(factor).to be > 1.02 + end + end + end + end + describe 'constraint' do it 'constraint' do Mcfly.whodunnit = system_user Gemini::BudCategory.create!(name: 'cat1') Gemini::BudCategory.create!(name: 'cat2')