require File.expand_path(File.dirname(__FILE__) + "/../lib/skinny_jeans")
require 'test/unit'
require 'fileutils'
require 'pp'

# Integration tests for SkinnyJeans::LogParser.
#
# Each test feeds one or more access-log fixtures (located next to this file)
# through the parser and then checks the pageview rows it recorded in a
# SQLite database file.
class SkinnyJeansTest < Test::Unit::TestCase
  # Captures whatever sits between " /deals/" and " HTTP" on a log line.
  PATH_REGEXP = /\s\/deals\/(.*)\sHTTP/

  # Captures the bracketed, digit-delimited timestamp of a log line.
  DATE_REGEXP = /\[(\d.*\d)\]/

  # Smoke test: runs the parser over the large, partially broken sample log.
  # There are no assertions; the test passes as long as nothing raises.
  def test_will_work
    db_path = fixture_path("more_sample_data/skinny_jeans_deal_show.db")
    logfile_path = fixture_path("more_sample_data/broken_access_big.log")
    puts db_path.inspect

    SkinnyJeans::LogParser.execute(logfile_path, db_path, PATH_REGEXP, DATE_REGEXP)
  end

  # Parses a small log and checks the per-path and per-date pageview counts.
  def test_can_parse_default_nginx_log_format
    db_path = fresh_test_db_path
    logfile_path = fixture_path("small_access_log_default_params.log")

    sj = SkinnyJeans::LogParser.execute(logfile_path, db_path, PATH_REGEXP, DATE_REGEXP)

    assert_equal 4, sj.pageview.count
    assert_equal 4, sj.pageview.find_all_by_date("2011-02-13").count
    assert_equal 1, sj.pageview.find_by_path("tron-evolution-for-xbox-360").pageview_count
    assert_equal 1, sj.pageview.find_by_path("samsung-pn50c590-50-inch-plasma-hdtv").pageview_count
    assert_equal 1, sj.pageview.find_by_path("cheap-ipad-deals").pageview_count
    assert_equal 1, sj.pageview.find_by_path("buffalo-drivestation-axis-2tb-external-hard-drive").pageview_count
  end

  # Parses three successive versions of a growing log against the same
  # database and checks that only the newly appended lines are counted.
  def test_parse_pick_up_where_left_off
    db_path = fresh_test_db_path
    heater_path = "delonghi-hhp1500-safeheat-mica-panel-radiator-heater-with-thermostat-control"

    sj = SkinnyJeans::LogParser.execute(fixture_path("small_access_log.log"), db_path, PATH_REGEXP, DATE_REGEXP)
    assert_equal 20, sj.pageview.count
    assert_equal 2, sj.pageview.find_by_path("flip-video").pageview_count
    assert_equal 2, sj.pageview.find_by_path(heater_path).pageview_count

    # the 2nd file is the same, but with 2 additional lines for flip-video and apple-ipod-touch
    sj = SkinnyJeans::LogParser.new(fixture_path("small_access_log_part_2.log"), db_path, PATH_REGEXP, DATE_REGEXP)
    sj.execute
    assert_equal 3, sj.pageview.find_by_path("flip-video").pageview_count
    assert_equal 2, sj.pageview.find_by_path("apple-ipod-touch").pageview_count

    # the last line of small_access_log_part_2 is over 1000 chars, make sure we can handle it properly
    assert_equal 255, sj.update.last.last_line_parsed.size
    last_line = <<-EOF
      82.31.245.117 - - [01/Dec/2010:11:52:29 -0800] "GET /deals/delonghi-hhp1500-safeheat-mica-panel-radiator-heater-with-thermostat-control HTTP/1.1" 200 6244 "http://www.google.co.uk/imgres?imgurl=http://dealzon.com/pictures/deals/6656/large/delonghi-hhp1500-safeheat-mica-panel-radiator-heater-with-thermostat-control.jpg%3F1264075353&imgrefurl=http://dealzon.com/deals/delonghi-hhp1500-safeheat-mica-panel-radiator-heater-with-thermostat-control&usg=__Y-Tkbg0EefRA5uKrR0b2gs7HqSo=&h=300&w=287&sz=11&hl=en&start=49&zoom=1&tbnid=18tsbSh1DQmf2M:&tbnh=142&tbnw=133&prev=/images%3Fq%3Dblack%2Bradiator%2Bheater%26um%3D1%26hl%3Den%26biw%3D1020%26bih%3D624%26tbs%3Disch:10%2C1872&um=1&itbs=1&iact=hc&vpx=757&vpy=67&dur=140&hovh=230&hovw=220&tx=158&ty=118&ei=Tab2TMWkCMqL4AauyI2SBw&oei=Q6b2TLPJDoW3hQfDlri5BQ&esq=4&page=4&ndsp=15&ved=1t:429,r:14,s:49&biw=1020&bih=624" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; Sky Broadband; SLCC1; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; InfoPath.3)" "-"
    EOF
    # Non-bang strip: strip! returns nil when there is nothing to remove,
    # which would turn the slice below into a NoMethodError.
    assert_equal last_line.strip[0..254], sj.update.last.last_line_parsed
    assert_equal 3, sj.pageview.find_by_path(heater_path).pageview_count

    # the 3rd has 1 additional line so we can ensure we can leave off on a line over 255 characters
    sj = SkinnyJeans::LogParser.new(fixture_path("small_access_log_part_3.log"), db_path, PATH_REGEXP, DATE_REGEXP)
    sj.execute
    assert_equal 3, sj.pageview.find_by_path(heater_path).pageview_count
    assert_equal 3, sj.pageview.find_by_path("apple-ipod-touch").pageview_count
    assert_equal 3, sj.pageview_keyword.find_by_path_and_keyword("apple-ipod-touch", "best deals for the 4th generation ipod touch 32 gb").pageview_count
    # Debugging aid: PP.pp sj.pageview_keyword.all
  end

  private

  # Returns the absolute path of +relative+, resolved against the directory
  # that contains this test file.
  def fixture_path(relative)
    File.expand_path(File.dirname(__FILE__) + "/" + relative)
  end

  # Returns the absolute path of the scratch test database, deleting any
  # file left over from a previous run so the test starts from an empty state.
  def fresh_test_db_path
    db_path = fixture_path("skinny_jeans_test.db")
    # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
    FileUtils.rm(db_path) if File.exist?(db_path)
    db_path
  end
end