require File.dirname(__FILE__) + '/../lib/skinny_jeans'
require 'test/unit'
require 'pp'
require 'fileutils'

# Runs SkinnyJeans over three successive access-log fixtures that all write to
# the same SQLite database file, and checks the recorded pageview counts after
# each run.
class SkinnyJeansTest < Test::Unit::TestCase
  # SQLite file shared by every run in the test; removed before the first run.
  DB_PATH = "./skinny_jeans_test.db"

  # Regexps handed to SkinnyJeans.new as its path and date matchers.
  PATH_REGEXP = /\s\/deals\/(.*)\sHTTP/
  DATE_REGEXP = /\[(\d.*\d)\]/

  # Deal path that is asserted on after every run.
  HEATER_PATH = "delonghi-hhp1500-safeheat-mica-panel-radiator-heater-with-thermostat-control"

  def test_parse_pick_up_where_left_off
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    FileUtils.rm(DB_PATH) if File.exist?(DB_PATH)

    sj = run_skinny_jeans("small_access_log.log")
    assert_equal 20, sj.pageview.count
    assert_equal 2, sj.pageview.find_by_path("flip-video").pageview_count
    assert_equal 2, sj.pageview.find_by_path(HEATER_PATH).pageview_count

    # the 2nd file is the same, but with 2 additional lines for flip-video and apple-ipod-touch
    sj = run_skinny_jeans("small_access_log_part_2.log")
    assert_equal 3, sj.pageview.find_by_path("flip-video").pageview_count
    assert_equal 2, sj.pageview.find_by_path("apple-ipod-touch").pageview_count

    # the last line of small_access_log_part_2 is over 1000 chars, make sure we can handle it properly
    assert_equal 255, sj.update.last.last_line_parsed.size

    # NOTE(review): the fixture text below is kept exactly as found, including
    # the line break after `200 6244 `; confirm it matches the log fixture.
    last_line = <<-EOF
82.31.245.117 - - [01/Dec/2010:11:52:29 -0800] "GET /deals/delonghi-hhp1500-safeheat-mica-panel-radiator-heater-with-thermostat-control HTTP/1.1" 200 6244 
"http://www.google.co.uk/imgres?imgurl=http://dealzon.com/pictures/deals/6656/large/delonghi-hhp1500-safeheat-mica-panel-radiator-heater-with-thermostat-control.jpg%3F1264075353&imgrefurl=http://dealzon.com/deals/delonghi-hhp1500-safeheat-mica-panel-radiator-heater-with-thermostat-control&usg=__Y-Tkbg0EefRA5uKrR0b2gs7HqSo=&h=300&w=287&sz=11&hl=en&start=49&zoom=1&tbnid=18tsbSh1DQmf2M:&tbnh=142&tbnw=133&prev=/images%3Fq%3Dblack%2Bradiator%2Bheater%26um%3D1%26hl%3Den%26biw%3D1020%26bih%3D624%26tbs%3Disch:10%2C1872&um=1&itbs=1&iact=hc&vpx=757&vpy=67&dur=140&hovh=230&hovw=220&tx=158&ty=118&ei=Tab2TMWkCMqL4AauyI2SBw&oei=Q6b2TLPJDoW3hQfDlri5BQ&esq=4&page=4&ndsp=15&ved=1t:429,r:14,s:49&biw=1020&bih=624" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; Sky Broadband; SLCC1; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; InfoPath.3)" "-"
    EOF
    # Non-mutating strip: strip! returns nil when there is nothing to remove,
    # which would make the [0..254] slice raise instead of failing cleanly.
    assert_equal last_line.strip[0..254], sj.update.last.last_line_parsed
    assert_equal 3, sj.pageview.find_by_path(HEATER_PATH).pageview_count

    # the 3rd has 1 additional line so we can ensure we can leave off on a line over 255 characters
    sj = run_skinny_jeans("small_access_log_part_3.log")
    assert_equal 3, sj.pageview.find_by_path(HEATER_PATH).pageview_count
    assert_equal 3, sj.pageview.find_by_path("apple-ipod-touch").pageview_count
    assert_equal 3, sj.pageview_keyword.find_by_path_and_keyword("apple-ipod-touch", "best deals for the 4th generation iPod touch 32 gb").pageview_count

    # Debugging aid: PP.pp sj.pageview_keyword.all
  end

  private

  # Builds a SkinnyJeans instance for +logfile_path+ against the shared test
  # database, calls #execute on it, and returns the instance for assertions.
  # Arguments are positional: log file, SQLite path, path regexp, date regexp.
  def run_skinny_jeans(logfile_path)
    sj = SkinnyJeans.new(logfile_path, DB_PATH, PATH_REGEXP, DATE_REGEXP)
    sj.execute
    sj
  end
end