Parent

Class/Module Index [+]

Quicksearch

Vizi::LogParser

This class handles the parsing of each line in the log file

Attributes

known_formats[R]

Public Class Methods

new(drop_ips, spider_ips, spider_names, page_extensions, homepage, accept_only_homepage, hostname, drop_refers_by_hostname) click to toggle source

# Class-level handle to the logger exposed by ActiveRecord::Base.
# NOTE(review): no method shown in this listing reads @@log — confirm it is still needed.
@@log = ActiveRecord::Base.logger

# File lib/vizi/vizi_tracker.rb, line 117
# Stores the classification settings, builds the table of known log
# formats, and opens the parse log.
#
# The arguments are kept verbatim in instance variables and consulted by
# parse_line when it classifies each log line:
#   drop_ips                - compared against a line's :ip to mark it dropped
#   spider_ips              - compared against a line's :ip to mark it a spider
#   spider_names            - matched against a line's :user_agent
#   page_extensions         - matched against a line's :csuristem
#   homepage                - looked up inside :csuristem when accept_only_homepage is set
#   accept_only_homepage    - selects homepage matching instead of extension matching
#   hostname                - looked up inside :referer
#   drop_refers_by_hostname - enables dropping lines whose referer contains hostname
def initialize(drop_ips, spider_ips, spider_names, page_extensions, homepage, accept_only_homepage, hostname, drop_refers_by_hostname)
  @drops, @sips, @snames = drop_ips, spider_ips, spider_names
  @page_extensions, @homepage = page_extensions, homepage
  @accept_only_homepage = accept_only_homepage
  @hostname, @drop_refers_by_hostname = hostname, drop_refers_by_hostname
  @log_format = []

  initialize_known_formats

  # Second positional argument is Logger's shift_age; 'weekly' asks for weekly rotation.
  @parselog = Logger.new('./log/parse.log', 'weekly')
  @parselog.level = Logger::WARN
end

Public Instance Methods

build_format(line) click to toggle source
# File lib/vizi/vizi_tracker.rb, line 150
    # Builds a format string from a whitespace-separated list of field names.
    #
    # Every token after the first is looked up in FIELDNAMES and appended to
    # the result as "%<mapped name> ". The first token is skipped
    # (presumably the field-list directive itself — confirm against the
    # log files being parsed).
    #
    # The result is stored in @format and also returned.
    def build_format(line)
      tokens = line.split(' ')
      @format = ""
      tokens.drop(1).each do |token|
        @format << "%" + FIELDNAMES[token] + " "
      end
      @format
    end
check_format(line) click to toggle source

Determines which known log format a single line of the log file matches. Formats are tried from the most complex (longest regex) to the simplest, and :unknown is returned when none match.

# File lib/vizi/vizi_tracker.rb, line 143
# Returns the key of the first known format whose regex matches +line+,
# or :unknown when no format matches.
#
# Candidates are ordered by the length of their regex source, longest
# first, so the most specific pattern gets the first chance to match.
def check_format(line)
  shortest_first = @known_formats.sort_by { |_name, fmt| fmt.format_regex.source.size }
  winner = shortest_first.reverse.find { |_name, fmt| line.match(fmt.format_regex) }
  winner ? winner.first : :unknown
end
initialize_known_formats() click to toggle source

Processes each format string into its symbols and test regex using the LogFormat class, and saves the resulting objects in @known_formats keyed by format name.

# File lib/vizi/vizi_tracker.rb, line 134
# Resets @known_formats and fills it with one Vizi::LogFormat per entry
# of LOG_FORMATS, keyed by the entry's name.
def initialize_known_formats
  @known_formats = {}
  LOG_FORMATS.each { |format_name, format_string|
    @known_formats.store(format_name, Vizi::LogFormat.new(format_name, format_string))
  }
end
match_partial(field, fldarray) click to toggle source
# File lib/vizi/vizi_tracker.rb, line 162
# Looks for +field+ among the entries of +fldarray+, first exactly and
# then by substring.
#
# An exact match is tried first. If that fails, each entry is tested as a
# substring of +field+, so an entry such as "googlebot" matches a longer
# user-agent string that contains it.
#
# The substring pass never ran in the earlier implementation because of an
# unconditional early return, so only exact matches were ever found.
#
# field    - the value to test; nil never matches by substring.
# fldarray - the candidate entries.
#
# Returns the index within +fldarray+ of the matching entry, or nil when
# nothing matches. Callers only need to test the result for truthiness.
def match_partial(field, fldarray)
  exact = fldarray.index(field)
  return exact unless exact.nil?
  return nil unless field.is_a?(String)

  # Skip non-string and empty entries: an empty string is a substring of
  # everything and would turn every field into a match.
  fldarray.index do |entry|
    entry.is_a?(String) && !entry.empty? && field.include?(entry)
  end
end
parse_line(line, logformat) click to toggle source

Apache files: the line is matched against the known regexes to determine the log format name. IIS files: the fields string is parsed to determine the file contents.

# File lib/vizi/vizi_tracker.rb, line 177
    # Parses one log line into a hash of field values plus classification flags.
    #
    # line      - the raw log line.
    # logformat - a format object responding to +format_symbols+, or nil.
    #             When given, the line is split on whitespace and the pieces
    #             are assigned to the format's symbols in order. When nil,
    #             the format is detected with check_format and the line is
    #             downcased and scanned with that format's regex.
    #
    # Besides the parsed fields, the returned hash carries:
    #   :p_logformatname - name of the format used ("temp" for a supplied or
    #                      freshly built format)
    #   :p_logformat     - the format object to use for following lines, or nil
    #   :p_linetype      - "V" visitor, "C" comment, "F" field list, "D" dropped
    #   :p_visitortype   - "H" human, "S" spider, "-" not relevant
    #   :p_pageflag      - true when the request counts as a page (visitor lines only)
    #   :p_searchphrase  - search terms taken from the referer, when present
    #   :datetime        - Time built from the date/time fields, when present
    #
    # Raises ArgumentError when no format is supplied and the line matches
    # none of the known formats.
    def parse_line(line, logformat)
      if logformat != nil
        log_format = logformat # caller already knows the format
        @format_name = "temp"
        data = line.split(' ')
      else
        @format_name = check_format(line) # detect the format on every line
        log_format = @known_formats[@format_name]
        if log_format.nil? || line !~ log_format.format_regex
          raise ArgumentError, "log line does not match any known format"
        end
        data = line.downcase.scan(log_format.format_regex).flatten
      end

      # Pair each format symbol with the value found at the same position.
      parsed_data = {}
      log_format.format_symbols.each_with_index do |symbol, position|
        parsed_data[symbol] = data[position]
      end

      if parsed_data[:dtstring]
        # Drop the first and last characters, then turn the first ":" into a
        # space so Time.parse sees the date and the time as separate parts.
        parsed_data[:dtstring] = parsed_data[:dtstring][1...-1]
        parsed_data[:dtstring] = parsed_data[:dtstring].sub(":", " ")
      end

      parsed_data[:p_logformatname] = @format_name.to_s
      parsed_data[:p_logformat] = logformat
      parsed_data[:p_visitortype] = "H" # default visitor type (H)uman
      parsed_data[:p_linetype] = "V" # linetype is (V)isitors
      parsed_data[:p_linetype] = "C" if parsed_data[:ip].nil? # no ip: treat as a comment line

      if @format_name.to_s == "w3c_f" # IIS field list
        @format = build_format(line) # derive the format string from the field names
        temp_format = Vizi::LogFormat.new(:temp, @format)
        parsed_data[:p_logformat] = temp_format # hand the new format back to the caller
        parsed_data[:p_logformatname] = "temp"
        parsed_data[:p_linetype] = "F" # linetype is (F)ield list
        parsed_data[:p_visitortype] = "-" # visitor type not relevant
      elsif @format_name.to_s == "w3c_c" # IIS comment
        parsed_data[:p_linetype] = "C" # linetype is (C)omment
        parsed_data[:p_visitortype] = "-"
      elsif parsed_data[:p_linetype] == "C"
        @parselog.warn line
        @parselog.warn "Found comment lines embedded in the log file ... resetting to nil"
        parsed_data[:p_logformat] = nil
      else # a visitor line: derive timestamps and classify it
        if parsed_data[:datestring] != nil
          dt = Time.parse(parsed_data[:datestring] + " " + parsed_data[:timestring])
          parsed_data[:datetime] = Time.gm(dt.year, dt.month, dt.day, dt.hour, dt.min, dt.sec)
        end

        if parsed_data[:dtstring] != nil
          dt = Time.parse(parsed_data[:dtstring])
          parsed_data[:datetime] = Time.gm(dt.year, dt.month, dt.day, dt.hour, dt.min, dt.sec)
        end

        if parsed_data[:request] != nil
          # Slashes become spaces, so the second token is the first path segment.
          splitrequest = parsed_data[:request].gsub("/", " ").split
          parsed_data[:csuristem] = splitrequest[1]
        end

        # Classify the record based on the logger rules.
        parsed_data[:p_pageflag] = false
        uri_stem = parsed_data[:csuristem]
        if @accept_only_homepage
          # uri_stem is nil when the request has no second token (for example
          # a bare "-"); such a line can never be the homepage.
          # NOTE(review): the match must start at offset 1 — confirm this fits
          # how @homepage is configured.
          parsed_data[:p_pageflag] = true if uri_stem && uri_stem.downcase.index(@homepage) == 1
        else
          parsed_data[:p_pageflag] = true if match_partial(uri_stem, @page_extensions)
        end

        parsed_data[:p_linetype] = "D" if @drops.index(parsed_data[:ip])
        parsed_data[:p_visitortype] = "S" if @sips.index(parsed_data[:ip])

        if parsed_data[:user_agent] != nil
          parsed_data[:p_visitortype] = "S" if match_partial(parsed_data[:user_agent], @snames)
        end

        referer = parsed_data[:referer]
        if referer != nil
          y = (/(search\?\S*?[pq])=(\S*?)(&)/).match(referer)
          parsed_data[:p_searchphrase] = y[2] if y != nil

          # Drop lines referred from our own host when configured to do so.
          if @drop_refers_by_hostname
            parsed_data[:p_linetype] = "D" if referer.index(@hostname) != nil
          end
        end
      end

      parsed_data
    end

[Validate]

Generated with the Darkfish Rdoc Generator 2.