#!/usr/bin/perl eval 'exec perl -S $0 "$@"' if 0; =pod =head1 NAME prepflog.pl - Pre-processor for pflogsumm =head1 SYNOPSIS prepflog.pl [-d ][--syslog_name=string] [file1 [filen]] prepflog.pl -[help|version] If no file(s) specified, reads from stdin. Output is to stdout. =head1 DESCRIPTION Utility to filter out postfix log lines due to re-injection into postfix of messages from a content filter or antivirus scanner. Reads from input and writes to output intercepting and disregarding lines which are related to the second passage of messages through postfix. The output is suitable to pass to pflogsumm and should avoid pflogsumm's double counting of these messages. As with pflogsumm itself, only postfix lines are processed. All other log lines are not passed on to the output. A non standard syslog name can be handled via the syslog_name parameter. Also log lines may be filtered by today or yesterday's date. =head1 OPTIONS -d today extract log lines just for today -d yesterday extract log lines just for yesterday -help Emit short usage message and bail out. --syslog_name=name Set syslog_name to look for for Postfix log entries. By default, prepflog looks for entries in logfiles with a syslog name of "postfix," the default. If you've set a non-default "syslog_name" parameter in your Postfix configuration, use this option to tell prepflog what that is. -version Print program name and version and bail out. =head1 EXAMPLES Typical use of this pre-processor would be: prepflog.pl -d yesterday /var/log/mail | pflogsumm.pl Any other options can be specified to pflogsumm as normal. The -d flag can however be omitted from pflogsumm if already specified with prepflog.pl. It will do no harm if left. If --syslog_name is used with prepflog.pl it must be specified again with pflogsumm. Processing of log files should be carried out just before rotating them. Even so it is still possible to miss messages unless processing considers all log files for a particular day. A script that may be helpful for this is (which considers that logfiles are compressed when rotated): #!/bin/sh LASTLOG=`ls -t /var/log/mail*.gz | head -n 1` /bin/zcat $LASTLOG | /bin/cat - /var/log/mail | \ /usr/local/bin/prepflog.pl -d yesterday | \ /usr/local/bin/pflogsumm =head1 CAVEATS The current release is a beta version, which has undergone internal testing. In particular it has not been tested on a highly loaded server or a large corpus of mail log examples As always, use a program in a test environment until you are comfortable about putting it into a production environment. =head1 BUGS None known, but needs more testing. =head1 NOTES Compatible with postfix 2.3 or later and pflogsumm 1.1.3 =head1 SEE ALSO pflogsumm(1) =head1 AUTHOR John Fawcett This script has been adapted from the pflogsumm written by Jim Seymour. Whole sections of pflogsumm code have been imported here. The reason for this is that having taken into account the way pflogsumm works, it should be easier to integrate in the future, if indeed that step is found to be a useful one. Any feedback is welcome: john@gufonero.com The script is currently available at: http://www.gufonero.com/postfix/ =head1 COPYRIGHT AND LICENSE Copyright (c) 2004 John Fawcett The parts of the code derived from pflogsumm are copyrighted by Jim Seymour. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You may have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. An on-line copy of the GNU General Public License can be found http://www.fsf.org/copyleft/gpl.html. =cut # Taken from pflogsumm use strict; use locale; use Getopt::Long; eval { require Date::Calc }; my $hasDateCalc = $@ ? 0 : 1; my $release = "0.4"; my $debug = 0; use vars qw( $progName $usageMsg %opts @monthNames %monthNums $thisYr $thisMon ); # Constants used throughout pflogsumm @monthNames = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); %monthNums = qw( Jan 0 Feb 1 Mar 2 Apr 3 May 4 Jun 5 Jul 6 Aug 7 Sep 8 Oct 9 Nov 10 Dec 11); ($thisMon, $thisYr) = (localtime(time()))[4,5]; $thisYr += 1900; my ( $cmd, $qid, $dateStr, $msgMonStr, $msgMon, $msgDay, $msgTimeStr, $msgHr, $msgMin, $msgSec, $msgYr); $progName = "prepflog.pl"; $usageMsg = "usage: $progName [-d ] [--syslog_name=name] [file1 [filen]] $progName -[version|help]"; GetOptions( "d=s" => \$opts{'d'}, "help" => \$opts{'help'}, "syslog_name=s" => \$opts{'syslogName'}, "version" => \$opts{'version'}, ) || die "$usageMsg\n"; # internally: 0 == none, undefined == -1 == all my $syslogName = $opts{'syslogName'}? $opts{'syslogName'} : "postfix"; if(defined($opts{'help'})) { print "$usageMsg\n"; exit 0; } if(defined($opts{'version'})) { print "$progName $release\n"; exit 0; } if($hasDateCalc) { # manually import the Date::Calc routine we want # # This looks stupid, but it's the only way to shut Perl up about # "Date::Calc::Delta_DHMS" used only once" if -w is on. (No, # $^W = 0 doesn't work in this context.) *Delta_DHMS = *Date::Calc::Delta_DHMS; *Delta_DHMS = *Date::Calc::Delta_DHMS; } $dateStr = get_datestr($opts{'d'}) if(defined($opts{'d'})); ######################################################## # start of prepflog code section ######################################################## # variables used by prepflog my %block = (); # array of lines which will be blocked my %held_connect = (); # array of smtpd connect lines which will be output or disregarded my %held_client = (); # array of smtpd client lines which will be output or disregarded my %held_lookup = (); # maps queue ids to process ids and hosts keys my %seen = (); # array of message ids for seen messages my $lookup; # temporary variable to hold keys for lookups my @out_queue = (); # queue for passing log lines back to main loop ######################################################## # end of prepflog code section ######################################################## while(<>) { next if(defined($dateStr) && ! /^$dateStr/o); s/: \[ID \d+ [^\]]+\] /: /o; # lose "[ID nnnnnn some.thing]" stuff my $logRmdr; # "Traditional" timestamp format? if((($msgMonStr, $msgDay, $msgHr, $msgMin, $msgSec, $logRmdr) = /^(...) {1,2}(\d{1,2}) (\d{2}):(\d{2}):(\d{2}) \S+ (.+)$/o) == 6) { # Convert string to numeric value for later "month rollover" check $msgMon = $monthNums{$msgMonStr}; } else { # RFC 3339 timestamp format? next unless((($msgYr, $msgMon, $msgDay, $msgHr, $msgMin, $msgSec, $logRmdr) = /^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(?:[\+\-](?:\d{2}):(?:\d{2})|Z) \S+ (.+)$/o) == 10); # RFC 3339 months start at "1", we index from 0 --$msgMon; } unless((($cmd, $qid) = $logRmdr =~ m#^(?:postfix|$syslogName)/([^\[:]*).*?: ([^:\s]+)#o) == 2 || (($cmd, $qid) = $logRmdr =~ m#^((?:postfix)(?:-script)?)(?:\[\d+\])?: ([^:\s]+)#o) == 2) { #print UNPROCD "$_"; next; } chomp; ######################################################## # start of prepflog code section ######################################################## # Each log line is filtered through the preprocess routine. # The task of preprocess is to decide whether to output a line. It may: # 1. output the line immediately # 2. disregard the line immediately # 3. reserve judgement until some future line has been processed. # 4. In this case, in response to some future log line, preprocess may: # a. output the held line(s) and the future log line # b. disregard the held line(s) and the future log line # # The preprocess routine may return: # - nothing (cases 2, 3, 4b) in which case processing continues with the next log line. # - one or more lines (cases 1, 4a). If preprocess will return multiple lines # then the first line is returned on the first call. # Any further lines are returned by calling preprocess with an empty input $_. # When the preprocess returns nothing (ie preprocessing of a particular log # line is finished), processing continues with the next log line. while ( $_ = preprocess($logRmdr) ) { print "$_\n"; $_ = ""; } } exit 0; # preprocess checks if it was called with non-empty input $_ # in which case it processes $_ via getnext() which may # add lines to the queue. # If called with empty input the next line from the queue is # returned. sub preprocess { my $logRmdr = pop(@_); if ($_ ne "") { getnext($logRmdr); } return shift(@out_queue); } # getnext contains the logic which decides which lines # can be output, which can be blocked and which have # to be held for future decision. sub getnext { my $logRmdr = pop(@_); if ($debug) { print "processing <".$logRmdr.">\n"; } if( $cmd eq "qmgr" ) { # If qmgr line has been blocked, disregard this log line # (Key for the block array is qmgr:qid) $lookup = "qmgr:".$qid; if (is_blocked()) { # If qmgr 'removed' line, free up space in the block array # since there are no more qmgr lines for this qid if( $logRmdr =~ /: removed/o ) { remove_block(); } return; } else { # qmgr line was not blocked so add to output queue push (@out_queue, $_); return; } } if( $cmd eq "smtpd" ) { if( $logRmdr =~ /\/smtpd\[(\d+)\]: disconnect from (.+)/o ) { # If smtpd disconnect line has been blocked, disregard log line # (Key in the block array is smtpd:pid:host) # Free up space in the blocked array since we will not see any more # smtpd lines for this message $lookup = "smtpd:".$1 .":".$2; if ( is_blocked()) { remove_block(); return; } } # If this is an smtpd reject/warning/hold/discard for previously held rows, # add the held rows to the output queue followed by the current line # (Key for the held array is qid) if ( $logRmdr =~ /\/smtpd\[\d+\]: (.*): (reject(?:_warning)?|hold|discard): /o) { $lookup=$1; output_held(); push (@out_queue, $_); return; } # If this is an smtpd connect line then hold the line # (Key for the held_connect array is pid:host) if ($logRmdr =~ /\/smtpd\[(\d+)\]: connect from (.+)/o) { $lookup= $1.":".$2; hold_connect(); return; } # If this is an smtpd client line then hold the line # (Key for the held_client array is $qid) # The value of pid:host will be stored to cross reference # later to the smtpd connect line which is stored by pid:host if ($logRmdr =~ /\/smtpd\[(\d+)\]: (.*): client=(.+?)(?:,|$)/o) { $lookup= $1.":".$3; $qid= $2; hold_client(); return; } } # If this is smtp line giving final disposal of message # (such as smtp,local,lmtp,pipe) and if it is blocked # disregard this log line. # (Key for the block array is deliver:qid) if ($logRmdr =~ /\[\d+\]: (.*): to=<[^>]*>, (?:orig_to=<[^>]*>, )?relay=[^,]+, delay=[^,]+, (?:delays=[^,]+, )?(?:dsn=\d+\.\d+\.\d+, )?status=\S+.*$/o ) { if ($debug) { print "final disposal\n"; } $lookup="deliver:".$1; if( is_blocked() ) { return; } } # Cleanup line links msg id to qid. If the msg id has already been seen # held lines for this qid are disregarded. Otherwise they are added # to output queue. # (Key for the seen array is msg id) if ( $cmd eq "cleanup" ) { if ($logRmdr =~ /\/cleanup\[\d+\]: (.*): message-id=\s??/o ) { $lookup = $2; if ( is_seen()) { # msg id has been seen already # add a block on future qmgr lines for this message # Key is qmgr:qid $lookup = "qmgr:".$1; add_block(); # add a block on future smtpd disconnect for this message # Key is the same one used to hold smtpd connect line $lookup= $1; $lookup = "smtpd:".get_held_lookup(); add_block(); # add a block on future final disposal of message lines (such as # smtp,local,lmtp,pipe etc). # Key is deliver:qid $lookup = "deliver:".$1; add_block(); # The held lines can be deleted # Key is qid $lookup= $1; remove_held(); # msg id must be removed from seen array to be able to # handle multiple recipients or representation of rejected # messages # Key is msg id $lookup = $2; remove_seen(); return; } else { # msg id has not been seen so add held lines to output queue. # Key is qid $lookup=$1; output_held(); remove_held(); # add current line to output queue push (@out_queue, $_); # add msg id to seen array $lookup = $2; add_seen(); return; } } } # default case, just add line to output queue push (@out_queue, $_); return; } ######################################################## # auxiliary routines for prepflog ######################################################## # these were so simple, that they could have been left # in the main body of the code, but I preferred # extracting them to help readability in the main body # add block sub add_block { $block{$lookup}=1; } # check if blocked sub is_blocked { return $block{$lookup}; } # remove block sub remove_block { delete $block{$lookup}; } # hold the smtpd connect line # Key is pid:host sub hold_connect { $held_connect{$lookup} = $_; } # hold the smtpd client line # Key is qid # Cross reference stored to key of held # smtpd connect line sub hold_client { $held_lookup{$qid} = $lookup; $held_client{$qid} = $_; } # lookup key to held_connect line # Key for the lookup is qid. sub get_held_lookup { return $held_lookup{$lookup}; } # output held lines. # Key is qid sub output_held { # output smtpd connect if( $held_connect{$held_lookup{$lookup}} ) { push (@out_queue, $held_connect{$held_lookup{$lookup}}); } # ouput smtpd client if( $held_client{$lookup} ) { push (@out_queue, $held_client{$lookup}); } } # remove held lines # Key is qid sub remove_held { delete $held_connect{$held_lookup{$lookup}}; delete $held_lookup{$lookup}; delete $held_client{$lookup}; } # add msg id to those seen sub add_seen { $seen{$lookup}=1;; } # check if msg id seen sub is_seen { return $seen{$lookup}; } # remove msg id from those seen sub remove_seen { delete $seen{$lookup}; } ######################################################## # end of prepflog code section ######################################################## # Taken from pflogsumm # return a date string to match in log sub get_datestr { my $dateOpt = $_[0]; my $time = time(); if($dateOpt eq "yesterday") { # Back up to yesterday $time -= ((localtime($time))[2] + 2) * 3600; } elsif($dateOpt ne "today") { die "$usageMsg\n"; } my ($t_mday, $t_mon) = (localtime($time))[3,4]; return sprintf("%s %2d", $monthNames[$t_mon], $t_mday); }