#!/usr/bin/perl -w
#
# Copyright 2003 Stephen Hahn.
# Licensed for use under the same terms as Perl itself.
#
#ident	"@(#)massage_log.pl	1.2	03/11/27 blueslugs.com"
#

require 5.6.1;
use strict;

#
# massage_log.pl - convert a mixed common/combined format Apache log
#     into an effectively combined format log

# A typical invocation
#
#	[ stop apache ]
#	$ perl /path/to/massage_log.pl < access_log > access_log.fixed
#	$ mv access_log access_log.original
#	$ mv access_log.fixed access_log
#	[ start apache ]
#
# (When might you need to fix a log file?  When you modify the LogFormat
# directive in httpd.conf and restart without starting a new log file.) 
#
# The Apache definitions for the combined and common log formats are as
# given below:
#
# "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
# "%h %l %u %t \"%r\" %>s %b" common
# 
# These two formats can be translated to the following regular expression.

#
# massage_line(LINE) - convert one access log line to combined format
#
# LINE is a single line in either the common or the combined log format
# (a trailing newline is permitted).  Returns the line rewritten in the
# combined format, terminated by a newline; a common format line gains
# the placeholder referrer and user agent fields "-" "-".  Returns
# nothing (undef in scalar context) if LINE matches neither format.
#
sub massage_line {
	my ($line) = @_;

	# A double-quoted field.  Newer Apache releases write an embedded
	# quote as backslash-quote, so the first alternative steps over
	# backslash escapes.  The second alternative is the plain "no
	# quotes inside" form, kept as a fallback so that a field ending
	# in a lone (unescaped) backslash is still accepted.
	my $quoted = qr/"(?:(?:[^"\\]|\\.)*|[^"]*)"/;

	# N.B. variables are interpolated even inside the comments of an
	# x-modified pattern, so the comments below avoid sigils and
	# slashes.
	return unless $line =~
	    /^(\S+)\s			# %h:  remote host
	    (\S+)\s			# %l:  remote logname
	    (\S+)\s			# %u:  remote user
	    \[([^\]]*)\]\s		# %t:  time
	    ($quoted)\s			# %r:  request line, quotes included
	    (\d*)\s			# %>s: status
	    (-|\d*)			# %b:  bytes, "-" when none were sent
	    \s?($quoted			# referrer
	    \s$quoted)?$/x;		# user agent

	# Copy the captures out at once; they are clobbered by the next
	# successful match.
	my ($host, $logname, $user, $time, $request, $status, $bytes,
	    $comb_extra) = ($1, $2, $3, $4, $5, $6, $7, $8);

	# A common format line has no referrer and user agent; supply the
	# placeholders Apache itself logs when those headers are absent.
	$comb_extra = "\"-\" \"-\"" unless defined($comb_extra);

	return "$host $logname $user [$time] $request $status $bytes " .
	    "$comb_extra\n";
}

#
# Main loop: rewrite each line from the named files (or standard input)
# onto standard output.  Unrecognized lines are still omitted from the
# output, but are reported on standard error so that the omission is
# visible before the original log is replaced.
#
while (my $line = <>) {
	my $fixed = massage_line($line);

	if (defined($fixed)) {
		print $fixed;
	} else {
		warn "massage_log.pl: line $.: unrecognized format, skipped\n";
	}
}

