#!/usr/public/bin/perl
#
# From: Carl Paukstis <carlp@onpmomma.isc-br.com>
#
# Here's a PERL script I wrote to generate some statistics by subscriber
# for some of my mailing lists.  Feel free to improve it (send me
# improvements please) and/or hack it to better meet your needs.
#
# Gather ListProcessor mailing list statistics by subscriber from archive file.
# Statistics are for one month, specified on the command-line.  The script
# implicity assumes (and handles) "permissive" list mode, where messages are
# not necessarily from addresses which can be found in the .subscribers file.
#
# Usage: stats.pl listname mont
#
#  Written December, 1993 by Carl Paukstis  carlp@mail.spk.olivetti.com
#
#  Released to the public domain.  Please credit the author.
#  The author does NOT consider himself a PERL wizard, please send
#  suggestions to the address above.
#
#  NOTE: created with 4-column tabs!
#
# This script has been tested with Unix ListProcessor v6.0b. running on
# Olivetti Unix SysVr4 with mail handled by smail v3.  There may be
# dependencies on mail-header formats which are peculiar to smail.
# 
# Created and tested with Perl 4.0.1.8 patchlevel 36
#
eval "exec /usr/public/bin/perl -S $0 $*"
	if $running_under_some_shell;

chdir("/home/listproc") || die "can't change to listproc home dir; stopped";

@monames = ("Jan","Feb","Mar","Apr","May","Jun",
			"Jul","Aug","Sep","Oct","Nov","Dec");
$err=0;
if (@ARGV == 2) {
	$listname = $ARGV[0];
	$listname =~ tr/[a-z]/[A-Z]/;			# uppercase listname
	$month = $ARGV[1];
	if ($month =~ /^[0-9]+$/) 				# convert month number to name
		{ $month = $monames[$month - 1]; }
	substr($month,3,99) = "";				# make month 3-char string
	substr($month,0,1) =~ tr/[a-z]/[A-Z]/;	# first char uppercase
	substr($month,1,2) =~ tr/[A-Z]/[a-z]/;	# remaining chars lowercase
} else { 
	++$err; }
if (! -d "lists/$listname") { ++$err; }		# list-directory absent?

++$err;										# "prime" error-counter...
foreach $mon(@monames) {
	$month eq $mon && --$err;				# ...then undo if month is valid
}
if ($err) {
	print "Usage: stats.pl list-name month\n";
	exit 2;
}

#
#  Change the following to control input and output files.
#
$sub="lists/$listname/.subscribers";
$ali="lists/$listname/.aliases";
$mbox="lists/$listname/archive";
$destfile="|sort -n >/tmp/stats.$listname";

#
#  Build assoc-arrays for all subscribers
#
open(SUB,$sub) || die "Can't open $sub, stopped";
while(<SUB>) {
	chop;
	($addr,$mode,$pw,$conceal,$name) = split(/[\t\n ]+/,$_,5);
#	printf("'%s' %9s %-37s\n",$addr,$mode,$name);
	$bytes{$addr} = 0;
	$lines{$addr} = 0;
	$msgs{$addr} = 0;
	$flag{$addr} = 1;
	$names{$addr} = $name;
}
close(SUB);
#
#  Point aliased subscribers to the "real" address
#
open(ALI,$ali) || die "Can't open $ali, stopped";
while(<ALI>) {
	chop;
	($addr,$alias) = split(/[\t\n ]+/,$_,2);
	if ($flag{$alias} != 1) {
		print "No match for alias $alias\n";
	}
	$flag{$addr} = 2;
	$aliases{$addr} = $alias;
#	printf("'%s' (alias %s) = %s\n",$addr,$alias,$names{$alias});
}
close(ALI);

#
#  Spin through message archive; collect stats fo each sending address.
#
$new = 0;
$skipped = 0;
open(MBOX,$mbox) || die "Can't open $mbox, stopped";
while(<MBOX>) {
	if (/^From /) { 				# new mail-message
		$new = 1;
		++$totmsgs;
		chop;
		($first, $addr, $weekday, $msgmonth, $rest) = split(/[\t\n ]+/,$_,5);
		$addr =~ tr/a-z/A-Z/;
		if($addr=~/.*DAEMON.*/ || $addr=~/.*UUCP.*/ || $msgmonth ne $month) {
			++$skipped;				# ignore bogus senders
			while(<MBOX>) {
				last if (/^From /);
				}
			redo;
		}
		if($flag{$addr} == 0) {
			$flag{$addr} = 3;		# unknown addr; get info from "From:" line
		}
		if($flag{$addr} == 2) {		# aliased user; use alias for data-collect
			$addr = $aliases{$addr};
		}
		++$msgs{$addr};
#		printf("'%s' %4d\n",($names{$addr}) ? $names{$addr} : $addr,$msgs{$addr});
	}
	elsif (/^From: / && $flag{$addr} == 3) {	# for non-subscribers
		$flag{$addr} = 1;
		chop;
		s/From:\s+//;					# strip front of line
		s/\s+$//;						# and training whitespace
		$line = $_;
		if (/\((.*)\)/) {				# From: user@node (name here)
#			print "matched parens\n";
			$name = $1;
		}
		elsif (/<(.*)>/) {				# From: "name here" <user@node>
#			print "matched anglebrackets\n";
			s/\s*<.*$//;
			s/"//g;
			$name = $_;
		}
		else {
			$name = "-no name given-";
		}
		$bytes{$addr} = 0;
		$lines{$addr} = 0;
		$msgs{$addr} = 1;
		$flag{$addr} = 1;
		$names{$addr} = $name;
		print "No sub '$addr' ($name)\n";
	}
	elsif (/^\s*$/ && $new == 1) {	# first blank line after "From " 
		$new = 0;
	}
	elsif ($new == 0) {				# body of message
		++$lines{$addr};
		$bytes{$addr} += length;
	}
		
}
close(MBOX);
print "Total messages $totmsgs; skipped $skipped extraneous messages\n";
#
#  Output the report from the statistics arrays.
#  A couple of compromises were made in the formats to assure that the
#  output could be fed to 'sort -n' and come out looking decent.
#
open(REPORT,"$destfile");
select(REPORT);
$^="RT";
$|=1;
$= =999999;
$totmsgs = 0;
$totbytes = 0;
$totlines = 0;

foreach $addr(keys(%msgs)) {
	if ($msgs{$addr}) {					# do only subscribers with >0 messages
		write;
		$totmsgs += $msgs{$addr};
		$totbytes += $bytes{$addr};
		$totlines += $lines{$addr};
	}
}
$addr = "----------";
$names{$addr} ="<--- TOTAL";
$msgs{$addr} = $totmsgs;
$lines{$addr} = $totlines;
$bytes{$addr} = $totbytes;
write;

# This is the end of the actual program.

format RT =
-                Statistics for @||||||||| for @<<
								$listname,     $month
-Msgs  Lines  Bytes	    Sender                     Address
. 


format REPORT =
@>>>> @>>>>> @>>>>>  @<<<<<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$msgs{$addr},$lines{$addr},$bytes{$addr},($names{$addr}) ? $names{$addr}:$addr, $addr
. 
----------------------------- cut here -------------------------------
-- 
Carl Paukstis, RRR&RSG   DoD#0432 1KQSPI=8.80   carlp@mail.spk.olivetti.com
Olivetti North America                          carlp@mom.isc-br.com
(Oli North): will deny responsibility    voice: (509) 927-5439 0700-1600 M-F
Spokane, Washington, USA                   FAX: (509) 927-2499 24 hrs.

