[vhffs-dev] [2086] reworked web stats and repository stats

[ Thread Index | Date Index | More vhffs.org/vhffs-dev Archives ]


Revision: 2086
Author:   gradator
Date:     2012-03-02 01:20:46 +0100 (Fri, 02 Mar 2012)
Log Message:
-----------
reworked web stats and repository stats

Modified Paths:
--------------
    trunk/vhffs-api/src/Vhffs/Robots/Repository.pm
    trunk/vhffs-api/src/Vhffs/Robots/Web.pm
    trunk/vhffs-robots/src/repository_stats.pl
    trunk/vhffs-robots/src/web_stats.pl

Modified: trunk/vhffs-api/src/Vhffs/Robots/Repository.pm
===================================================================
--- trunk/vhffs-api/src/Vhffs/Robots/Repository.pm	2012-03-01 00:24:44 UTC (rev 2085)
+++ trunk/vhffs-api/src/Vhffs/Robots/Repository.pm	2012-03-02 00:20:46 UTC (rev 2086)
@@ -160,4 +160,227 @@
 	Vhffs::Robots::vhffs_log( $vhffs, 'Updated quota used for repository '.$repository->get_name.' (gid '.$repository->get_owner_gid.') to '.$used.' MB');
 }
 
+#Your logs must be in format : "%V %h %l %u %t \"%r\" %>s %b"
+#So, add in your httpd.conf following lines :
+#LogFormat "%V %h %l %u %t \"%r\" %>s %b" vhffs
+#CustomLog /var/log/apache2/http.log vhffs
+#
+# awstats_stats( $vhffs )
+#
+# Rotates the download servers' incoming logs, merges them with mergelog,
+# dispatches the merged stream into one access.log per hosted repository,
+# then generates awstats statistics for each repository.
+# Returns 1 on success, undef on missing configuration or failed sanity checks.
+sub awstats_stats {
+	my $vhffs = shift;
+	return undef unless defined $vhffs;
+
+	my $repoconf = $vhffs->get_config->get_service('repository');
+	return undef unless defined $repoconf;
+
+	my $log_incoming_root = $repoconf->{'log_incoming_root'};
+	my $log_parsed_root = $repoconf->{'log_parsed_root'};
+
+	unless( -d $log_incoming_root ) {
+		print 'ERROR: '.$log_incoming_root.' is not a directory'."\n";
+		return undef;
+	}
+	unless( -d $log_parsed_root ) {
+		print 'ERROR: '.$log_parsed_root.' is not a directory'."\n";
+		return undef;
+	}
+	unless( -x $repoconf->{'log_awstats'} ) {
+		print 'ERROR: '.$repoconf->{'log_awstats'}.' does not exist or is not executable'."\n";
+		return undef;
+	}
+	unless( -f $repoconf->{'log_awstats_sample'} ) {
+		print 'ERROR: cannot find the awstat sample at '.$repoconf->{'log_awstats_sample'}."\n";
+		return undef;
+	}
+	if( $repoconf->{'log_apachelocal'} and not -x $repoconf->{'log_apachectl'} ) {
+		print 'ERROR: cannot find the apache2ctl binary at '.$repoconf->{'log_apachectl'}."\n";
+		return undef;
+	}
+
+	my $repos = Vhffs::Services::Repository::getall( $vhffs, Vhffs::Constants::ACTIVATED );
+	return undef unless defined $repos;
+
+	# Build a hash of all repositories names
+	my %repositorys;
+	foreach ( @{$repos} )  {
+		$repositorys{$_->get_name} = $_;
+	}
+
+	# Build downloads servers list (each subdirectory of log_incoming_root
+	# is one download server's log drop area)
+	my @downloadservers;
+	opendir( my $dirfd, $log_incoming_root );
+	foreach( readdir( $dirfd ) )  {
+		next if /^\./;
+		my $path = $log_incoming_root.'/'.$_;
+		next unless -d $path;
+		push @downloadservers, { name => $_, path => $path };
+	}
+	closedir( $dirfd );
+
+	# Rotate downloads servers logs
+	#
+	# All *.log files, I know that the suffix is hardcoded but I don't bother to add a configuration
+	# entry for that, it's already too complicated, and, who would like anything other than .log ?
+	foreach my $downloadserver ( @downloadservers ) {
+
+		opendir( my $dirfd, $downloadserver->{path} );
+		foreach( readdir( $dirfd ) )  {
+			next unless /\.log$/;
+			Vhffs::Robots::rotate_log( $downloadserver->{path}.'/'.$_, $repoconf->{'log_incoming_rotations'}, $repoconf->{'log_incoming_compress'} );
+		}
+		closedir( $dirfd );
+
+		# put a file to tell downloadserver to restart (ugly signaling over nfs, I know I know... but IT WORKS, it is secure, and doesn't consume too much CPU !)
+		open( my $filecycle , '>', $downloadserver->{path}.'/cycleok' );
+		close( $filecycle );
+	}
+
+	# Restart locally or wait 180 seconds to be sure all apache are restarted (welcome to the land of pigs)
+	if( $repoconf->{'log_apachelocal'} ) {
+		my $childpid = open( my $output, '-|', $repoconf->{'log_apachectl'}, 'graceful' );
+		if($childpid) {
+			# read process output and discard
+			while(<$output>) {}
+
+			# wait for the child to finish
+			waitpid( $childpid, 0 );
+		}
+	} else {
+		sleep ( 180 );
+	}
+
+	# Deleting previous logs
+	unlink $log_incoming_root.'/mergedlog';
+	unlink $log_incoming_root.'/rejectlog';
+
+	# Merge all logs (list-form pipe open, no shell involved)
+	open( my $mergedoutput, '>', $log_incoming_root.'/mergedlog' );
+	my $childpid = open( my $output, '-|', 'mergelog', ( map { $_->{path}.'/http.log.0' } @downloadservers ), ( map { $_->{path}.'/ftp.log.0' } @downloadservers ) );
+	if($childpid) {
+		# read process output and print to destination
+		while(<$output>) { print $mergedoutput $_; }
+
+		# wait for the child to finish
+		waitpid( $childpid, 0 );
+	}
+	close( $mergedoutput );
+
+	# Parse http logs
+	my $prev = '';
+	my $fileout;
+	my $ddir = $vhffs->get_config->get_datadir.'/repository/';
+	$ddir =~ s%[/]{2,}%/%g;
+
+	#NOTE(review): open() results are not checked in this sub; failures fall through silently
+	open( my $mergedin, '<', $log_incoming_root.'/mergedlog' );
+	open( my $rejectout, '>', $log_incoming_root.'/rejectlog' );
+
+	while( <$mergedin> ) {
+
+		# common or combined log format: referer/useragent are optional captures
+		my ( $remotehost, $rfc931, $authuser, $date, $request, $status, $size, $referer, $useragent ) = ( $_ =~ /^([^\s]*)\s+([^\s]*)\s+([^\s]*)\s+\[([^\]]*)\]\s+\"([^\"]*)\"\s+([^\s]*)\s+([^\s]*)(?:\s+\"([^\"]*)\")?(?:\s+\"([^\"]*)\")?$/ );
+		next unless defined $remotehost and defined $rfc931 and defined $authuser and defined $date and defined $request and defined $status and defined $size;
+
+		# define referer and useragent (convert common to combined log)
+		$referer = '-' unless defined $referer;
+		$useragent = '-' unless defined $useragent;
+
+		# remove the "/data/repository/" part of the query
+		# (\Q..\E so the datadir path is matched literally, not as a regex)
+		$request =~ s%\Q$ddir\E/*%/%;
+
+		# remove the http:// part of the query if it exists
+		$request =~ s%http://[^/]+/*%/%;
+
+		# add HTTP/1.0 at the end of the query if needed
+		$request .= ' HTTP/1.0' if( $request && $request !~ /\ HTTP\/1.[01]$/ );
+
+		# fetch the group
+		my ( $area ) = ( $request =~ /^[^\/]*\/([^\/]+)/ ); 
+
+		# rebuild
+		my $log = $remotehost.' '.$rfc931.' '.$authuser.' ['.$date.'] "'.$request.'" '.$status.' '.$size.' "'.$referer.'" "'.$useragent.'"';
+
+		# append log line to the concerned download area
+		next unless defined $area and defined $log;
+
+		my $repository = $repositorys{$area};  
+
+		# We are _NOT_ hosting this repository
+		unless( $repository )  {
+			print $rejectout $area.' '.$log."\n";
+			next;
+		}
+
+		# the repository changed
+		if ( $prev ne $area )  {
+			my $repodir = $log_parsed_root.'/'.$area.'/logs';
+			unless( -d $repodir ) {
+				File::Path::make_path( $repodir );
+				#chown( $repository->get_owner_uid, $repository->get_owner_gid, $repodir );
+				#chmod( 0770, $repodir );
+			}
+			unless( -d $repodir ) {
+				close( $fileout ) if defined $fileout;
+				undef $fileout;
+				$prev = '';
+				next;
+			}
+
+			close( $fileout ) if defined $fileout;
+			open( $fileout, '>>', $repodir.'/access.log');
+
+			$prev = $area;
+		}
+
+		print $fileout $log."\n";
+	}
+
+	close( $mergedin );
+	close( $rejectout );
+	close( $fileout ) if defined $fileout;
+
+	# Create a configuration file and generate statistic for each website
+	foreach ( @{$repos} ) {
+		my $reponame = $_->get_name;
+
+		my $weblogdir = $log_parsed_root.'/'.$reponame;
+		my $logpath = $weblogdir.'/logs/access.log';
+		my $datadir = $weblogdir.'/awstats';
+		my $conffile = $datadir.'/awstats.'.$reponame.'.conf';
+
+		next unless -f $logpath;
+		unless( -d $datadir ) {
+			File::Path::make_path( $datadir );
+		}
+		unless( -d $datadir ) {
+			next;
+		}
+
+		# Create the config file from the sample, substituting placeholders
+		open( my $awfilein, '<', $repoconf->{'log_awstats_sample'} );
+		open( my $awfileout, '>', $conffile );
+
+		while( <$awfilein> ) {
+			s/MY_DOMAINNAME/$reponame/g;
+			s/MY_LOGPATH/$logpath/g;
+			s/MY_DATADIR/$datadir/g;
+			print $awfileout $_;
+		}
+
+		close( $awfileout );
+		close( $awfilein );
+
+		# Generate statistics
+		my $childpid = open( my $output, '-|', $repoconf->{'log_awstats'}, '-config='.$reponame, '-update' );
+		if($childpid) {
+			# read process output and discard
+			while(<$output>) {}
+
+			# wait for the child to finish
+			waitpid( $childpid, 0 );
+		}
+
+		# Rotate logs for this website
+		Vhffs::Robots::rotate_log( $logpath, $repoconf->{'log_parsed_rotation'}, $repoconf->{'log_parsed_compress'} );
+	}
+
+	return 1;
+}
+
+
 1;

Modified: trunk/vhffs-api/src/Vhffs/Robots/Web.pm
===================================================================
--- trunk/vhffs-api/src/Vhffs/Robots/Web.pm	2012-03-01 00:24:44 UTC (rev 2085)
+++ trunk/vhffs-api/src/Vhffs/Robots/Web.pm	2012-03-02 00:20:46 UTC (rev 2086)
@@ -132,4 +132,207 @@
 	return 1;
 }
 
+#Your logs must be in format : "%V %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\""  
+#So, add in your httpd.conf following lines :
+#LogFormat "%V %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" vhffs
+#CustomLog /var/log/apache2/ServerName/vhffs.log vhffs
+#
+# awstats_stats( $vhffs )
+#
+# Rotates the web servers' incoming logs, merges them with mergelog,
+# dispatches the merged stream into one access.log per hosted website,
+# then generates awstats statistics for each website.
+# Returns 1 on success, undef on missing configuration or failed sanity checks.
+sub awstats_stats {
+	my $vhffs = shift;
+	return undef unless defined $vhffs;
+
+	my $webconf = $vhffs->get_config->get_service('web');
+	return undef unless defined $webconf;
+
+	my $log_incoming_root = $webconf->{'log_incoming_root'};
+	my $log_parsed_root = $webconf->{'log_parsed_root'};
+
+	unless( -d $log_incoming_root ) {
+		print 'ERROR: '.$log_incoming_root.' is not a directory'."\n";
+		return undef;
+	}
+	unless( -d $log_parsed_root ) {
+		print 'ERROR: '.$log_parsed_root.' is not a directory'."\n";
+		return undef;
+	}
+	unless( -x $webconf->{'log_awstats'} ) {
+		print 'ERROR: '.$webconf->{'log_awstats'}.' does not exist or is not executable'."\n";
+		return undef;
+	}
+	unless( -f $webconf->{'log_awstats_sample'} ) {
+		print 'ERROR: cannot find the awstat sample at '.$webconf->{'log_awstats_sample'}."\n";
+		return undef;
+	}
+	if( $webconf->{'log_apachelocal'} and not -x $webconf->{'log_apachectl'} ) {
+		print 'ERROR: cannot find the apache2ctl binary at '.$webconf->{'log_apachectl'}."\n";
+		return undef;
+	}
+
+	my $webs = Vhffs::Services::Web::getall( $vhffs, Vhffs::Constants::ACTIVATED );
+	return undef unless defined $webs;
+
+	# Build a hash of all web sites names
+	my %websites;
+	foreach ( @{$webs} ) {
+		$websites{$_->get_servername} = $_;
+	}
+
+	# Build web servers list (each subdirectory of log_incoming_root
+	# is one web server's log drop area)
+	my @webservers;
+	opendir( my $dirfd, $log_incoming_root );
+	foreach( readdir( $dirfd ) )  {
+		next if /^\./;
+		my $path = $log_incoming_root.'/'.$_;
+		next unless -d $path;
+		push @webservers, { name => $_, path => $path };
+	}
+	closedir( $dirfd );
+
+	# Rotate web servers logs
+	#
+	# All *.log files, I know that the suffix is hardcoded but I don't bother to add a configuration
+	# entry for that, it's already too complicated, and, who would like anything other than .log ?
+	foreach my $webserver ( @webservers ) {
+
+		opendir( my $dirfd, $webserver->{path} );
+		foreach( readdir( $dirfd ) )  {
+			next unless /\.log$/;
+			Vhffs::Robots::rotate_log( $webserver->{path}.'/'.$_, $webconf->{'log_incoming_rotations'}, $webconf->{'log_incoming_compress'} );
+		}
+		closedir( $dirfd );
+
+		# put a file to tell webserver to restart (ugly signaling over nfs, I know I know... but IT WORKS, it is secure, and doesn't consume too much CPU !)
+		open( my $filecycle , '>', $webserver->{path}.'/cycleok' );
+		close( $filecycle );
+	}
+
+	# Restart locally or wait 180 seconds to be sure all apache are restarted (welcome to the land of pigs)
+	if( $webconf->{'log_apachelocal'} ) {
+		my $childpid = open( my $output, '-|', $webconf->{'log_apachectl'}, 'graceful' );
+		if($childpid) {
+			# read process output and discard
+			while(<$output>) {}
+
+			# wait for the child to finish
+			waitpid( $childpid, 0 );
+		}
+	} else {
+		sleep ( 180 );
+	}
+
+	# Deleting previous logs
+	unlink $log_incoming_root.'/mergedlog';
+	unlink $log_incoming_root.'/rejectlog';
+
+	# Merge all logs (list-form pipe open, no shell involved).
+	# Bugfix: the -f test must be applied to the rotated log file itself,
+	# the previous code tested -f on the hashref ($_), which is never a
+	# plain file, so no log was ever passed to mergelog.
+	open( my $mergedoutput, '>', $log_incoming_root.'/mergedlog' );
+	my $childpid = open( my $output, '-|', 'mergelog', ( map { my $rotated = $_->{path}.'/vhffs.log.0'; -f $rotated ? ( $rotated ) : () } @webservers ) );
+	if($childpid) {
+		# read process output and print to destination
+		while(<$output>) { print $mergedoutput $_; }
+
+		# wait for the child to finish
+		waitpid( $childpid, 0 );
+	}
+	close( $mergedoutput );
+
+	# Parse http logs
+	my $prev = '';
+	my $fileout;
+
+	#NOTE(review): open() results are not checked in this sub; failures fall through silently
+	open( my $mergedin, '<', $log_incoming_root.'/mergedlog' );
+	open( my $rejectout, '>', $log_incoming_root.'/rejectlog' );
+
+	while( my $line = <$mergedin> ) {
+		# each merged line starts with the virtual host name (%V)
+		( my ( $svname , $log ) = ( $line =~ /([a-zA-Z0-9\.\-]+)\s(.+)/g) ) or next;
+
+		# Discard www
+		$svname =~ s/^www\.//;
+
+		my $web = $websites{$svname};
+
+		# We are _NOT_ hosting this website
+		unless( $web )  {
+			print $rejectout $svname.' '.$log."\n";
+			next;
+		}
+
+		# the website changed
+		if ( $prev ne $svname ) {
+			my $webdir = $log_parsed_root.'/'.$web->get_hash.'/logs';
+			unless( -d $webdir ) {
+				File::Path::make_path( $webdir );
+				chown( $web->get_owner_uid, $web->get_owner_gid, $webdir );
+				chmod( 0770, $webdir );
+			}
+			unless( -d $webdir ) {
+				close( $fileout ) if defined $fileout;
+				undef $fileout;
+				$prev = '';
+				next;
+			}
+
+			close( $fileout ) if defined $fileout;
+			open( $fileout, '>>', $webdir.'/access.log');
+
+			$prev = $svname;
+		}
+
+		print $fileout $log."\n";
+	}
+
+	close( $mergedin );
+	close( $rejectout );
+	close( $fileout ) if defined $fileout;
+
+	# Create a configuration file and generate statistic for each website
+	foreach my $web ( @{$webs} ) {
+		my $svname = $web->get_servername;
+
+		my $weblogdir = $log_parsed_root.'/'.$web->get_hash;
+		my $logpath = $weblogdir.'/logs/access.log';
+		my $datadir = $weblogdir.'/awstats';
+		my $conffile = $datadir.'/awstats.'.$svname.'.conf';
+
+		next unless -f $logpath;
+
+		unless( -d $datadir ) {
+			File::Path::make_path( $datadir );
+			chown( $web->get_owner_uid, $web->get_owner_gid, $datadir );  
+			chmod( 0775, $datadir );
+		}
+		unless( -d $datadir ) {
+			next;
+		}
+
+		# Create the config file from the sample, substituting placeholders
+		open( my $awfilein, '<', $webconf->{'log_awstats_sample'} );
+		open( my $awfileout, '>', $conffile );
+
+		while( <$awfilein> ) {
+			s/MY_DOMAINNAME/$svname/g;
+			s/MY_LOGPATH/$logpath/g;
+			s/MY_DATADIR/$datadir/g;
+			print $awfileout $_;
+		}
+
+		close( $awfileout );
+		close( $awfilein );
+
+		# Generate statistics
+		my $childpid = open( my $output, '-|', $webconf->{'log_awstats'}, '-config='.$svname, '-update' );
+		if($childpid) {
+			# read process output and discard
+			while(<$output>) {}
+
+			# wait for the child to finish
+			waitpid( $childpid, 0 );
+		}
+
+		# Rotate logs for this website
+		Vhffs::Robots::rotate_log( $logpath, $webconf->{'log_parsed_rotation'}, $webconf->{'log_parsed_compress'} );
+	}
+
+	return 1;
+}
+
+
 1;

Modified: trunk/vhffs-robots/src/repository_stats.pl
===================================================================
--- trunk/vhffs-robots/src/repository_stats.pl	2012-03-01 00:24:44 UTC (rev 2085)
+++ trunk/vhffs-robots/src/repository_stats.pl	2012-03-02 00:20:46 UTC (rev 2086)
@@ -29,230 +29,15 @@
 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
 # POSSIBILITY OF SUCH DAMAGE.
 
-#TODO: Rework that...
-
-#Your logs must be in format : "%V %h %l %u %t \"%r\" %>s %b"
-#So, add in your httpd.conf following lines :
-#LogFormat "%V %h %l %u %t \"%r\" %>s %b" vhffs
-#CustomLog /var/log/apache2/http.log vhffs
-
 use strict;
 use utf8;
-use File::Path;
 
 use lib '%VHFFS_LIB_DIR%';
-use Vhffs::Main;
-use Vhffs::Functions;
-use Vhffs::Services::Repository;
-use Vhffs::Robots;
+use Vhffs::Robots::Repository;
 
-
 my $vhffs = init Vhffs::Main;
-die "ERROR: Cannot init VHFFS !!!" if( ! defined $vhffs );
+exit 1 unless defined $vhffs;
 
-my $repos = Vhffs::Services::Repository::getall( $vhffs , Vhffs::Constants::ACTIVATED );
-die "ERROR: Cannot fetch the list of repository\n" if( ! defined $repos );
-
-my $repoconf = $vhffs->get_config->get_service('repository');
-die "ERROR: A full repository configuration is needed to generate stats\n" if ( ! defined $repoconf );
-
-my $log_incoming_root = $repoconf->{"log_incoming_root"};
-my $log_incoming_rotations = $repoconf->{"log_incoming_rotations"};
-my $log_incoming_compress = $repoconf->{"log_incoming_compress"};
-my $log_parsed_root = $repoconf->{"log_parsed_root"};
-my $log_parsed_rotation = $repoconf->{"log_parsed_rotation"};
-my $log_parsed_compress = $repoconf->{"log_parsed_compress"};
-my $log_awstats = $repoconf->{"log_awstats"};
-my $awstats_sample = $repoconf->{"log_awstats_sample"};
-my $log_apachelocal = $repoconf->{"log_apachelocal"};
-my $log_apachectl = $repoconf->{"log_apachectl"};
-
-die "ERROR: ".$log_incoming_root." is not a directory" if( ! -d $log_incoming_root );
-die "ERROR: ".$log_parsed_root." is not a directory" if( ! -d $log_parsed_root );
-die "ERROR: ".$log_awstats." does no exist" if( ! -f $log_awstats );
-die "ERROR: cannot find the awstat sample at ".$awstats_sample if( ! -f $awstats_sample );
-die "ERROR: cannot find the apache2ctl binary at ".$log_apachectl if( $log_apachelocal  &&  ! -f $log_apachectl );
-
-Vhffs::Robots::lock( $vhffs , "repositorystats" );
-
-my %repositorys;
-foreach ( @{$repos} )  {
-	$repositorys{$_->get_name} = 1;
-}
-
-my @downloadservers;
-my $downloadserver;
-my @files;
-
-# -- Rotate servers logs (all *.log files, I know that the suffix is hardcoded but I don't
-#    bother to add a configuration entry for that, it's already too complicated, and, who would
-#    like anything other than .log ? ).
-
-opendir( DIR , $log_incoming_root );
-@files = readdir( DIR );
-
-foreach( @files )  {
-	next if( /\./ );
-	if( -d $log_incoming_root."/".$_ )  {
-		push @downloadservers, $_;
-	}
-}
-closedir( DIR );
-
-foreach $downloadserver ( @downloadservers )  {
-	opendir ( DIR , $log_incoming_root."/".$downloadserver );
-	@files = readdir( DIR );
-
-	foreach ( @files )  {
-		if( /.*\.log$/ )  {
-			Vhffs::Robots::rotate_log( $log_incoming_root."/".$downloadserver."/".$_ , $log_incoming_rotations , $log_incoming_compress );
-		}
-	}
-
-	# put a file to tell webserver to restart (ugly signaling over nfs, I know I know... but IT WORKS, it is secure, and doesn't consume too much CPU !)
-	open( FILECYCLE , "> ".$log_incoming_root."/".$downloadserver."/cycleok" );
-	print FILECYCLE "";
-	close( FILECYCLE );
-}
-
-# Restart locally or wait 180 seconds to be sure all apache are restarted (welcome to the land of pigs)
-if( $log_apachelocal )  {
-	my $childpid = open( my $output, '-|', $log_apachectl, 'graceful' );
-	if($childpid) {
-		# read process output and discard
-		while(<$output>) {}
-
-		# wait for the child to finish
-		waitpid( $childpid, 0 );
-	}
-} else {
-	sleep ( 180 );
-}
-
-# Deleting previous logs
-unlink $log_incoming_root."/mergedlog" if( -f $log_incoming_root."/mergedlog" );
-unlink $log_incoming_root."/rejectlog" if( -f $log_incoming_root."/rejectlog" );
-
-# Merge all logs
-open( my $mergedoutput, '>', $log_incoming_root.'/mergedlog' );
-my $childpid = open( my $output, '-|', 'mergelog', glob($log_incoming_root.'/*/http.log.0'), glob($log_incoming_root.'/*/ftp.log.0') );
-if($childpid) {
-	# read process output and print to destination
-	while(<$output>) { print $mergedoutput $_; }
-
-	# wait for the child to finish
-	waitpid( $childpid, 0 );
-}
-close( $mergedoutput );
-
-# Parse http logs
-my $prev = "";
-my $ddir = $vhffs->get_config->get_datadir."/repository/";
-$ddir =~ s%[/]{2,}%/%g;
-
-open( MERGEDIN , "< ".$log_incoming_root."/mergedlog" );
-open( REJECTOUT, "> ".$log_incoming_root."/rejectlog" );
-
-while( <MERGEDIN> )  {
-
-	my ( $remotehost , $rfc931 , $authuser , $date , $request , $status , $size , $referer , $useragent ) = ( $_ =~ /^([^\s]*)\s+([^\s]*)\s+([^\s]*)\s+\[([^\]]*)\]\s+\"([^\"]*)\"\s+([^\s]*)\s+([^\s]*)(?:\s+\"([^\"]*)\")?(?:\s+\"([^\"]*)\")?$/ );
-
-	next unless ( defined $remotehost && defined $rfc931 && defined $authuser && defined $date && defined $request && defined $status && defined $size );
-
-	# define referer and useragent (convert common to combined log)
-	$referer = '-' unless defined $referer;
-	$useragent = '-' unless defined $useragent;
-
-	# remove the "/data/repository/" part of the query
-	$request =~ s%$ddir/*%/%;
-
-	# remove the http:// part of the query if it exists
-	$request =~ s%http://[^/]+/*%/%;
-
-	# add HTTP/1.0 at the end of the query if needed
-	$request .= ' HTTP/1.0' if( $request && $request !~ /\ HTTP\/1.[01]$/ );
-
-	# fetch the group
-	my ( $area ) = ( $request =~ /^[^\/]*\/([^\/]+)/ ); 
-
-	# rebuild
-	my $log = $remotehost.' '.$rfc931.' '.$authuser.' ['.$date.'] "'.$request.'" '.$status.' '.$size.' "'.$referer.'" "'.$useragent.'"';
-
-	# append log line to the concerned download area
-	if ( defined $area && defined $log )  {
-
-		# We are _NOT_ hosting this repository
-		if( ! exists $repositorys{$area} )  {
-
-			print REJECTOUT $area." ".$log."\n";
-		}
-
-		# We host this repository
-		else  {
-			# the repository changed
-			if ( $prev ne $area )  {
-				my $dir = $log_parsed_root."/".$area."/logs";
-				# TODO: check make_path
-				File::Path::make_path( $dir ) unless -d $dir;
-
-				my $lff = $dir."/access.log";
-				close(FILEOUT);
-				open( FILEOUT , ">> ".$lff );
-				$prev = $area;
-			}
-
-			print FILEOUT $log."\n";
-		}
-	}
-}
-
-close(MERGEDIN);
-close(REJECTOUT);
-close(FILEOUT);
-
-
-# Create a configuration file and generate statistic for each website
-foreach ( @{$repos} )
-{
-	my $reponame = $_->get_name;
-
-	my $weblogdir = $log_parsed_root."/".$reponame;
-	my $logpath = $weblogdir."/logs/access.log";
-	my $datadir = $weblogdir."/awstats";
-	my $conffile = $datadir."/awstats.".$reponame.".conf";
-
-	next if ( ! -f $logpath );
-	# TODO: check make_path
-	File::Path::make_path( $datadir ) unless -d $datadir;
-
-	# Create the config file
-	open( AWFILEIN , "< ".$awstats_sample );
-	open( AWFILEOUT , "> ".$conffile );
-
-	while( my $line = <AWFILEIN> )
-	{
-		$line =~ s/MY_DOMAINNAME/$reponame/g;
-		$line =~ s/MY_LOGPATH/$logpath/g;
-		$line =~ s/MY_DATADIR/$datadir/g;
-		
-		print AWFILEOUT $line;
-	}
-
-	close( AWFILEOUT );
-	close( AWFILEIN );
-
-	# Generate statistics
-	my $childpid = open( my $output, '-|', $log_awstats, '-config='.$reponame, '-update' );
-	if($childpid) {
-		# read process output and discard
-		while(<$output>) {}
-
-		# wait for the child to finish
-		waitpid( $childpid, 0 );
-	}
-
-	# Rotate logs for this website
-	Vhffs::Robots::rotate_log( $logpath , $log_parsed_rotation , $log_parsed_compress );
-}
-
-Vhffs::Robots::unlock( $vhffs , "repositorystats" );
+Vhffs::Robots::lock( $vhffs, 'repositorystats' );
+Vhffs::Robots::Repository::awstats_stats( $vhffs );
+Vhffs::Robots::unlock( $vhffs, 'repositorystats' );

Modified: trunk/vhffs-robots/src/web_stats.pl
===================================================================
--- trunk/vhffs-robots/src/web_stats.pl	2012-03-01 00:24:44 UTC (rev 2085)
+++ trunk/vhffs-robots/src/web_stats.pl	2012-03-02 00:20:46 UTC (rev 2086)
@@ -29,218 +29,15 @@
 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
 # POSSIBILITY OF SUCH DAMAGE.
 
-#TODO: Rework that...
-
-#Your logs must be in format : "%V %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\""  
-#So, add in your httpd.conf following lines :
-#LogFormat "%V %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" vhffs
-#CustomLog /var/log/apache2/ServerName/vhffs.log vhffs
-
 use strict;
 use utf8;
-use File::Path;
 
 use lib '%VHFFS_LIB_DIR%';
-use Vhffs::Main;
-use Vhffs::Functions;
-use Vhffs::Services::Web;
-use Vhffs::Robots;
+use Vhffs::Robots::Web;
 
 my $vhffs = init Vhffs::Main;
-die "ERROR: Cannot init VHFFS !!!" if( ! defined $vhffs );
+exit 1 unless defined $vhffs;
 
-my $webs = Vhffs::Services::Web::getall( $vhffs , Vhffs::Constants::ACTIVATED );
-die "ERROR: Cannot fetch the list of websites\n" if( ! defined $webs );
-
-my $webconf = $vhffs->get_config->get_service('web');
-die "ERROR: A full web configuration is needed to generate stats\n" if ( ! defined $webconf );
-
-my $log_incoming_root = $webconf->{"log_incoming_root"};
-my $log_incoming_rotations = $webconf->{"log_incoming_rotations"};
-my $log_incoming_compress = $webconf->{"log_incoming_compress"};
-my $log_parsed_root = $webconf->{"log_parsed_root"};
-my $log_parsed_rotation = $webconf->{"log_parsed_rotation"};
-my $log_parsed_compress = $webconf->{"log_parsed_compress"};
-my $log_awstats = $webconf->{"log_awstats"};
-my $awstats_sample = $webconf->{"log_awstats_sample"};
-my $log_apachelocal = $webconf->{"log_apachelocal"};
-my $log_apachectl = $webconf->{"log_apachectl"};
-
-die "ERROR: ".$log_incoming_root." is not a directory" if( ! -d $log_incoming_root );
-die "ERROR: ".$log_parsed_root." is not a directory" if( ! -d $log_parsed_root );
-die "ERROR: ".$log_awstats." does no exist" if( ! -f $log_awstats );
-die "ERROR: cannot find the awstat sample at ".$awstats_sample if( ! -f $awstats_sample );
-die "ERROR: cannot find the apache2ctl binary at ".$log_apachectl if( $log_apachelocal  &&  ! -f $log_apachectl );
-
-Vhffs::Robots::lock( $vhffs , "webstats" );
-
-my %websites;
-foreach ( @{$webs} )  {
-	$websites{$_->get_servername} = $_;
-}
-
-my @webservers;
-my $webserver;
-my @files;
-
-# -- Rotate web servers logs (all *.log files, I know that the suffix is hardcoded but I don't
-#    bother to add a configuration entry for that, it's already too complicated, and, who would
-#    like anything other than .log ? ).
-
-opendir( DIR , $log_incoming_root );
-@files = readdir( DIR );
-
-foreach( @files )  {
-	next if( /\./ );
-	if( -d $log_incoming_root."/".$_ )  {
-		push @webservers, $_;
-	}
-}
-closedir( DIR );
-
-
-foreach $webserver ( @webservers )  {
-	opendir ( DIR , $log_incoming_root."/".$webserver );
-	@files = readdir( DIR );
-
-	foreach ( @files )  {
-		if( /.*\.log$/ )  {
-			Vhffs::Robots::rotate_log( $log_incoming_root."/".$webserver."/".$_ , $log_incoming_rotations , $log_incoming_compress );
-		}
-	}
-
-	# put a file to tell webserver to restart (ugly signaling over nfs, I know I know... but IT WORKS, it is secure, and doesn't consume too much CPU !)
-	open( FILECYCLE , "> ".$log_incoming_root."/".$webserver."/cycleok" );
-	print FILECYCLE "";
-	close( FILECYCLE );
-}
-
-# Restart locally or wait 180 seconds to be sure all apache are restarted (welcome to the land of pigs)
-if( $log_apachelocal )  {
-	my $childpid = open( my $output, '-|', $log_apachectl, 'graceful' );
-	if($childpid) {
-		# read process output and discard
-		while(<$output>) {}
-
-		# wait for the child to finish
-		waitpid( $childpid, 0 );
-	}
-} else {
-	sleep ( 180 );
-}
-
-# Deleting previous logs
-unlink $log_incoming_root."/mergedlog" if( -f $log_incoming_root."/mergedlog" );
-unlink $log_incoming_root."/rejectlog" if( -f $log_incoming_root."/rejectlog" );
-
-# Merge all logs
-open( my $mergedoutput, '>', $log_incoming_root.'/mergedlog' );
-my $childpid = open( my $output, '-|', 'mergelog', glob($log_incoming_root.'/*/vhffs.log.0') );
-if($childpid) {
-	# read process output and print to destination
-	while(<$output>) { print $mergedoutput $_; }
-
-	# wait for the child to finish
-	waitpid( $childpid, 0 );
-}
-close( $mergedoutput );
-
-# Parse http logs
-my $prev = "";
-
-open( MERGEDIN , "< ".$log_incoming_root."/mergedlog" );
-open( REJECTOUT, "> ".$log_incoming_root."/rejectlog" );
-
-while( my $line = <MERGEDIN> )  {
-	if( my ( $svname , $log ) = ( $line =~ /([a-zA-Z0-9\.\-]+)\s(.+)/g) )  {
-
-		# Discard www
-		$svname =~ s/^www\.//;
-
-		my $web = $websites{$svname};
-
-		# We are _NOT_ hosting this website
-		unless( $web )  {
-			print REJECTOUT $svname.' '.$log."\n";
-		}
-
-		# We host this website
-		else  {
-			# the website changed
-			if ( $prev ne $svname )  {
-				my $dir = $log_parsed_root.'/'.$web->get_hash.'/logs';
-				unless( -d $dir )  {
-					# TODO: check make_path
-					File::Path::make_path( $dir );
-					chown( $web->get_owner_uid, $web->get_owner_gid, $dir );
-					chmod( 0770 , $dir );
-				}
-
-				my $lff = $dir."/access.log";
-				close(FILEOUT);
-				open( FILEOUT , ">> ".$lff );
-
-				$prev = $svname;
-			}
-
-			print FILEOUT $log."\n";
-		}
-	}
-}
-
-close(MERGEDIN);
-close(REJECTOUT);
-close(FILEOUT);
-
-
-# Create a configuration file and generate statistic for each website
-foreach my $web ( @{$webs} )
-{
-	my $svname = $web->get_servername;
-
-	my $weblogdir = $log_parsed_root.'/'.$web->get_hash;
-	my $logpath = $weblogdir."/logs/access.log";
-	my $datadir = $weblogdir."/awstats";
-	my $conffile = $datadir."/awstats.".$svname.".conf";
-
-	#next if( -f $conffile );
-
-	next if ( ! -f $logpath );
-	unless( -d $datadir )  {
-		# TODO: check make_path
-		File::Path::make_path( $datadir );
-		chown( $web->get_owner_uid, $web->get_owner_gid, $datadir );  
-		chmod( 0775 , $datadir );
-	}
-
-	# Create the config file
-	open( AWFILEIN , "< ".$awstats_sample );
-	open( AWFILEOUT , "> ".$conffile );
-
-	while( my $line = <AWFILEIN> )
-	{
-		$line =~ s/MY_DOMAINNAME/$svname/g;
-		$line =~ s/MY_LOGPATH/$logpath/g;
-		$line =~ s/MY_DATADIR/$datadir/g;
-		
-		print AWFILEOUT $line;
-	}
-
-	close( AWFILEOUT );
-	close( AWFILEIN );
-
-	# Generate statistics
-	my $childpid = open( my $output, '-|', $log_awstats, '-config='.$svname, '-update' );
-	if($childpid) {
-		# read process output and discard
-		while(<$output>) {}
-
-		# wait for the child to finish
-		waitpid( $childpid, 0 );
-	}
-
-	# Rotate logs for this website
-	Vhffs::Robots::rotate_log( $logpath , $log_parsed_rotation , $log_parsed_compress );
-}
-
-Vhffs::Robots::unlock( $vhffs , "webstats" );
+Vhffs::Robots::lock( $vhffs, 'webstats' );
+Vhffs::Robots::Web::awstats_stats( $vhffs );
+Vhffs::Robots::unlock( $vhffs, 'webstats' );


Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/