[vhffs-dev] [2086] reworked web stats and repository stats |
[ Thread Index |
Date Index
| More vhffs.org/vhffs-dev Archives
]
Revision: 2086
Author: gradator
Date: 2012-03-02 01:20:46 +0100 (Fri, 02 Mar 2012)
Log Message:
-----------
reworked web stats and repository stats
Modified Paths:
--------------
trunk/vhffs-api/src/Vhffs/Robots/Repository.pm
trunk/vhffs-api/src/Vhffs/Robots/Web.pm
trunk/vhffs-robots/src/repository_stats.pl
trunk/vhffs-robots/src/web_stats.pl
Modified: trunk/vhffs-api/src/Vhffs/Robots/Repository.pm
===================================================================
--- trunk/vhffs-api/src/Vhffs/Robots/Repository.pm 2012-03-01 00:24:44 UTC (rev 2085)
+++ trunk/vhffs-api/src/Vhffs/Robots/Repository.pm 2012-03-02 00:20:46 UTC (rev 2086)
@@ -160,4 +160,227 @@
Vhffs::Robots::vhffs_log( $vhffs, 'Updated quota used for repository '.$repository->get_name.' (gid '.$repository->get_owner_gid.') to '.$used.' MB');
}
+#Your logs must be in format : "%V %h %l %u %t \"%r\" %>s %b"
+#So, add in your httpd.conf following lines :
+#LogFormat "%V %h %l %u %t \"%r\" %>s %b" vhffs
+#CustomLog /var/log/apache2/http.log vhffs
+sub awstats_stats {
+ my $vhffs = shift;
+ return undef unless defined $vhffs;
+
+ my $repoconf = $vhffs->get_config->get_service('repository');
+ return undef unless defined $repoconf;
+
+ my $log_incoming_root = $repoconf->{'log_incoming_root'};
+ my $log_parsed_root = $repoconf->{'log_parsed_root'};
+
+ unless( -d $log_incoming_root ) {
+ print 'ERROR: '.$log_incoming_root.' is not a directory'."\n";
+ return undef;
+ }
+ unless( -d $log_parsed_root ) {
+ print 'ERROR: '.$log_parsed_root.' is not a directory'."\n";
+ return undef;
+ }
+ unless( -x $repoconf->{'log_awstats'} ) {
+		print 'ERROR: '.$repoconf->{'log_awstats'}.' does not exist'."\n";
+ return undef;
+ }
+ unless( -f $repoconf->{'log_awstats_sample'} ) {
+ print 'ERROR: cannot find the awstat sample at '.$repoconf->{'log_awstats_sample'}."\n";
+ return undef;
+ }
+ if( $repoconf->{'log_apachelocal'} and not -x $repoconf->{'log_apachectl'} ) {
+ print 'ERROR: cannot find the apache2ctl binary at '.$repoconf->{'log_apachectl'}."\n";
+ return undef;
+ }
+
+ my $repos = Vhffs::Services::Repository::getall( $vhffs, Vhffs::Constants::ACTIVATED );
+ return undef unless defined $repos;
+
+ # Build a hash of all repositories names
+ my %repositorys;
+ foreach ( @{$repos} ) {
+ $repositorys{$_->get_name} = $_;
+ }
+
+ # Build downloads servers list
+ my @downloadservers;
+ opendir( my $dirfd, $log_incoming_root );
+ foreach( readdir( $dirfd ) ) {
+ next if /^\./;
+ my $path = $log_incoming_root.'/'.$_;
+ next unless -d $path;
+ push @downloadservers, { name => $_, path => $path };
+ }
+ closedir( $dirfd );
+
+ # Rotate downloads servers logs
+ #
+ # All *.log files, I know that the suffix is hardcoded but I don't bother to add a configuration
+ # entry for that, it's already too complicated, and, who would like anything other than .log ?
+ foreach my $downloadserver ( @downloadservers ) {
+
+ opendir( my $dirfd, $downloadserver->{path} );
+ foreach( readdir( $dirfd ) ) {
+ next unless /\.log$/;
+ Vhffs::Robots::rotate_log( $downloadserver->{path}.'/'.$_, $repoconf->{'log_incoming_rotations'}, $repoconf->{'log_incoming_compress'} );
+ }
+ closedir( $dirfd );
+
+ # put a file to tell downloadserver to restart (ugly signaling over nfs, I know I know... but IT WORKS, it is secure, and doesn't consume too much CPU !)
+ open( my $filecycle , '>', $downloadserver->{path}.'/cycleok' );
+ close( $filecycle );
+ }
+
+ # Restart locally or wait 180 seconds to be sure all apache are restarted (welcome to the land of pigs)
+ if( $repoconf->{'log_apachelocal'} ) {
+ my $childpid = open( my $output, '-|', $repoconf->{'log_apachectl'}, 'graceful' );
+ if($childpid) {
+ # read process output and discard
+ while(<$output>) {}
+
+ # wait for the child to finish
+ waitpid( $childpid, 0 );
+ }
+ } else {
+ sleep ( 180 );
+ }
+
+ # Deleting previous logs
+ unlink $log_incoming_root.'/mergedlog';
+ unlink $log_incoming_root.'/rejectlog';
+
+ # Merge all logs
+ open( my $mergedoutput, '>', $log_incoming_root.'/mergedlog' );
+	my $childpid = open( my $output, '-|', 'mergelog', ( grep { -f $_ } map { $_->{path}.'/http.log.0' } @downloadservers ), ( grep { -f $_ } map { $_->{path}.'/ftp.log.0' } @downloadservers ) );
+ if($childpid) {
+ # read process output and print to destination
+ while(<$output>) { print $mergedoutput $_; }
+
+ # wait for the child to finish
+ waitpid( $childpid, 0 );
+ }
+ close( $mergedoutput );
+
+ # Parse http logs
+ my $prev = '';
+ my $fileout;
+ my $ddir = $vhffs->get_config->get_datadir.'/repository/';
+ $ddir =~ s%[/]{2,}%/%g;
+
+ open( my $mergedin, '<', $log_incoming_root.'/mergedlog' );
+ open( my $rejectout, '>', $log_incoming_root.'/rejectlog' );
+
+ while( <$mergedin> ) {
+
+ my ( $remotehost, $rfc931, $authuser, $date, $request, $status, $size, $referer, $useragent ) = ( $_ =~ /^([^\s]*)\s+([^\s]*)\s+([^\s]*)\s+\[([^\]]*)\]\s+\"([^\"]*)\"\s+([^\s]*)\s+([^\s]*)(?:\s+\"([^\"]*)\")?(?:\s+\"([^\"]*)\")?$/ );
+ next unless defined $remotehost and defined $rfc931 and defined $authuser and defined $date and defined $request and defined $status and defined $size;
+
+ # define referer and useragent (convert common to combined log)
+ $referer = '-' unless defined $referer;
+ $useragent = '-' unless defined $useragent;
+
+ # remove the "/data/repository/" part of the query
+ $request =~ s%$ddir/*%/%;
+
+ # remove the http:// part of the query if it exists
+ $request =~ s%http://[^/]+/*%/%;
+
+ # add HTTP/1.0 at the end of the query if needed
+ $request .= ' HTTP/1.0' if( $request && $request !~ /\ HTTP\/1.[01]$/ );
+
+ # fetch the group
+ my ( $area ) = ( $request =~ /^[^\/]*\/([^\/]+)/ );
+
+ # rebuild
+ my $log = $remotehost.' '.$rfc931.' '.$authuser.' ['.$date.'] "'.$request.'" '.$status.' '.$size.' "'.$referer.'" "'.$useragent.'"';
+
+ # append log line to the concerned download area
+ next unless defined $area and defined $log;
+
+ my $repository = $repositorys{$area};
+
+ # We are _NOT_ hosting this repository
+ unless( $repository ) {
+ print $rejectout $area.' '.$log."\n";
+ next;
+ }
+
+ # the repository changed
+ if ( $prev ne $area ) {
+ my $repodir = $log_parsed_root.'/'.$area.'/logs';
+ unless( -d $repodir ) {
+ File::Path::make_path( $repodir );
+ #chown( $repository->get_owner_uid, $repository->get_owner_gid, $repodir );
+ #chmod( 0770, $repodir );
+ }
+ unless( -d $repodir ) {
+ close( $fileout ) if defined $fileout;
+ undef $fileout;
+ $prev = '';
+ next;
+ }
+
+ close( $fileout ) if defined $fileout;
+ open( $fileout, '>>', $repodir.'/access.log');
+
+ $prev = $area;
+ }
+
+ print $fileout $log."\n";
+ }
+
+ close( $mergedin );
+ close( $rejectout );
+ close( $fileout ) if defined $fileout;
+
+ # Create a configuration file and generate statistic for each website
+ foreach ( @{$repos} ) {
+ my $reponame = $_->get_name;
+
+ my $weblogdir = $log_parsed_root.'/'.$reponame;
+ my $logpath = $weblogdir.'/logs/access.log';
+ my $datadir = $weblogdir.'/awstats';
+ my $conffile = $datadir.'/awstats.'.$reponame.'.conf';
+
+ next unless -f $logpath;
+ unless( -d $datadir ) {
+ File::Path::make_path( $datadir );
+ }
+ unless( -d $datadir ) {
+ next;
+ }
+
+ # Create the config file
+ open( my $awfilein, '<', $repoconf->{'log_awstats_sample'} );
+ open( my $awfileout, '>', $conffile );
+
+ while( <$awfilein> ) {
+ s/MY_DOMAINNAME/$reponame/g;
+ s/MY_LOGPATH/$logpath/g;
+ s/MY_DATADIR/$datadir/g;
+ print $awfileout $_;
+ }
+
+ close( $awfileout );
+ close( $awfilein );
+
+ # Generate statistics
+ my $childpid = open( my $output, '-|', $repoconf->{'log_awstats'}, '-config='.$reponame, '-update' );
+ if($childpid) {
+ # read process output and discard
+ while(<$output>) {}
+
+ # wait for the child to finish
+ waitpid( $childpid, 0 );
+ }
+
+ # Rotate logs for this website
+ Vhffs::Robots::rotate_log( $logpath, $repoconf->{'log_parsed_rotation'}, $repoconf->{'log_parsed_compress'} );
+ }
+
+ return 1;
+}
+
1;
Modified: trunk/vhffs-api/src/Vhffs/Robots/Web.pm
===================================================================
--- trunk/vhffs-api/src/Vhffs/Robots/Web.pm 2012-03-01 00:24:44 UTC (rev 2085)
+++ trunk/vhffs-api/src/Vhffs/Robots/Web.pm 2012-03-02 00:20:46 UTC (rev 2086)
@@ -132,4 +132,207 @@
return 1;
}
+#Your logs must be in format : "%V %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\""
+#So, add in your httpd.conf following lines :
+#LogFormat "%V %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" vhffs
+#CustomLog /var/log/apache2/ServerName/vhffs.log vhffs
+sub awstats_stats {
+ my $vhffs = shift;
+ return undef unless defined $vhffs;
+
+ my $webconf = $vhffs->get_config->get_service('web');
+ return undef unless defined $webconf;
+
+ my $log_incoming_root = $webconf->{'log_incoming_root'};
+ my $log_parsed_root = $webconf->{'log_parsed_root'};
+
+ unless( -d $log_incoming_root ) {
+ print 'ERROR: '.$log_incoming_root.' is not a directory'."\n";
+ return undef;
+ }
+ unless( -d $log_parsed_root ) {
+ print 'ERROR: '.$log_parsed_root.' is not a directory'."\n";
+ return undef;
+ }
+ unless( -x $webconf->{'log_awstats'} ) {
+		print 'ERROR: '.$webconf->{'log_awstats'}.' does not exist'."\n";
+ return undef;
+ }
+ unless( -f $webconf->{'log_awstats_sample'} ) {
+ print 'ERROR: cannot find the awstat sample at '.$webconf->{'log_awstats_sample'}."\n";
+ return undef;
+ }
+ if( $webconf->{'log_apachelocal'} and not -x $webconf->{'log_apachectl'} ) {
+ print 'ERROR: cannot find the apache2ctl binary at '.$webconf->{'log_apachectl'}."\n";
+ return undef;
+ }
+
+ my $webs = Vhffs::Services::Web::getall( $vhffs, Vhffs::Constants::ACTIVATED );
+ return undef unless defined $webs;
+
+ # Build a hash of all web sites names
+ my %websites;
+ foreach ( @{$webs} ) {
+ $websites{$_->get_servername} = $_;
+ }
+
+ # Build web servers list
+ my @webservers;
+ opendir( my $dirfd, $log_incoming_root );
+ foreach( readdir( $dirfd ) ) {
+ next if /^\./;
+ my $path = $log_incoming_root.'/'.$_;
+ next unless -d $path;
+ push @webservers, { name => $_, path => $path };
+ }
+ closedir( $dirfd );
+
+ # Rotate web servers logs
+ #
+ # All *.log files, I know that the suffix is hardcoded but I don't bother to add a configuration
+ # entry for that, it's already too complicated, and, who would like anything other than .log ?
+ foreach my $webserver ( @webservers ) {
+
+ opendir( my $dirfd, $webserver->{path} );
+ foreach( readdir( $dirfd ) ) {
+ next unless /\.log$/;
+ Vhffs::Robots::rotate_log( $webserver->{path}.'/'.$_, $webconf->{'log_incoming_rotations'}, $webconf->{'log_incoming_compress'} );
+ }
+ closedir( $dirfd );
+
+ # put a file to tell webserver to restart (ugly signaling over nfs, I know I know... but IT WORKS, it is secure, and doesn't consume too much CPU !)
+ open( my $filecycle , '>', $webserver->{path}.'/cycleok' );
+ close( $filecycle );
+ }
+
+ # Restart locally or wait 180 seconds to be sure all apache are restarted (welcome to the land of pigs)
+ if( $webconf->{'log_apachelocal'} ) {
+ my $childpid = open( my $output, '-|', $webconf->{'log_apachectl'}, 'graceful' );
+ if($childpid) {
+ # read process output and discard
+ while(<$output>) {}
+
+ # wait for the child to finish
+ waitpid( $childpid, 0 );
+ }
+ } else {
+ sleep ( 180 );
+ }
+
+ # Deleting previous logs
+ unlink $log_incoming_root.'/mergedlog';
+ unlink $log_incoming_root.'/rejectlog';
+
+ # Merge all logs
+ open( my $mergedoutput, '>', $log_incoming_root.'/mergedlog' );
+	my $childpid = open( my $output, '-|', 'mergelog', grep { -f $_ } map { $_->{path}.'/vhffs.log.0' } @webservers );
+ if($childpid) {
+ # read process output and print to destination
+ while(<$output>) { print $mergedoutput $_; }
+
+ # wait for the child to finish
+ waitpid( $childpid, 0 );
+ }
+ close( $mergedoutput );
+
+ # Parse http logs
+ my $prev = '';
+ my $fileout;
+
+ open( my $mergedin, '<', $log_incoming_root.'/mergedlog' );
+ open( my $rejectout, '>', $log_incoming_root.'/rejectlog' );
+
+ while( my $line = <$mergedin> ) {
+ ( my ( $svname , $log ) = ( $line =~ /([a-zA-Z0-9\.\-]+)\s(.+)/g) ) or next;
+
+ # Discard www
+ $svname =~ s/^www\.//;
+
+ my $web = $websites{$svname};
+
+ # We are _NOT_ hosting this website
+ unless( $web ) {
+ print $rejectout $svname.' '.$log."\n";
+ next;
+ }
+
+ # the website changed
+ if ( $prev ne $svname ) {
+ my $webdir = $log_parsed_root.'/'.$web->get_hash.'/logs';
+ unless( -d $webdir ) {
+ File::Path::make_path( $webdir );
+ chown( $web->get_owner_uid, $web->get_owner_gid, $webdir );
+ chmod( 0770, $webdir );
+ }
+ unless( -d $webdir ) {
+ close( $fileout ) if defined $fileout;
+ undef $fileout;
+ $prev = '';
+ next;
+ }
+
+ close( $fileout ) if defined $fileout;
+ open( $fileout, '>>', $webdir.'/access.log');
+
+ $prev = $svname;
+ }
+
+ print $fileout $log."\n";
+ }
+
+ close( $mergedin );
+ close( $rejectout );
+ close( $fileout ) if defined $fileout;
+
+ # Create a configuration file and generate statistic for each website
+ foreach my $web ( @{$webs} ) {
+ my $svname = $web->get_servername;
+
+ my $weblogdir = $log_parsed_root.'/'.$web->get_hash;
+ my $logpath = $weblogdir.'/logs/access.log';
+ my $datadir = $weblogdir.'/awstats';
+ my $conffile = $datadir.'/awstats.'.$svname.'.conf';
+
+ next unless -f $logpath;
+
+ unless( -d $datadir ) {
+ File::Path::make_path( $datadir );
+ chown( $web->get_owner_uid, $web->get_owner_gid, $datadir );
+ chmod( 0775, $datadir );
+ }
+ unless( -d $datadir ) {
+ next;
+ }
+
+ # Create the config file
+ open( my $awfilein, '<', $webconf->{'log_awstats_sample'} );
+ open( my $awfileout, '>', $conffile );
+
+ while( <$awfilein> ) {
+ s/MY_DOMAINNAME/$svname/g;
+ s/MY_LOGPATH/$logpath/g;
+ s/MY_DATADIR/$datadir/g;
+ print $awfileout $_;
+ }
+
+ close( $awfileout );
+ close( $awfilein );
+
+ # Generate statistics
+ my $childpid = open( my $output, '-|', $webconf->{'log_awstats'}, '-config='.$svname, '-update' );
+ if($childpid) {
+ # read process output and discard
+ while(<$output>) {}
+
+ # wait for the child to finish
+ waitpid( $childpid, 0 );
+ }
+
+ # Rotate logs for this website
+ Vhffs::Robots::rotate_log( $logpath, $webconf->{'log_parsed_rotation'}, $webconf->{'log_parsed_compress'} );
+ }
+
+ return 1;
+}
+
1;
Modified: trunk/vhffs-robots/src/repository_stats.pl
===================================================================
--- trunk/vhffs-robots/src/repository_stats.pl 2012-03-01 00:24:44 UTC (rev 2085)
+++ trunk/vhffs-robots/src/repository_stats.pl 2012-03-02 00:20:46 UTC (rev 2086)
@@ -29,230 +29,15 @@
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
-#TODO: Rework that...
-
-#Your logs must be in format : "%V %h %l %u %t \"%r\" %>s %b"
-#So, add in your httpd.conf following lines :
-#LogFormat "%V %h %l %u %t \"%r\" %>s %b" vhffs
-#CustomLog /var/log/apache2/http.log vhffs
-
use strict;
use utf8;
-use File::Path;
use lib '%VHFFS_LIB_DIR%';
-use Vhffs::Main;
-use Vhffs::Functions;
-use Vhffs::Services::Repository;
-use Vhffs::Robots;
+use Vhffs::Robots::Repository;
-
my $vhffs = init Vhffs::Main;
-die "ERROR: Cannot init VHFFS !!!" if( ! defined $vhffs );
+exit 1 unless defined $vhffs;
-my $repos = Vhffs::Services::Repository::getall( $vhffs , Vhffs::Constants::ACTIVATED );
-die "ERROR: Cannot fetch the list of repository\n" if( ! defined $repos );
-
-my $repoconf = $vhffs->get_config->get_service('repository');
-die "ERROR: A full repository configuration is needed to generate stats\n" if ( ! defined $repoconf );
-
-my $log_incoming_root = $repoconf->{"log_incoming_root"};
-my $log_incoming_rotations = $repoconf->{"log_incoming_rotations"};
-my $log_incoming_compress = $repoconf->{"log_incoming_compress"};
-my $log_parsed_root = $repoconf->{"log_parsed_root"};
-my $log_parsed_rotation = $repoconf->{"log_parsed_rotation"};
-my $log_parsed_compress = $repoconf->{"log_parsed_compress"};
-my $log_awstats = $repoconf->{"log_awstats"};
-my $awstats_sample = $repoconf->{"log_awstats_sample"};
-my $log_apachelocal = $repoconf->{"log_apachelocal"};
-my $log_apachectl = $repoconf->{"log_apachectl"};
-
-die "ERROR: ".$log_incoming_root." is not a directory" if( ! -d $log_incoming_root );
-die "ERROR: ".$log_parsed_root." is not a directory" if( ! -d $log_parsed_root );
-die "ERROR: ".$log_awstats." does no exist" if( ! -f $log_awstats );
-die "ERROR: cannot find the awstat sample at ".$awstats_sample if( ! -f $awstats_sample );
-die "ERROR: cannot find the apache2ctl binary at ".$log_apachectl if( $log_apachelocal && ! -f $log_apachectl );
-
-Vhffs::Robots::lock( $vhffs , "repositorystats" );
-
-my %repositorys;
-foreach ( @{$repos} ) {
- $repositorys{$_->get_name} = 1;
-}
-
-my @downloadservers;
-my $downloadserver;
-my @files;
-
-# -- Rotate servers logs (all *.log files, I know that the suffix is hardcoded but I don't
-# bother to add a configuration entry for that, it's already too complicated, and, who would
-# like anything other than .log ? ).
-
-opendir( DIR , $log_incoming_root );
-@files = readdir( DIR );
-
-foreach( @files ) {
- next if( /\./ );
- if( -d $log_incoming_root."/".$_ ) {
- push @downloadservers, $_;
- }
-}
-closedir( DIR );
-
-foreach $downloadserver ( @downloadservers ) {
- opendir ( DIR , $log_incoming_root."/".$downloadserver );
- @files = readdir( DIR );
-
- foreach ( @files ) {
- if( /.*\.log$/ ) {
- Vhffs::Robots::rotate_log( $log_incoming_root."/".$downloadserver."/".$_ , $log_incoming_rotations , $log_incoming_compress );
- }
- }
-
- # put a file to tell webserver to restart (ugly signaling over nfs, I know I know... but IT WORKS, it is secure, and doesn't consume too much CPU !)
- open( FILECYCLE , "> ".$log_incoming_root."/".$downloadserver."/cycleok" );
- print FILECYCLE "";
- close( FILECYCLE );
-}
-
-# Restart locally or wait 180 seconds to be sure all apache are restarted (welcome to the land of pigs)
-if( $log_apachelocal ) {
- my $childpid = open( my $output, '-|', $log_apachectl, 'graceful' );
- if($childpid) {
- # read process output and discard
- while(<$output>) {}
-
- # wait for the child to finish
- waitpid( $childpid, 0 );
- }
-} else {
- sleep ( 180 );
-}
-
-# Deleting previous logs
-unlink $log_incoming_root."/mergedlog" if( -f $log_incoming_root."/mergedlog" );
-unlink $log_incoming_root."/rejectlog" if( -f $log_incoming_root."/rejectlog" );
-
-# Merge all logs
-open( my $mergedoutput, '>', $log_incoming_root.'/mergedlog' );
-my $childpid = open( my $output, '-|', 'mergelog', glob($log_incoming_root.'/*/http.log.0'), glob($log_incoming_root.'/*/ftp.log.0') );
-if($childpid) {
- # read process output and print to destination
- while(<$output>) { print $mergedoutput $_; }
-
- # wait for the child to finish
- waitpid( $childpid, 0 );
-}
-close( $mergedoutput );
-
-# Parse http logs
-my $prev = "";
-my $ddir = $vhffs->get_config->get_datadir."/repository/";
-$ddir =~ s%[/]{2,}%/%g;
-
-open( MERGEDIN , "< ".$log_incoming_root."/mergedlog" );
-open( REJECTOUT, "> ".$log_incoming_root."/rejectlog" );
-
-while( <MERGEDIN> ) {
-
- my ( $remotehost , $rfc931 , $authuser , $date , $request , $status , $size , $referer , $useragent ) = ( $_ =~ /^([^\s]*)\s+([^\s]*)\s+([^\s]*)\s+\[([^\]]*)\]\s+\"([^\"]*)\"\s+([^\s]*)\s+([^\s]*)(?:\s+\"([^\"]*)\")?(?:\s+\"([^\"]*)\")?$/ );
-
- next unless ( defined $remotehost && defined $rfc931 && defined $authuser && defined $date && defined $request && defined $status && defined $size );
-
- # define referer and useragent (convert common to combined log)
- $referer = '-' unless defined $referer;
- $useragent = '-' unless defined $useragent;
-
- # remove the "/data/repository/" part of the query
- $request =~ s%$ddir/*%/%;
-
- # remove the http:// part of the query if it exists
- $request =~ s%http://[^/]+/*%/%;
-
- # add HTTP/1.0 at the end of the query if needed
- $request .= ' HTTP/1.0' if( $request && $request !~ /\ HTTP\/1.[01]$/ );
-
- # fetch the group
- my ( $area ) = ( $request =~ /^[^\/]*\/([^\/]+)/ );
-
- # rebuild
- my $log = $remotehost.' '.$rfc931.' '.$authuser.' ['.$date.'] "'.$request.'" '.$status.' '.$size.' "'.$referer.'" "'.$useragent.'"';
-
- # append log line to the concerned download area
- if ( defined $area && defined $log ) {
-
- # We are _NOT_ hosting this repository
- if( ! exists $repositorys{$area} ) {
-
- print REJECTOUT $area." ".$log."\n";
- }
-
- # We host this repository
- else {
- # the repository changed
- if ( $prev ne $area ) {
- my $dir = $log_parsed_root."/".$area."/logs";
- # TODO: check make_path
- File::Path::make_path( $dir ) unless -d $dir;
-
- my $lff = $dir."/access.log";
- close(FILEOUT);
- open( FILEOUT , ">> ".$lff );
- $prev = $area;
- }
-
- print FILEOUT $log."\n";
- }
- }
-}
-
-close(MERGEDIN);
-close(REJECTOUT);
-close(FILEOUT);
-
-
-# Create a configuration file and generate statistic for each website
-foreach ( @{$repos} )
-{
- my $reponame = $_->get_name;
-
- my $weblogdir = $log_parsed_root."/".$reponame;
- my $logpath = $weblogdir."/logs/access.log";
- my $datadir = $weblogdir."/awstats";
- my $conffile = $datadir."/awstats.".$reponame.".conf";
-
- next if ( ! -f $logpath );
- # TODO: check make_path
- File::Path::make_path( $datadir ) unless -d $datadir;
-
- # Create the config file
- open( AWFILEIN , "< ".$awstats_sample );
- open( AWFILEOUT , "> ".$conffile );
-
- while( my $line = <AWFILEIN> )
- {
- $line =~ s/MY_DOMAINNAME/$reponame/g;
- $line =~ s/MY_LOGPATH/$logpath/g;
- $line =~ s/MY_DATADIR/$datadir/g;
-
- print AWFILEOUT $line;
- }
-
- close( AWFILEOUT );
- close( AWFILEIN );
-
- # Generate statistics
- my $childpid = open( my $output, '-|', $log_awstats, '-config='.$reponame, '-update' );
- if($childpid) {
- # read process output and discard
- while(<$output>) {}
-
- # wait for the child to finish
- waitpid( $childpid, 0 );
- }
-
- # Rotate logs for this website
- Vhffs::Robots::rotate_log( $logpath , $log_parsed_rotation , $log_parsed_compress );
-}
-
-Vhffs::Robots::unlock( $vhffs , "repositorystats" );
+Vhffs::Robots::lock( $vhffs, 'repositorystats' );
+Vhffs::Robots::Repository::awstats_stats( $vhffs );
+Vhffs::Robots::unlock( $vhffs, 'repositorystats' );
Modified: trunk/vhffs-robots/src/web_stats.pl
===================================================================
--- trunk/vhffs-robots/src/web_stats.pl 2012-03-01 00:24:44 UTC (rev 2085)
+++ trunk/vhffs-robots/src/web_stats.pl 2012-03-02 00:20:46 UTC (rev 2086)
@@ -29,218 +29,15 @@
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
-#TODO: Rework that...
-
-#Your logs must be in format : "%V %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\""
-#So, add in your httpd.conf following lines :
-#LogFormat "%V %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" vhffs
-#CustomLog /var/log/apache2/ServerName/vhffs.log vhffs
-
use strict;
use utf8;
-use File::Path;
use lib '%VHFFS_LIB_DIR%';
-use Vhffs::Main;
-use Vhffs::Functions;
-use Vhffs::Services::Web;
-use Vhffs::Robots;
+use Vhffs::Robots::Web;
my $vhffs = init Vhffs::Main;
-die "ERROR: Cannot init VHFFS !!!" if( ! defined $vhffs );
+exit 1 unless defined $vhffs;
-my $webs = Vhffs::Services::Web::getall( $vhffs , Vhffs::Constants::ACTIVATED );
-die "ERROR: Cannot fetch the list of websites\n" if( ! defined $webs );
-
-my $webconf = $vhffs->get_config->get_service('web');
-die "ERROR: A full web configuration is needed to generate stats\n" if ( ! defined $webconf );
-
-my $log_incoming_root = $webconf->{"log_incoming_root"};
-my $log_incoming_rotations = $webconf->{"log_incoming_rotations"};
-my $log_incoming_compress = $webconf->{"log_incoming_compress"};
-my $log_parsed_root = $webconf->{"log_parsed_root"};
-my $log_parsed_rotation = $webconf->{"log_parsed_rotation"};
-my $log_parsed_compress = $webconf->{"log_parsed_compress"};
-my $log_awstats = $webconf->{"log_awstats"};
-my $awstats_sample = $webconf->{"log_awstats_sample"};
-my $log_apachelocal = $webconf->{"log_apachelocal"};
-my $log_apachectl = $webconf->{"log_apachectl"};
-
-die "ERROR: ".$log_incoming_root." is not a directory" if( ! -d $log_incoming_root );
-die "ERROR: ".$log_parsed_root." is not a directory" if( ! -d $log_parsed_root );
-die "ERROR: ".$log_awstats." does no exist" if( ! -f $log_awstats );
-die "ERROR: cannot find the awstat sample at ".$awstats_sample if( ! -f $awstats_sample );
-die "ERROR: cannot find the apache2ctl binary at ".$log_apachectl if( $log_apachelocal && ! -f $log_apachectl );
-
-Vhffs::Robots::lock( $vhffs , "webstats" );
-
-my %websites;
-foreach ( @{$webs} ) {
- $websites{$_->get_servername} = $_;
-}
-
-my @webservers;
-my $webserver;
-my @files;
-
-# -- Rotate web servers logs (all *.log files, I know that the suffix is hardcoded but I don't
-# bother to add a configuration entry for that, it's already too complicated, and, who would
-# like anything other than .log ? ).
-
-opendir( DIR , $log_incoming_root );
-@files = readdir( DIR );
-
-foreach( @files ) {
- next if( /\./ );
- if( -d $log_incoming_root."/".$_ ) {
- push @webservers, $_;
- }
-}
-closedir( DIR );
-
-
-foreach $webserver ( @webservers ) {
- opendir ( DIR , $log_incoming_root."/".$webserver );
- @files = readdir( DIR );
-
- foreach ( @files ) {
- if( /.*\.log$/ ) {
- Vhffs::Robots::rotate_log( $log_incoming_root."/".$webserver."/".$_ , $log_incoming_rotations , $log_incoming_compress );
- }
- }
-
- # put a file to tell webserver to restart (ugly signaling over nfs, I know I know... but IT WORKS, it is secure, and doesn't consume too much CPU !)
- open( FILECYCLE , "> ".$log_incoming_root."/".$webserver."/cycleok" );
- print FILECYCLE "";
- close( FILECYCLE );
-}
-
-# Restart locally or wait 180 seconds to be sure all apache are restarted (welcome to the land of pigs)
-if( $log_apachelocal ) {
- my $childpid = open( my $output, '-|', $log_apachectl, 'graceful' );
- if($childpid) {
- # read process output and discard
- while(<$output>) {}
-
- # wait for the child to finish
- waitpid( $childpid, 0 );
- }
-} else {
- sleep ( 180 );
-}
-
-# Deleting previous logs
-unlink $log_incoming_root."/mergedlog" if( -f $log_incoming_root."/mergedlog" );
-unlink $log_incoming_root."/rejectlog" if( -f $log_incoming_root."/rejectlog" );
-
-# Merge all logs
-open( my $mergedoutput, '>', $log_incoming_root.'/mergedlog' );
-my $childpid = open( my $output, '-|', 'mergelog', glob($log_incoming_root.'/*/vhffs.log.0') );
-if($childpid) {
- # read process output and print to destination
- while(<$output>) { print $mergedoutput $_; }
-
- # wait for the child to finish
- waitpid( $childpid, 0 );
-}
-close( $mergedoutput );
-
-# Parse http logs
-my $prev = "";
-
-open( MERGEDIN , "< ".$log_incoming_root."/mergedlog" );
-open( REJECTOUT, "> ".$log_incoming_root."/rejectlog" );
-
-while( my $line = <MERGEDIN> ) {
- if( my ( $svname , $log ) = ( $line =~ /([a-zA-Z0-9\.\-]+)\s(.+)/g) ) {
-
- # Discard www
- $svname =~ s/^www\.//;
-
- my $web = $websites{$svname};
-
- # We are _NOT_ hosting this website
- unless( $web ) {
- print REJECTOUT $svname.' '.$log."\n";
- }
-
- # We host this website
- else {
- # the website changed
- if ( $prev ne $svname ) {
- my $dir = $log_parsed_root.'/'.$web->get_hash.'/logs';
- unless( -d $dir ) {
- # TODO: check make_path
- File::Path::make_path( $dir );
- chown( $web->get_owner_uid, $web->get_owner_gid, $dir );
- chmod( 0770 , $dir );
- }
-
- my $lff = $dir."/access.log";
- close(FILEOUT);
- open( FILEOUT , ">> ".$lff );
-
- $prev = $svname;
- }
-
- print FILEOUT $log."\n";
- }
- }
-}
-
-close(MERGEDIN);
-close(REJECTOUT);
-close(FILEOUT);
-
-
-# Create a configuration file and generate statistic for each website
-foreach my $web ( @{$webs} )
-{
- my $svname = $web->get_servername;
-
- my $weblogdir = $log_parsed_root.'/'.$web->get_hash;
- my $logpath = $weblogdir."/logs/access.log";
- my $datadir = $weblogdir."/awstats";
- my $conffile = $datadir."/awstats.".$svname.".conf";
-
- #next if( -f $conffile );
-
- next if ( ! -f $logpath );
- unless( -d $datadir ) {
- # TODO: check make_path
- File::Path::make_path( $datadir );
- chown( $web->get_owner_uid, $web->get_owner_gid, $datadir );
- chmod( 0775 , $datadir );
- }
-
- # Create the config file
- open( AWFILEIN , "< ".$awstats_sample );
- open( AWFILEOUT , "> ".$conffile );
-
- while( my $line = <AWFILEIN> )
- {
- $line =~ s/MY_DOMAINNAME/$svname/g;
- $line =~ s/MY_LOGPATH/$logpath/g;
- $line =~ s/MY_DATADIR/$datadir/g;
-
- print AWFILEOUT $line;
- }
-
- close( AWFILEOUT );
- close( AWFILEIN );
-
- # Generate statistics
- my $childpid = open( my $output, '-|', $log_awstats, '-config='.$svname, '-update' );
- if($childpid) {
- # read process output and discard
- while(<$output>) {}
-
- # wait for the child to finish
- waitpid( $childpid, 0 );
- }
-
- # Rotate logs for this website
- Vhffs::Robots::rotate_log( $logpath , $log_parsed_rotation , $log_parsed_compress );
-}
-
-Vhffs::Robots::unlock( $vhffs , "webstats" );
+Vhffs::Robots::lock( $vhffs, 'webstats' );
+Vhffs::Robots::Web::awstats_stats( $vhffs );
+Vhffs::Robots::unlock( $vhffs, 'webstats' );