[vhffs-dev] [411] Started a new way to generate and store stats, not finished yet but I must catch some sleep now, cya later guys :-)
- To: vhffs-dev@xxxxxxxxx
- Subject: [vhffs-dev] [411] Started a new way to generate and store stats, not finished yet but I must catch some sleep now, cya later guys :-)
- From: subversion@xxxxxxxxx
- Date: Sat, 20 Jan 2007 10:25:12 +0100
Revision: 411
Author: gradator
Date: 2007-01-20 09:25:11 +0000 (Sat, 20 Jan 2007)
Log Message:
-----------
Started a new way to generate and store stats, not finished yet but I must catch some sleep now, cya later guys :-)
Modified Paths:
--------------
trunk/vhffs-api/src/Vhffs/Conf.pm
trunk/vhffs-api/src/Vhffs/Functions.pm
trunk/vhffs-backend/conf/vhffs.conf
trunk/vhffs-robots/src/generate_webstats.sh
trunk/vhffs-robots/src/parse_httplog.pl
Modified: trunk/vhffs-api/src/Vhffs/Conf.pm
===================================================================
--- trunk/vhffs-api/src/Vhffs/Conf.pm 2007-01-19 13:22:03 UTC (rev 410)
+++ trunk/vhffs-api/src/Vhffs/Conf.pm 2007-01-20 09:25:11 UTC (rev 411)
@@ -927,4 +927,19 @@
}
+sub get_web_config
+{
+ my $webconf = $Config{"services"}{"web"};
+
+ if( defined $webconf )
+ {
+ return $webconf;
+ }
+ else
+ {
+ return undef;
+ }
+}
+
+
1;
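For reference, the accessor above can be reduced to a one-liner: looking up an undefined hash slot already yields undef to the caller, so the if/else branch is redundant. A minimal equivalent sketch:

    sub get_web_config
    {
        return $Config{"services"}{"web"};
    }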
Modified: trunk/vhffs-api/src/Vhffs/Functions.pm
===================================================================
--- trunk/vhffs-api/src/Vhffs/Functions.pm 2007-01-19 13:22:03 UTC (rev 410)
+++ trunk/vhffs-api/src/Vhffs/Functions.pm 2007-01-20 09:25:11 UTC (rev 411)
@@ -448,4 +448,69 @@
}
+sub rotate_log
+{
+ use File::Basename;
+
+ my $path = shift;
+ my $rotation = shift;
+ my $compress = shift;
+ my $dir = dirname ( $path );
+ my $file = basename ( $path );
+
+ return if ( ! -f $path );
+
+ # remove files out of the rotation
+ opendir ( DIR , $dir );
+ my @files = readdir( DIR );
+ foreach ( @files )
+ {
+ next if( $file ne substr($_, 0, length( $file ) ) );
+ my $suffix = substr($_, length( $file ) );
+ $suffix =~ s/^\.//;
+ $suffix =~ s/\.gz$//;
+ next if( ! ( $suffix =~ /^\d+$/ ) );
+ unlink $dir."/".$_ if( $suffix >= $rotation-1 );
+ }
+
+ # rotate logs
+ my $i;
+ for ( $i = $rotation-2 ; $i >= 0 ; $i-- ) {
+ my $j = $i +1;
+
+ # found a file which is not compressed
+ if ( -f $dir."/".$file.".".$i ) {
+
+ # rotate it
+ rename $dir."/".$file.".".$i , $dir."/".$file.".".$j ;
+
+ # compress it if compression is enabled
+ if ( $compress ) {
+ my $cmd = "gzip ".$dir."/".$file.".".$j;
+ system ( $cmd );
+ }
+ }
+ # found a file which is already compressed
+ elsif ( -f $dir."/".$file.".".$i.".gz" ) {
+
+ # rotate it
+ rename $dir."/".$file.".".$i.".gz" , $dir."/".$file.".".$j.".gz" ;
+
+ # uncompress it if compression is disabled
+ if ( ! $compress ) {
+ my $cmd = "gzip -d ".$dir."/".$file.".".$j.".gz";
+ system ( $cmd );
+ }
+ }
+ }
+
+ # last rotate, log -> log.0
+ rename $dir."/".$file , $dir."/".$file.".0" ;
+
+ # create an empty logfile so the next rotation always runs (prevents the stats parser from re-reading the old .0 file)
+ open( EMPTYFILE , "> ".$dir."/".$file );
+ print EMPTYFILE "";
+ close( EMPTYFILE );
+}
+
1;
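A usage sketch of rotate_log() under the settings this commit adds to vhffs.conf (the path below is hypothetical): with 7 rotations and compression enabled, the live log becomes vhffs.log.0 (left uncompressed so the stats parser can still read it), older generations are gzipped on their next rotation, and any numeric suffix >= 6 is unlinked first:

    use Vhffs::Functions;

    # keeps vhffs.log.0 .. vhffs.log.6.gz, deletes anything older
    Vhffs::Functions::rotate_log(
        '/data/logs/web/incoming/webserver0/vhffs.log', 7, 1 );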
Modified: trunk/vhffs-backend/conf/vhffs.conf
===================================================================
--- trunk/vhffs-backend/conf/vhffs.conf 2007-01-19 13:22:03 UTC (rev 410)
+++ trunk/vhffs-backend/conf/vhffs.conf 2007-01-20 09:25:11 UTC (rev 411)
@@ -180,6 +180,32 @@
#####################################
<services>
+ <web>
+ # Where to find logs from the webservers; each webserver should be in a separate directory, for example:
+ # - /data/logs/web/incoming/webserver0
+ # - /data/logs/web/incoming/webserver1
+ # - ...
+ log_incoming_root = /data/logs/web/incoming
+
+ # The maximum number of rotations; a rotation is made each time the stats robot is run
+ log_incoming_rotations = 7
+
+ # Should I compress rotated logs?
+ log_incoming_compress = yes
+
+ # Where to put the parsed logfile of each website; it uses the same hash model as the web storage.
+ # Consider using the same tree for both so users have access to their logs from their FTP account.
+ # If log_parsed_root = /data/web and log_parsed_dir = logs then example.com logs
+ # will be in /data/web/1b/df/72/example.com/logs/
+ log_parsed_root = /data/web
+
+ # Same as incoming
+ log_parsed_rotation = 7
+ log_parsed_compress = yes
+
+ # To be continued...
+ </web>
+
#DNS configuration
<dns>
#Default configuration for each domain-name
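For illustration, here is how the parsed-log path from the comment above could be built. The exact digest and split used by Vhffs::Functions::hash_webdir are not shown in this diff, so the MD5-based scheme below is only an assumption:

    use Digest::MD5 qw(md5_hex);

    # Hypothetical reimplementation: derive the aa/bb/cc prefix from the
    # first three bytes of an MD5 hex digest of the server name. The real
    # hash_webdir may use a different digest or split.
    sub hash_webdir
    {
        my $svname = shift;
        my $h = md5_hex( $svname );
        return substr($h,0,2)."/".substr($h,2,2)."/".substr($h,4,2)."/".$svname;
    }

    # /data/web/<aa>/<bb>/<cc>/example.com/logs
    my $dir = "/data/web/".hash_webdir("example.com")."/logs";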
Modified: trunk/vhffs-robots/src/generate_webstats.sh
===================================================================
--- trunk/vhffs-robots/src/generate_webstats.sh 2007-01-19 13:22:03 UTC (rev 410)
+++ trunk/vhffs-robots/src/generate_webstats.sh 2007-01-20 09:25:11 UTC (rev 411)
@@ -45,6 +45,10 @@
echo "Parse http logs"
/usr/lib/vhffs/bots/parse_httplog.pl
+^^^^^^^^^^^^^^^^^
+ALREADY CONVERTED
+
+
echo "Create awstats config files for each website"
/usr/lib/vhffs/bots/create_statsconf.pl
Modified: trunk/vhffs-robots/src/parse_httplog.pl
===================================================================
--- trunk/vhffs-robots/src/parse_httplog.pl 2007-01-19 13:22:03 UTC (rev 410)
+++ trunk/vhffs-robots/src/parse_httplog.pl 2007-01-20 09:25:11 UTC (rev 411)
@@ -29,66 +29,137 @@
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
-#Parse logs and put it in
-#$OUTPUTDIR/$SERVERNAME
#Your logs must be in format : "%V %h %l %u %t \"%r\" %>s %b"
#So, add in your httpd.conf following lines :
#LogFormat "%V %h %l %u %t \"%r\" %>s %b" vhffs
-#CustomLog /var/log/apache2/Vhffs::Robots::vhffs_log vhffs
+#CustomLog /var/log/apache2/vhffs.log vhffs
use strict;
+use Vhffs::Main;
+use Vhffs::Conf;
+use Vhffs::Functions;
+use Vhffs::Services::Httpd;
-my $LOGFILE = "/data/logs/web/incoming/logfile";
-my $OUTPUTDIR = "/data/logs/web/sites";
-my $LOGNAME = "logfile";
-if( ! -f $LOGFILE )
-{
- print "Error, $LOGFILE is not readable\n";
- exit( -1 );
+my $vhffs = init Vhffs::Main;
+die "ERROR: Cannot init VHFFS !!!" if( ! defined $vhffs );
+
+my $webs = Vhffs::Services::Httpd::getall( $vhffs , Vhffs::Constants::ACTIVATED );
+die "ERROR: Cannot fetch the list of websites\n" if( ! defined $webs );
+
+my $webconf = Vhffs::Conf::get_web_config();
+die "ERROR: A full web configuration is needed to generate stats\n" if ( ! defined $webconf );
+
+my $log_incoming_root = $webconf->{"log_incoming_root"};
+my $log_incoming_rotations = $webconf->{"log_incoming_rotations"};
+my $log_incoming_compress = ( $webconf->{"log_incoming_compress"} eq 'yes' ) ? 1 : 0;
+my $log_parsed_root = $webconf->{"log_parsed_root"};
+my $log_parsed_rotation = $webconf->{"log_parsed_rotation"};
+my $log_parsed_compress = ( $webconf->{"log_parsed_compress"} eq 'yes' ) ? 1 : 0;
+
+die "ERROR: ".$log_incoming_root." is not a directory" if( ! -d $log_incoming_root );
+die "ERROR: ".$log_parsed_root." is not a directory" if( ! -d $log_parsed_root );
+
+
+my %websites;
+foreach ( @{$webs} ) {
+ $websites{$_->get_servername} = 1;
}
-if( ! -d $OUTPUTDIR )
-{
- print "Error, $OUTPUTDIR is not a directory ";
- exit( -1 );
+my @webservers;
+my $webserver;
+my @files;
+
+# -- Rotate web server logs (all *.log files; I know the suffix is hardcoded, but I didn't
+# bother to add a configuration entry for it, it's already complicated enough, and who would
+# want anything other than .log?).
+
+opendir( DIR , $log_incoming_root );
+@files = readdir( DIR );
+
+foreach( @files ) {
+ next if( /\./ );
+ if( -d $log_incoming_root."/".$_ ) {
+ push @webservers, $_;
+ }
}
+closedir( DIR );
+
+foreach $webserver ( @webservers ) {
+ opendir ( DIR , $log_incoming_root."/".$webserver );
+ @files = readdir( DIR );
+
+ foreach ( @files ) {
+ if( /.*\.log$/ ) {
+ Vhffs::Functions::rotate_log( $log_incoming_root."/".$webserver."/".$_ , $log_incoming_rotations , $log_incoming_compress );
+ }
+ }
+
+ # Put a file to tell the webserver to restart (ugly signaling over NFS, I know I know... but IT WORKS, it is secure, and doesn't consume too much CPU!)
+ open( FILECYCLE , "> ".$log_incoming_root."/".$webserver."/cycleok" );
+ print FILECYCLE "";
+ close( FILECYCLE );
+}
+
+# Wait 180 seconds to be sure all Apache instances have restarted (welcome to the land of pigs)
+#sleep ( 180 );
+
+# Deleting previous logs
+unlink $log_incoming_root."/mergedlog" if( -f $log_incoming_root."/mergedlog" );
+unlink $log_incoming_root."/rejectlog" if( -f $log_incoming_root."/rejectlog" );
+
+# Merge all logs
+my $cmd = "mergelog ".$log_incoming_root."/*/vhffs.log.0 > ".$log_incoming_root."/mergedlog";
+system ( $cmd );
+
+
+# Parse http logs
my %logs;
my $line;
my $svname;
my $log;
-my @temp;
-my $dir;
+my $prev = "";
-open( FILEIN , $LOGFILE );
+use Data::Dumper;
-while( $line = <FILEIN> )
-{
- if( ( $svname , $log ) = ( $line =~ /([a-zA-Z0-9\.\-]+)\s(.+)/g) )
- {
+open( MERGEDIN , "< ".$log_incoming_root."/mergedlog" );
+open( REJECTOUT, "> ".$log_incoming_root."/rejectlog" );
+
+while( $line = <MERGEDIN> ) {
+ if( ( $svname , $log ) = ( $line =~ /([a-zA-Z0-9\.\-]+)\s(.+)/g) ) {
+
+ # Strip the leading "www."
$svname =~ s/^www\.//;
- push( @{$logs{$svname}} , $log );
- }
-}
-foreach $svname ( keys %logs )
-{
- $dir = $OUTPUTDIR . "/" . $svname;
- mkdir $dir if( ! -d $dir );
- my $logfile = $dir ."/" . $LOGNAME;
+ # We are _NOT_ hosting this website
+ if( ! exists $websites{$svname} ) {
- open( FILEOUT , ">$logfile" );
- @temp = @{$logs{$svname}};
+ print REJECTOUT $svname." ".$log."\n";
+ }
- foreach $log ( @temp )
- {
- print FILEOUT $log."\n";
+ # We host this website
+ else {
+ # the website changed
+ if ( $prev ne $svname ) {
+ print "Changing to $svname\n";
+
+ my $dir = $log_parsed_root."/".Vhffs::Functions::hash_webdir( $svname )."/logs";
+ Vhffs::Functions::create_dir($dir) if ( ! -d $dir );
+
+ my $lff = $dir."/access.log";
+ close(FILEOUT);
+ open( FILEOUT , ">> ".$lff );
+
+ $prev = $svname;
+ }
+
+ print FILEOUT $log."\n";
+ }
}
-
- close( FILEOUT );
}
-close(FILEIN);
-
+close(MERGEDIN);
+close(REJECTOUT);
+close(FILEOUT);
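A self-contained check of the split used above (the sample line is made up): the %V virtual-host field is the first whitespace-delimited token, and the rest of the line is kept verbatim, so the per-site access.log files stay in plain Common Log Format:

    my $line = 'www.example.com 192.0.2.1 - - [20/Jan/2007:10:25:12 +0100] "GET / HTTP/1.1" 200 1234';
    if( my ( $svname , $log ) = ( $line =~ /([a-zA-Z0-9\.\-]+)\s(.+)/ ) ) {
        $svname =~ s/^www\.//;             # example.com
        print $svname." -> ".$log."\n";    # site name, then the untouched log line
    }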