[vhffs-dev] [414] renaming parse_httplog.pl to generate_webstats.pl
[ Thread Index | Date Index | More vhffs.org/vhffs-dev Archives ]
Revision: 414
Author: gradator
Date: 2007-01-21 08:02:08 +0000 (Sun, 21 Jan 2007)
Log Message:
-----------
renaming parse_httplog.pl to generate_webstats.pl
Added Paths:
-----------
trunk/vhffs-robots/src/generate_webstats.pl
Removed Paths:
-------------
trunk/vhffs-robots/src/parse_httplog.pl
Copied: trunk/vhffs-robots/src/generate_webstats.pl (from rev 413, trunk/vhffs-robots/src/parse_httplog.pl)
Deleted: trunk/vhffs-robots/src/parse_httplog.pl
===================================================================
--- trunk/vhffs-robots/src/parse_httplog.pl 2007-01-21 08:01:33 UTC (rev 413)
+++ trunk/vhffs-robots/src/parse_httplog.pl 2007-01-21 08:02:08 UTC (rev 414)
@@ -1,212 +0,0 @@
-#!/usr/bin/perl -w
-# Copyright (c) vhffs project and its contributors
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-#2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in
-# the documentation and/or other materials provided with the
-# distribution.
-#3. Neither the name of vhffs nor the names of its contributors
-# may be used to endorse or promote products derived from this
-# software without specific prior written permission.
-#
-#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-#"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-#LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-#FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-#COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-#INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-#BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-#CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-
-#Your logs must be in format : "%V %h %l %u %t \"%r\" %>s %b"
-#So, add in your httpd.conf following lines :
-#LogFormat "%V %h %l %u %t \"%r\" %>s %b" vhffs
-#CustomLog /var/log/apache2/vhffs.log vhffs
-
-use strict;
-use Vhffs::Main;
-use Vhffs::Conf;
-use Vhffs::Functions;
-use Vhffs::Services::Httpd;
-
-
-my $vhffs = init Vhffs::Main;
-die "ERROR: Cannot init VHFFS !!!" if( ! defined $vhffs );
-
-my $webs = Vhffs::Services::Httpd::getall( $vhffs , Vhffs::Constants::ACTIVATED );
-die "ERROR: Cannot fetch the list of websites\n" if( ! defined $webs );
-
-my $webconf = Vhffs::Conf::get_web_config();
-die "ERROR: A full web configuration is needed to generate stats\n" if ( ! defined $webconf );
-
-my $log_incoming_root = $webconf->{"log_incoming_root"};
-my $log_incoming_rotations = $webconf->{"log_incoming_rotations"};
-my $log_incoming_compress = ( $webconf->{"log_incoming_compress"} eq 'yes' ) ? 1 : 0;
-my $log_parsed_root = $webconf->{"log_parsed_root"};
-my $log_parsed_rotation = $webconf->{"log_parsed_rotation"};
-my $log_parsed_compress = ( $webconf->{"log_parsed_compress"} eq 'yes' ) ? 1 : 0;
-my $log_awstat = $webconf->{"log_awstat"};
-my $log_apachelocal = ( $webconf->{"log_apachelocal"} eq 'yes' ) ? 1 : 0;
-my $log_apachectl = $webconf->{"log_apachectl"};
-my $awstat_sample = "/usr/lib/vhffs/bots/misc/awstats.sample";
-
-die "ERROR: ".$log_incoming_root." is not a directory" if( ! -d $log_incoming_root );
-die "ERROR: ".$log_parsed_root." is not a directory" if( ! -d $log_parsed_root );
-die "ERROR: ".$log_awstat." does no exist" if( ! -f $log_awstat );
-die "ERROR: cannot find the awstat sample at ".$awstat_sample if( ! -f $awstat_sample );
-die "ERROR: cannot find the apache2ctl binary at ".$log_apachectl if( $log_apachelocal && ! -f $log_apachectl );
-
-my %websites;
-foreach ( @{$webs} ) {
- $websites{$_->get_servername} = 1;
-}
-
-my @webservers;
-my $webserver;
-my @files;
-
-# -- Rotate web servers logs (all *.log files, I know that the suffix is hardcoded but I don't
-# bother to add a configuration entry for that, it's already too complicated, and, who would
-# like anything other than .log ? ).
-
-opendir( DIR , $log_incoming_root );
-@files = readdir( DIR );
-
-foreach( @files ) {
- next if( /\./ );
- if( -d $log_incoming_root."/".$_ ) {
- push @webservers, $_;
- }
-}
-closedir( DIR );
-
-
-foreach $webserver ( @webservers ) {
- opendir ( DIR , $log_incoming_root."/".$webserver );
- @files = readdir( DIR );
-
- foreach ( @files ) {
- if( /.*\.log$/ ) {
- Vhffs::Functions::rotate_log( $log_incoming_root."/".$webserver."/".$_ , $log_incoming_rotations , $log_incoming_compress );
- }
- }
-
- # put a file to tell webserver to restart (ugly signaling over nfs, I know I know... but IT WORKS, it is secure, and doesn't consume too much CPU !)
- open( FILECYCLE , "> ".$log_incoming_root."/".$webserver."/cycleok" );
- print FILECYCLE "";
- close( FILECYCLE );
-}
-
-# Restart locally or wait 180 seconds to be sure all apache are restarted (welcome to the land of pigs)
-if( $log_apachelocal ) {
- my $cmd = $log_apachectl." graceful";
- system ( $cmd );
-} else {
- sleep ( 180 );
-}
-
-# Deleting previous logs
-unlink $log_incoming_root."/mergedlog" if( -f $log_incoming_root."/mergedlog" );
-unlink $log_incoming_root."/rejectlog" if( -f $log_incoming_root."/rejectlog" );
-
-# Merge all logs
-my $cmd = "mergelog ".$log_incoming_root."/*/vhffs.log.0 > ".$log_incoming_root."/mergedlog";
-system ( $cmd );
-
-
-# Parse http logs
-my %logs;
-my $line;
-my $svname;
-my $log;
-my $prev = "";
-
-open( MERGEDIN , "< ".$log_incoming_root."/mergedlog" );
-open( REJECTOUT, "> ".$log_incoming_root."/rejectlog" );
-
-while( $line = <MERGEDIN> ) {
- if( ( $svname , $log ) = ( $line =~ /([a-zA-Z0-9\.\-]+)\s(.+)/g) ) {
-
- # Discard www
- $svname =~ s/^www\.//;
-
- # We are _NOT_ hosting this website
- if( ! exists $websites{$svname} ) {
-
- print REJECTOUT $svname." ".$log."\n";
- }
-
- # We host this website
- else {
- # the website changed
- if ( $prev ne $svname ) {
- my $dir = $log_parsed_root."/".Vhffs::Functions::hash_webdir( $svname )."/logs";
- Vhffs::Functions::create_dir($dir) if ( ! -d $dir );
-
- my $lff = $dir."/access.log";
- close(FILEOUT);
- open( FILEOUT , ">> ".$lff );
-
- $prev = $svname;
- }
-
- print FILEOUT $log."\n";
- }
- }
-}
-
-close(MERGEDIN);
-close(REJECTOUT);
-close(FILEOUT);
-
-
-# Create a configuration file and generate statistic for each website
-foreach ( @{$webs} )
-{
- my $svname = $_->get_servername;
-
- my $weblogdir = $log_parsed_root."/".Vhffs::Functions::hash_webdir( $svname );
- my $conffile = $weblogdir."/awstats.".$svname.".conf";
- my $logpath = $weblogdir."/logs/access.log";
- my $datadir = $weblogdir."/awstats";
-
- #next if( -f $conffile );
-
- next if ( ! -f $logpath );
- Vhffs::Functions::create_dir($datadir) if ( ! -d $datadir );
-
- # Create the config file
- open( AWFILEIN , "< ".$awstat_sample );
- open( AWFILEOUT , "> ".$conffile );
-
- while( $line = <AWFILEIN> )
- {
- $line =~ s/MY_DOMAINNAME/$svname/g;
- $line =~ s/MY_LOGPATH/$logpath/g;
- $line =~ s/MY_DATADIR/$datadir/g;
-
- print AWFILEOUT $line;
- }
-
- close( AWFILEOUT );
- close( AWFILEIN );
-
- # Generate statistics
- $cmd = $log_awstat." -config=".$svname." -update 2>/dev/null 1>/dev/null";
- system( $cmd );
-
- # Rotate logs for this website
- Vhffs::Functions::rotate_log( $logpath , $log_parsed_rotation , $log_parsed_compress );
-}