[vhffs-dev] [411] Started a new way to generate and store stats; not finished yet, but I must catch some sleep now, cya later guys :-)

Revision: 411
Author:   gradator
Date:     2007-01-20 09:25:11 +0000 (Sat, 20 Jan 2007)

Log Message:
-----------
Started a new way to generate and store stats; not finished yet, but I must catch some sleep now, cya later guys :-)

Modified Paths:
--------------
    trunk/vhffs-api/src/Vhffs/Conf.pm
    trunk/vhffs-api/src/Vhffs/Functions.pm
    trunk/vhffs-backend/conf/vhffs.conf
    trunk/vhffs-robots/src/generate_webstats.sh
    trunk/vhffs-robots/src/parse_httplog.pl


Modified: trunk/vhffs-api/src/Vhffs/Conf.pm
===================================================================
--- trunk/vhffs-api/src/Vhffs/Conf.pm	2007-01-19 13:22:03 UTC (rev 410)
+++ trunk/vhffs-api/src/Vhffs/Conf.pm	2007-01-20 09:25:11 UTC (rev 411)
@@ -927,4 +927,12 @@
 }
 
 
+sub get_web_config
+{
+	# Returns the <web> section of the services configuration,
+	# or undef when the section is not defined.
+	return $Config{"services"}{"web"};
+}
+
+
 1;
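
For reference, a minimal sketch of how a robot would consume the new accessor (the key
names come from the <web> section added to vhffs.conf below; dying on a missing section
is this sketch's choice, not part of the API):

    use Vhffs::Conf;

    # get_web_config() returns the <web> section of the services
    # configuration, or undef when the section is absent.
    my $webconf = Vhffs::Conf::get_web_config();
    die "No <web> section in vhffs.conf\n" unless defined $webconf;

    # Keys map one-to-one to the entries of the <web> block.
    my $incoming = $webconf->{"log_incoming_root"};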

Modified: trunk/vhffs-api/src/Vhffs/Functions.pm
===================================================================
--- trunk/vhffs-api/src/Vhffs/Functions.pm	2007-01-19 13:22:03 UTC (rev 410)
+++ trunk/vhffs-api/src/Vhffs/Functions.pm	2007-01-20 09:25:11 UTC (rev 411)
@@ -448,4 +448,70 @@
 }
 
 
+sub rotate_log
+{
+	use File::Basename;
+
+	my $path = shift;
+	my $rotation = shift;
+	my $compress = shift;
+	my $dir = dirname ( $path );
+	my $file = basename ( $path );
+
+	return if ( ! -f $path );
+
+	# remove files out of the rotation
+	opendir ( DIR , $dir );
+	my @files = readdir( DIR );
+	closedir( DIR );
+	foreach ( @files )
+	{
+		next if( $file ne substr($_, 0, length( $file ) ) );
+		my $suffix = substr($_, length( $file ) );
+		$suffix =~ s/^\.//;
+		$suffix =~ s/\.gz$//;
+		next if( $suffix !~ /^\d+$/ );
+		unlink $dir."/".$_ if( $suffix >= $rotation-1 );
+	}
+
+	# rotate logs
+	my $i;
+	for ( $i = $rotation-2 ; $i >= 0 ; $i-- )  {
+		my $j = $i +1;
+
+		# found a file which is not compressed
+		if ( -f $dir."/".$file.".".$i )  {
+
+			# rotate it
+			rename $dir."/".$file.".".$i , $dir."/".$file.".".$j ;
+	
+			# compress it if compression is enabled
+			if ( $compress )  {
+				my $cmd = "gzip ".$dir."/".$file.".".$j;
+				system ( $cmd );
+			}
+		}
+		# found a file which is already compressed
+		elsif ( -f $dir."/".$file.".".$i.".gz" )  {
+
+			# rotate it
+			rename $dir."/".$file.".".$i.".gz" , $dir."/".$file.".".$j.".gz" ;
+
+			# uncompress it if compression is disabled
+			if ( ! $compress )  {
+				my $cmd = "gzip -d ".$dir."/".$file.".".$j.".gz";
+				system ( $cmd );
+			}
+		}
+	}
+
+	# last rotate, log -> log.0
+	rename $dir."/".$file , $dir."/".$file.".0" ;
+
+	# create an empty file (prevent re-using of file .0 in stats parser)
+	open( EMPTYFILE , "> ".$dir."/".$file );
+	print EMPTYFILE "";
+	close( EMPTYFILE );
+}
+
 1;
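
To illustrate rotate_log, here is a hypothetical run with a rotation depth of 3 and
compression enabled (the paths are made up; production uses a depth of 7, see the
configuration below):

    use Vhffs::Functions;

    # Before: access.log, access.log.0, access.log.1.gz, access.log.2.gz
    # The call deletes anything at or beyond depth-1 (access.log.2.gz),
    # shifts each remaining suffix up by one (gzipping the plain .0 as it
    # becomes .1), moves access.log to access.log.0 and recreates an empty
    # access.log so the stats parser never re-reads a stale .0 file.
    Vhffs::Functions::rotate_log( "/data/logs/web/incoming/webserver0/access.log", 3, 1 );
    # After:  access.log (empty), access.log.0, access.log.1.gz, access.log.2.gz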

Modified: trunk/vhffs-backend/conf/vhffs.conf
===================================================================
--- trunk/vhffs-backend/conf/vhffs.conf	2007-01-19 13:22:03 UTC (rev 410)
+++ trunk/vhffs-backend/conf/vhffs.conf	2007-01-20 09:25:11 UTC (rev 411)
@@ -180,6 +180,32 @@
 #####################################
 <services>
 
+	<web>
+		# Where to find logs from the webservers; each webserver should have its own directory, for example:
+		# - /data/logs/web/incoming/webserver0
+		# - /data/logs/web/incoming/webserver1
+		# - ...
+		log_incoming_root = /data/logs/web/incoming
+
+		# The maximum number of rotations; a rotation is made each time the stats robot runs
+		log_incoming_rotations = 7
+
+		# Should rotated logs be compressed?
+		log_incoming_compress = yes
+
+		# Where to put the parsed logfile of each website. It uses the same hash model as the web storage;
+		# consider using the same tree for both so users have access to their logs from their FTP accounts.
+		# If log_parsed_root = /data/web and log_parsed_dir = logs then example.com logs
+		# will be in /data/web/1b/df/72/example.com/logs/
+		log_parsed_root = /data/web
+
+		# Same as incoming
+		log_parsed_rotation = 7
+		log_parsed_compress = yes
+
+		# To be continued...
+	</web>
+
 	#DNS configuration
 	<dns>
 		#Default configuration for each domain-name
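
The hashed layout described in the <web> section is produced by
Vhffs::Functions::hash_webdir, which this diff does not show. Purely as an illustration,
assuming the three components come from the hex digest of the server name, something
like the following would yield paths of the 1b/df/72/example.com shape (the real scheme
lives in Vhffs::Functions and may differ):

    use Digest::MD5 qw(md5_hex);

    # Hypothetical stand-in for Vhffs::Functions::hash_webdir: use the
    # first three bytes of the MD5 digest of the server name as the
    # directory fan-out. An assumption, not the actual implementation.
    sub hash_webdir_sketch {
        my $servername = shift;
        my $h = md5_hex( $servername );
        return join( "/", substr( $h, 0, 2 ), substr( $h, 2, 2 ), substr( $h, 4, 2 ), $servername );
    }

    # "/data/web/" . hash_webdir_sketch( "example.com" ) . "/logs"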

Modified: trunk/vhffs-robots/src/generate_webstats.sh
===================================================================
--- trunk/vhffs-robots/src/generate_webstats.sh	2007-01-19 13:22:03 UTC (rev 410)
+++ trunk/vhffs-robots/src/generate_webstats.sh	2007-01-20 09:25:11 UTC (rev 411)
@@ -45,6 +45,10 @@
 echo "Parse http logs"
 /usr/lib/vhffs/bots/parse_httplog.pl
 
+# ^^^^^^^^^^^^^^^^^
+# ALREADY CONVERTED
+
+
 echo "Create awstats config files for each website"
 /usr/lib/vhffs/bots/create_statsconf.pl
 

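The rotation step in parse_httplog.pl (below) signals each webserver by dropping a
cycleok file in its log directory. The webserver-side counterpart is not part of this
commit; a hypothetical cron-driven watcher could look like this (the path and the
reload command are assumptions):

    #!/usr/bin/perl
    use strict;

    # Hypothetical watcher, run from cron on each webserver: when the
    # robot drops "cycleok" after rotating the logs, reopen Apache's
    # logfiles and remove the marker.
    my $dir = "/data/logs/web/incoming/webserver0";
    if ( -f $dir."/cycleok" ) {
        system( "apache2ctl graceful" );
        unlink $dir."/cycleok";
    }
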
Modified: trunk/vhffs-robots/src/parse_httplog.pl
===================================================================
--- trunk/vhffs-robots/src/parse_httplog.pl	2007-01-19 13:22:03 UTC (rev 410)
+++ trunk/vhffs-robots/src/parse_httplog.pl	2007-01-20 09:25:11 UTC (rev 411)
@@ -29,66 +29,137 @@
 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
 # POSSIBILITY OF SUCH DAMAGE.
 
-#Parse logs and put it in 
-#$OUTPUTDIR/$SERVERNAME
 
 #Your logs must be in format : "%V %h %l %u %t \"%r\" %>s %b"
 #So, add in your httpd.conf following lines :
 #LogFormat "%V %h %l %u %t \"%r\" %>s %b" vhffs
-#CustomLog /var/log/apache2/Vhffs::Robots::vhffs_log vhffs
+#CustomLog /var/log/apache2/vhffs.log vhffs
 
 use strict;
+use Vhffs::Main;
+use Vhffs::Conf;
+use Vhffs::Functions;
+use Vhffs::Services::Httpd;
+use Vhffs::Constants;
 
-my $LOGFILE   = "/data/logs/web/incoming/logfile";
-my $OUTPUTDIR = "/data/logs/web/sites";
-my $LOGNAME   = "logfile";
 
-if( ! -f $LOGFILE )
-{
-	print "Error, $LOGFILE is not readable\n";
-	exit( -1 );
+my $vhffs = init Vhffs::Main;
+die "ERROR: Cannot init VHFFS !!!" if( ! defined $vhffs );
+
+my $webs = Vhffs::Services::Httpd::getall( $vhffs , Vhffs::Constants::ACTIVATED );
+die "ERROR: Cannot fetch the list of websites\n" if( ! defined $webs );
+
+my $webconf = Vhffs::Conf::get_web_config();
+die "ERROR: A full web configuration is needed to generate stats\n" if ( ! defined $webconf );
+
+my $log_incoming_root = $webconf->{"log_incoming_root"};
+my $log_incoming_rotations = $webconf->{"log_incoming_rotations"};
+my $log_incoming_compress = ( $webconf->{"log_incoming_compress"} eq 'yes' ) ? 1 : 0;
+my $log_parsed_root = $webconf->{"log_parsed_root"};
+my $log_parsed_rotation = $webconf->{"log_parsed_rotation"};
+my $log_parsed_compress = ( $webconf->{"log_parsed_compress"} eq 'yes' ) ? 1 : 0;
+
+die "ERROR: ".$log_incoming_root." is not a directory" if( ! -d $log_incoming_root );
+die "ERROR: ".$log_parsed_root." is not a directory" if( ! -d $log_parsed_root );
+
+
+my %websites;
+foreach ( @{$webs} )  {
+	$websites{$_->get_servername} = 1;
 }
 
-if( ! -d $OUTPUTDIR )
-{
-	print "Error, $OUTPUTDIR is not a directory ";
-	exit( -1 );
+my @webservers;
+my $webserver;
+my @files;
+
+# -- Rotate web servers' logs (all *.log files; the suffix is hardcoded rather than
+#    configurable, the configuration is already complicated enough, and who would
+#    want anything other than .log?)
+
+opendir( DIR , $log_incoming_root );
+@files = readdir( DIR );
+
+foreach( @files )  {
+	next if( /\./ );	# skip any entry containing a dot, including . and ..
+	if( -d $log_incoming_root."/".$_ )  {
+		push @webservers, $_;
+	}
 }
+closedir( DIR );
 
+
+foreach $webserver ( @webservers )  {
+	opendir ( DIR , $log_incoming_root."/".$webserver );
+	@files = readdir( DIR );
+
+	foreach ( @files )  {
+		if( /.*\.log$/ )  {
+			Vhffs::Functions::rotate_log( $log_incoming_root."/".$webserver."/".$_ , $log_incoming_rotations , $log_incoming_compress );
+		}
+	}
+
+	# drop a file to tell the webserver to restart (ugly signaling over NFS, I know, I know... but IT WORKS, it is secure, and doesn't consume too much CPU!)
+	open( FILECYCLE , "> ".$log_incoming_root."/".$webserver."/cycleok" );
+	print FILECYCLE "";
+	close( FILECYCLE );
+}
+
+# Wait 180 seconds to be sure all Apache instances have restarted (welcome to the land of pigs)
+#sleep ( 180 );
+
+# Deleting previous logs
+unlink $log_incoming_root."/mergedlog" if( -f $log_incoming_root."/mergedlog" );
+unlink $log_incoming_root."/rejectlog" if( -f $log_incoming_root."/rejectlog" );
+
+# Merge all logs
+my $cmd = "mergelog ".$log_incoming_root."/*/vhffs.log.0 > ".$log_incoming_root."/mergedlog";
+system ( $cmd );
+
+
+# Parse http logs
 my %logs;
 my $line;
 my $svname;
 my $log;
-my @temp;
-my $dir;
+my $prev = "";
 
-open( FILEIN , $LOGFILE );
 
-while( $line = <FILEIN> )
-{
-	if( ( $svname , $log ) = ( $line =~ /([a-zA-Z0-9\.\-]+)\s(.+)/g) )
-	{
+open( MERGEDIN , "< ".$log_incoming_root."/mergedlog" );
+open( REJECTOUT, "> ".$log_incoming_root."/rejectlog" );
+
+while( $line = <MERGEDIN> )  {
+	if( ( $svname , $log ) = ( $line =~ /([a-zA-Z0-9\.\-]+)\s(.+)/g) )  {
+
+		# Discard www
 		$svname =~ s/^www\.//;
-		push( @{$logs{$svname}} , $log );
-	}
-}
 
-foreach $svname ( keys %logs )
-{
-	$dir = $OUTPUTDIR . "/" . $svname;
-	mkdir $dir if( ! -d $dir );
-	my $logfile = $dir ."/" . $LOGNAME;
+		# We are _NOT_ hosting this website
+		if( ! exists $websites{$svname} )  {
 
-	open( FILEOUT , ">$logfile" );
-	@temp = @{$logs{$svname}};
+			print REJECTOUT $svname." ".$log."\n";
+		}
 
-	foreach $log ( @temp )
-	{
-		print FILEOUT  $log."\n";
+		# We host this website
+		else  {
+			# the website changed
+			if ( $prev ne $svname )  {
+				print "Changing to $svname\n";
+	
+				my $dir = $log_parsed_root."/".Vhffs::Functions::hash_webdir( $svname )."/logs";
+				Vhffs::Functions::create_dir($dir) if ( ! -d $dir );
+
+				my $lff = $dir."/access.log";
+				close(FILEOUT);	# no-op for the first website, FILEOUT is not open yet
+				open( FILEOUT , ">> ".$lff );
+
+				$prev = $svname;
+			}
+
+			print FILEOUT $log."\n";
+		}
 	}
-
-	close( FILEOUT );
 }
 
-close(FILEIN);
-
+close(MERGEDIN);
+close(REJECTOUT);
+close(FILEOUT);
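
To make the parsing step concrete, here is a made-up line in the expected
"%V %h %l %u %t \"%r\" %>s %b" format, and the same split the script performs on each
merged line:

    # %V is the canonical server name; the regex captures it as $svname and
    # leaves the rest of the line (a standard common-log record) in $log.
    my $line = 'example.com 192.0.2.1 - - [20/Jan/2007:09:25:11 +0000] "GET / HTTP/1.1" 200 1042';

    if ( my ( $svname, $log ) = ( $line =~ /([a-zA-Z0-9\.\-]+)\s(.+)/ ) ) {
        $svname =~ s/^www\.//;   # www.example.com and example.com share one logfile
        print "$svname => $log\n";
    }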

