#!/bin/perl5 ############################################################################ # # # The contents of this file are subject to the WebStone Public License # # Version 1.0 (the "License"); you may not use this file except in # # compliance with the License. You may obtain a copy of the License # # at http://www.mindcraft.com/webstone/license10.html # # # # Software distributed under the License is distributed on an "AS IS" # # basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See # # the License for the specific language governing rights and limitations # # under the License. # # # # The Original Code is WebStone 2.5. # # # # The Initial Developer of the Original Code is Silicon Graphics, Inc. # # and Mindcraft, Inc.. Portions created by Silicon Graphics. and # # Mindcraft. are Copyright (C) 1995#1998 Silicon Graphics, Inc. and # # Mindcraft, Inc. All Rights Reserved. # # # # Contributor(s): ______________________________________. # # # ############################################################################ # @(#) mine-logs.wpl 2.3@(#) # mine-logs.wpl: # script to transform access logs into WebStone workload # # created 18 December 1995 mblakele@engr.sgi.com # # functional map: # usage: mine-logs.wpl access.log # # 1. For each line in the input # a. parse the URL and the time, the outcome code, and the size # b. if the code is 200, and it's a GET, # do we already know about this URL? # i. yes - increment its counter # ii. no - create a slot for it, record size, # and set counter=1 # $debug = 0; $line_number = 0; while (<>) { chomp; $line_number++; ($line_number % 1000) || printf STDERR "."; # parse line ( $client, $junk1, $junk2, $date, $timezone, $command, $url, $version, $result_code, $size ) = split; # strip some junk $command =~ s/\"//; $date =~ s/\[//; ($debug) && printf STDERR "$client, $date, $command, $url, $result_code, $size\n"; # is it a GET? Did it succeed? (i.e., is the result code 200?) if (($command eq 'GET') && ($result_code == 200)) { # is this URL already in the key set? if (exists $counter{$url}) { # URL is in key set ($debug) && printf STDERR "URL $url already in key set: incrementing\n"; $counter{$url}++; if ($size == $size{$url}) { ($debug) && printf STDERR "size mismatch on $url: $size != $size{$url}\n"; if ($size <=> $size{$url}) { $size{$url} = $size; } } } else { # URL isn't in key set ($debug) && printf STDERR "URL $url isn't in key set: adding size $size\n"; $counter{$url} = 1; $size{$url} = $size; } # end if key set } # end if GET } # end of input file printf STDERR "\n"; # now we print out a workload file # first, the headline $date = `date`; chomp($date); printf "# WebStone workload file\n# \tgenerated by $0 $date\n#\n"; # next, sort the keys @sorted_keys = sort by_counter keys(%counter); # iterate through sorted keys foreach $key (@sorted_keys) { # print url, weighting, and (commented) the size in bytes ($debug) && printf STDERR "printing data for $key\n"; printf "$key\t$counter{$key}\t#$size{$key}\n"; } # end foreach # end main sub by_counter { $counter{$b} <=> $counter{$a}; } # end by_counter # end mine-logs.wpl