#! /usr/bin/perl
#
#  Purpose:
#  FlowTracker_Collector runs periodically to collect 5-minute values of
#  netflow data, according to the specified tracking filter.
#
#  Description:
#  FlowTracker_Collector is used to collect 5-minute periods of netflow
#  data which is stored by flow-tools. The script is initiated by the user
#  from a command line, appending an '&' to keep it in the background. For 
#  each Tracking set created by the user and represented by a filter file 
#  in the FlowTracker_Filter directory, FlowTracker_Collector extracts the
#  last 5 minutes worth of data flows that match against the filter. The
#  resulting value is stored into the appropriate RRDtool data base.
#  FlowTracker_Collector will collect netflow totals according to the pre-
#  defined filters. It will collect this data for 5-minute periods that fall
#  on even 5-minute boundaries (e.g., 00:05, 00:10, 00:15, etc.) regardless
#  of when it is actually started. It collects data from approximately 30
#  minutes earlier to allow long flows to be exported into flow-tools.
#  When it is started up, FlowTracker_Collector will check for the last 
#  collect period, and if enough time has not passed, it will go to sleep
#  so that it does not collect twice for the same period.
#
#  Input arguments:
#  Name                 Description
#  -----------------------------------------------------------------------
#  collection_period    Determines the length between collections (e.g., 5 min.)
#  
#  Input files:
#  Name                 Description
#  -----------------------------------------------------------------------
#  Filter file          Used to control the extraction of netflow data
#  RRDtool file         This is the RRDtool file created for this tracking
#
#  Modification history:
#  Author       Date            Vers.   Description
#  -----------------------------------------------------------------------
#  J. Loiacono  07/04/2006      3.0     Original version.
#
#$Author$
#$Date$
#$Header$
#
###########################################################################
#
#               BEGIN EXECUTABLE STATEMENTS
#
 
use FlowViewer_Configuration; 
use FlowViewer_Utilities; 
use lib $cgi_bin_directory; 

# Main collection loop.
#
# Each pass:
#   1. Determines the collection period [period_start, period_end) that ends
#      on the most recent $collection_period boundary, shifted back by
#      $collection_offset.
#   2. For every file in $filter_directory: rewrites the filter's placeholder
#      time limits for this period, runs flow-cat | flow-nfilter | flow-print,
#      and sums the bits of each matching flow that fall inside the period
#      (flows that straddle a period edge are pro-rated by duration).
#   3. Stores the per-second average for the period in the tracking's RRD file.
#   4. Sleeps for whatever is left of $collection_period.
#
# All configuration values ($collection_period, $collection_offset,
# $filter_directory, $N, ...) are globals that this file does not define.

# Return the flow-tools directory for the local calendar day containing
# $epoch. The layout is selected by the global $N (same cases as flow-capture
# nesting levels -3..3, excluding 0); any other value yields just the
# per-device base directory. Reads the globals $flow_data_directory and
# $device_name.
sub _flow_directory_for_epoch {
	my ($epoch) = @_;

	my ($mday,$mon,$year) = (localtime($epoch))[3,4,5];
	my $yyyy = $year + 1900;
	my $mm   = sprintf("%02d",$mon + 1);
	my $dd   = sprintf("%02d",$mday);

	my $directory = "$flow_data_directory/$device_name";
	if    ($N == -3) { $directory .= "/$yyyy/$yyyy-$mm/$yyyy-$mm-$dd"; }
	elsif ($N == -2) { $directory .= "/$yyyy-$mm/$yyyy-$mm-$dd"; }
	elsif ($N == -1) { $directory .= "/$yyyy-$mm-$dd"; }
	elsif ($N ==  1) { $directory .= "/$yyyy"; }
	elsif ($N ==  2) { $directory .= "/$yyyy/$yyyy-$mm"; }
	elsif ($N ==  3) { $directory .= "/$yyyy/$yyyy-$mm/$yyyy-$mm-$dd"; }

	return $directory;
}

# Convert a flow timestamp into seconds relative to the start of the current
# collection period.
#   $day_offset  - calendar-day offset of the timestamp from the period start
#                  day: -1 (day before), 0 (same day) or 1 (day after)
#   $secs_of_day - seconds since local midnight, including the millisecond
#                  fraction
# Reads the global $start_secs (period start, in seconds since local midnight).
sub _seconds_from_period_start {
	my ($day_offset,$secs_of_day) = @_;

	if ($day_offset == 0) { return $secs_of_day - $start_secs; }
	if ($day_offset == 1) { return 86400 - $start_secs + $secs_of_day; }

	# Day before the period start. The time of day of the period start must
	# be subtracted here as well.
	return $secs_of_day - 86400 - $start_secs;
}

$just_started_up = 1;

while (1) {

	# Logging and debugging are best-effort: a failed open is not fatal.

	if ($log_collector eq "Y") { open (LOG,">>","$log_directory/FlowTracker_Collector.log"); }
	if ($debug_tracker eq "Y") { open (DEBUG,">","$work_directory/DEBUG_TRACKER"); }

	($sec,$min,$hr,$date,$mnth,$yr,$day,$yr_date,$DST) = localtime(time);
	format_date($sec,$min,$hr,$date,$mnth,$yr);

	# Determine the period to collect: it ends on the latest period boundary,
	# moved back by the configured collection offset.

	$start_collect_time  = time;
	$seconds_past_period = $start_collect_time % $collection_period;
	$period_end          = $start_collect_time - $collection_offset - $seconds_past_period;
	$period_start        = $period_end - $collection_period;

	($sec,$min,$hr,$date,$mnth,$yr,$day,$yr_date,$DST) = localtime($period_start);
	$mnth++;
	if (length($mnth) < 2) { $mnth = "0" . $mnth; }
	if (length($date) < 2) { $date = "0" . $date; }
	$period_start_md = $mnth . $date;
	$start_secs = 3600*$hr + 60*$min + $sec;

	# The period-end values below are not read again in this file; they are
	# kept because they are package globals.

	($sec,$min,$hr,$date,$mnth,$yr,$day,$yr_date,$DST) = localtime($period_end);
	$mnth++;
	if (length($mnth) < 2) { $mnth = "0" . $mnth; }
	if (length($date) < 2) { $date = "0" . $date; }
	$period_end_md = $mnth . $date;
	$end_secs = 3600*$hr + 60*$min + $sec;

	# flow-print reports flow times as MMDD plus a time of day, with no year.
	# Map the MMDD of the day before, the day of, and the day after the period
	# start to a day offset. Subtracting MMDD values numerically gives the
	# wrong answer across month and year boundaries (e.g. 0201 - 0131 = 70).
	# Local noon is used as the anchor so that a DST shift cannot push the
	# +/- 86400 second steps into the wrong calendar day.

	my $period_start_noon = $period_start - $start_secs + 43200;
	my %day_offset_of = ();
	foreach my $day_offset (-1,0,1) {
		my ($near_mday,$near_mon) = (localtime($period_start_noon + $day_offset*86400))[3,4];
		$day_offset_of{ sprintf("%02d%02d",$near_mon + 1,$near_mday) } = $day_offset;
	}

	$start_epoch = timelocal(localtime($period_start));
	$end_epoch   = timelocal(localtime($period_end));

	$start_flows = &flow_date_time($start_epoch,"LOCAL");
	$end_flows   = &flow_date_time($end_epoch,"LOCAL");

	# Force the seconds field of both filter time limits to :00

	($date_hr,$min,$sec) = split(/:/,$start_flows);
	$start_flows = $date_hr .":". $min .":00";

	($date_hr,$min,$sec) = split(/:/,$end_flows);
	$end_flows   = $date_hr .":". $min .":00";

	# Set up start and end time for concatenating flow-tools data files

	$cat_start_epoch = $start_epoch - $flow_file_length - 61;
	$cat_end_epoch   = $end_epoch   + $flow_capture_interval + 1 -600;
	$cat_start       = epoch_to_date($cat_start_epoch,"LOCAL");
	$cat_end         = epoch_to_date($cat_end_epoch,"LOCAL");

	($cat_start_date,$cat_start_time)   = split(/ /,$cat_start);
	($start_month,$start_day,$start_yr) = split(/\//,$cat_start_date);
	($cat_end_date,$cat_end_time)       = split(/ /,$cat_end);
	($end_month,$end_day,$end_yr)       = split(/\//,$cat_end_date);

	# For this time period, go through each tracking

	$num_trackings = 0;

	while ($existing_filter = <$filter_directory/*>) {

		$concatenate_parameters = "-a -t \"$cat_start\" -T \"$cat_end\" ";

		# Without a readable filter there is nothing meaningful to collect;
		# skip the tracking rather than store a zero built from an empty filter.

		unless (open (EXISTING,"<",$existing_filter)) {
			if ($log_collector eq "Y") {
				print LOG "Unable to open filter file (skipping): $existing_filter: $!\n";
			}
			next;
		}

		$existing_filter =~ s#.*/##;
		($tracking_file,$suffix) = split(/\./,$existing_filter);
		$html_directory = "$tracker_directory/$tracking_file";
		$rrdtool_file   = "$rrdtool_directory/$tracking_file.rrd";

		# If this is a quick restart (within collection period), avoid trying to update an existing period

		my $skip = 0;

		if ($just_started_up) {

			# The shell redirection creates/truncates the info file itself.

			$rrd_info_command = "$rrdtool_bin_directory/rrdtool info $rrdtool_file > $work_directory/FlowTracker_Collector_info";
			system($rrd_info_command);

			if (open (INFO,"<","$work_directory/FlowTracker_Collector_info")) {
				while (<INFO>) {
					chomp;
					$lead = substr($_,0,11);
					if ($lead ne "last_update") { next; }

					($lead,$last_update) = split(/ = /);
					if ($period_end <= $last_update) {
						$skip = 1;
						if ($log_collector eq "Y") {
							print LOG "Just starting (and skipping) for: $tracking_file\n";
						}
						last;
					}
				}
				close (INFO);
			}
		}

		if ($skip) { close (EXISTING); next; }

		# Extract information for this tracking and modify filter for this time period

		unless (open (FILTER,">","$work_directory/FlowTracker_Collector_filter")) {
			if ($log_collector eq "Y") {
				print LOG "Unable to write work filter (skipping): $tracking_file: $!\n";
			}
			close (EXISTING);
			next;
		}

		$num_trackings++;

		# Start clean so values from the previous tracking cannot leak in

		$device_name    = "";
		$tracking_label = "";

		while (<EXISTING>) {
			chomp;
			$key = substr($_,0,8);
			if ($key eq " input: ") {

				# " input: <field>: <value>" lines carry tracking metadata and
				# are not copied into the flow-nfilter file. The limit of 3
				# keeps a value that itself contains ": " in one piece.

				($input,$field,$field_value) = split(/: /,$_,3);
				if    ($field eq "device_name")    { $device_name    = $field_value; }
				elsif ($field eq "tracking_label") { $tracking_label = $field_value; }
			}
			elsif (/permit ge January 1, 2000 00:00:00/) {
				print FILTER "  permit ge $start_flows\n";
			}
			elsif (/permit lt January 1, 2000 00:00:00/) {
				print FILTER "  permit lt $end_flows\n";
			}
			else {
				print FILTER "$_\n";
			}
		}

		close (EXISTING);
		close (FILTER);

		# Add the flow-tools directory of every day the concatenation window touches

		if ($start_day ne $end_day) {
			for ($i=0;$i<31;$i++) {
				if (($cat_start_epoch + $i*86400) > $cat_end_epoch + 86400) { last; }
				$cat_directory = _flow_directory_for_epoch($cat_start_epoch + $i*86400);
				$concatenate_parameters .= "$cat_directory ";
			}
		}
		else {
			$cat_directory = _flow_directory_for_epoch($cat_end_epoch);
			$concatenate_parameters .= "$cat_directory ";
		}

		# Set up the command to concatenate the files

		$flowcat_command = "$flow_bin_directory/flow-cat" . " $concatenate_parameters";

		# Set up the command to filter the concatenated file

		$flownfilter_command = "$flow_bin_directory/flow-nfilter -f $work_directory/FlowTracker_Collector_filter -FFlow_Filter";

		# Set up the flow-print command to get the individual flows

		$flowprint_command = "$flow_bin_directory/flow-print -f5 >$work_directory/FlowTracker_Collector_output";
		$flow_run = "$flowcat_command | $flownfilter_command | $flowprint_command";
		system($flow_run);

		$period_bits         = 0;
		$total_bytes         = 0;
		$total_overlap_bytes = 0;
		$total_flow_bytes    = 0;

		# Parse through all flows that matched the filter, adding bits if part of flow within period

		if (open (FLOWS,"<","$work_directory/FlowTracker_Collector_output")) {

			while (<FLOWS>) {

				# Flow records start with a digit; anything else is skipped

				$first_char = substr($_,0,1);
				if (!($first_char =~ /[0-9]/)) { next; }

				($s_time,$e_time,$sif,$sip,$sp,$dif,$dip,$dp,$p,$fl,$pkt,$oct) = split(/\s+/,$_);
				$total_bytes += $oct;

				# Timestamps are split on "." into MMDD, HH:MM:SS and milliseconds

				($smd,$s_tm,$s_ms) = split(/\./,$s_time);
				($emd,$e_tm,$e_ms) = split(/\./,$e_time);

				($shr,$smn,$ssc) = split(/:/,$s_tm);
				($ehr,$emn,$esc) = split(/:/,$e_tm);

				$s_secs = 3600*$shr + 60*$smn + $ssc;
				$e_secs = 3600*$ehr + 60*$emn + $esc;

				# Determine flow time length (a different end day is taken to be the next day)

				if ($smd eq $emd) {
					$flow_length = ($e_secs + ($e_ms/1000)) - ($s_secs + ($s_ms/1000));
				}
				else {
					$flow_length = ($e_secs + ($e_ms/1000)) + (86400 - ($s_secs + ($s_ms/1000)));
				}
				if ($flow_length <= 0) { $flow_length = 0.001; }

				# Locate the flow's start and end days relative to the period
				# start day. A flow that cannot be placed within one day of
				# the period is skipped, so it can never be accounted with
				# offsets left over from the previous flow.

				my $start_day_offset = $day_offset_of{ sprintf("%04d",$smd) };
				my $end_day_offset   = $day_offset_of{ sprintf("%04d",$emd) };
				if (!defined($start_day_offset) || !defined($end_day_offset)) { next; }

				$start_delta = _seconds_from_period_start($start_day_offset,$s_secs + ($s_ms/1000));
				$end_delta   = _seconds_from_period_start($end_day_offset,  $e_secs + ($e_ms/1000));

				# Exclude flows totally outside of the period

				if ($start_delta >= $collection_period) { next; }
				if ($end_delta   <= 0)                  { next; }

				# Compute portion of this flow's bits that are within the period

				if ($start_delta < 0) {
					if ($end_delta < $collection_period) {
						# Started before the period, ended inside it
						$flow_octs = int (($end_delta/$flow_length) * $oct);
					}
					else {
						# Spans the whole period
						$flow_octs = int (($collection_period/$flow_length) * $oct);
					}
				}
				else {
					if ($end_delta < $collection_period) {
						# Entirely inside the period
						$flow_octs = $oct;
					}
					else {
						# Started inside the period, ended after it
						$flow_octs = int ((($collection_period - $start_delta)/$flow_length) * $oct);
					}
				}

				$period_bits         += 8 * $flow_octs;
				$total_overlap_bytes += $flow_octs;
			}

			close (FLOWS);
		}

		# Get a per-second average for the collection period

		$collection_period_avg = int( $period_bits / $collection_period );

		# Update the appropriate RRD file

		$rrdtool_command = "$rrdtool_bin_directory/rrdtool update $rrdtool_file $period_end:$collection_period_avg";
		system($rrdtool_command);

		if ($log_collector eq "Y") {

			print LOG "                     For: $tracking_label\n";
			print LOG "            RRDtool File: $rrdtool_file\n";
			print LOG "             start_flows: $start_flows\n";
			print LOG "               end_flows: $end_flows\n";
			print LOG "       Collection Period: $collection_period\n";
			print LOG "    Bytes from all Flows: $total_bytes\n";
			print LOG "Bytes overlapping Period: $total_overlap_bytes\n";
			print LOG "          Bits in Period: $period_bits\n";
			print LOG "        CollectPeriodAvg: $collection_period_avg\n";
			# Log the value that was actually handed to "rrdtool update"
			print LOG "                  Update: $period_end : $collection_period_avg\n";
			print LOG "\n";
		}
	}

	$just_started_up = 0;

	$end_collect_time = time;
	$loop_time = $end_collect_time - $start_collect_time;

	if ($log_collector eq "Y") { print LOG "At $formatted_date finished with this loop. $num_trackings Trackings. Loop took: $loop_time seconds\n\n\n"; }

	close (LOG);
	close (DEBUG);

	# Sleep out the remainder of the period. If this pass overran the period,
	# start the next pass immediately instead of calling sleep() with a
	# negative value.

	$sleep_period = $collection_period - $loop_time;
	if ($sleep_period > 0) { sleep ($sleep_period); }
}
