	#!/usr/local/bin/perl
	
	# Filter for .trc files
	# 

	# Field positions in a raw trace line AFTER the main loop has split the
	# leading timestamp into two whitespace-separated fields.
	$_Timestamp=0;
	$_Secstamp=1;
	$_Elapsed_Time=2;
	$_Client_Address=3;
	$_HTTP_code=4;
	$_Size=5;
	$_Method=6;
	$_URL=7;
	$_User_Ident=8;
	$_Hierarchy=9;
	$_Type=10;
	$_Exp_Date=11;
	$_Next_Acc=12;
	$_Cacheable=13;

	$path="";							# path to the working directory (prefix for file names)
	$acc_nr=0;							# number of distinct URLs seen so far

	# Open the output file that receives the filtered traces.
	# Three-argument open keeps the mode separate from the file name, and the
	# message reports the real reason ($!) because this open creates or
	# truncates the file rather than looking it up.
	$s = $path . "new.trc.new";
	open(NEWFILE, ">", $s) || die "$s cannot be opened for writing: $!";
	
	
# First pass: convert every "*.trc" file into filtered records appended to
# NEWFILE. The whole pass repeats until the operator types 'kraj' at the
# prompt; end-of-input on STDIN is treated the same way so the loop cannot
# spin forever when no terminal is attached.
my $reply;
do {	#process all files in directory with extension ".trc"
	print "\nDo";
	foreach my $file (glob("${path}*.trc")) {
		print "\nFile $file";
		open(my $traces, "<", $file) || die "$file will not open: $!";
		print "\nOpened: ". $file;

		while (my $line = <$traces>) {		# takes data from $file
			chomp($line);					# chomp: only strips a newline, never data

			# Character 9 is overwritten so the timestamp splits into two
			# fields. Assigning to substr() beyond the end of the string is
			# fatal, so lines too short to carry it are skipped.
			next if length($line) < 10;
			substr($line, 9, 1) = " ";

			@list = split(" ", $line);		# global: reURL() works on @list
			next unless @list > $_URL;		# no URL field, nothing useful to emit

			# cacheable ("1") unless the line contains '?' or 'cgi-bin'
			my $pom = ($line =~ /\?|cgi\-bin/) ? "0" : "1";

			reURL();						# replaces the URL with its number

			# Output layout: _Timestamp=0 _Elapsed_Time=1 _HTTP_code=2 _Size=3
			# _Method=4 _URL=5 _Exp_Date=6 _Next_Acc=7 _Cacheable=8.
			# The two 9-character placeholders are filled in by the second pass.
			print NEWFILE join(" ",
				@list[$_Timestamp, $_Elapsed_Time, $_HTTP_code,
				      $_Size, $_Method, $_URL],
				"<<<<<<<<<",
				">>>>>>>>>",
				$pom), "\n";
		}
		close($traces);
	}
	print "\nAll files in directory processed. \nFor the end type 'kraj': ";
	$reply = <STDIN>;
	if (defined $reply) {
		chomp $reply;
		print $reply;
	} else {
		$reply = "kraj";					# EOF on STDIN: stop instead of looping forever
	}

} until ($reply eq "kraj");
	# Buffered write errors only surface at close, so check it.
	close(NEWFILE) || die "$s could not be closed: $!";
	print "\nClosed: ". $s;
	%visited = ();							# the second pass reuses this hash for file offsets
	
	# Field positions in the filtered records written by the first pass.
	$_Timestamp=0; 	$_Elapsed_Time=1;	$_HTTP_code=2;
	$_Size=3; 		$_Method=4; 		$_URL=5;
	$_Exp_Date=6;	$_Next_Acc=7; 		$_Cacheable=8;

	# Second pass: for every "*.new" file, patch each record in place with
	# the time of the next access to the same URL and an expiration date.
	foreach my $file (glob("${path}*.new")) {
		# TRACES stays a bareword handle because update() reads and writes it.
		open(TRACES, "+<", $file) || die "$file will not open: $!";
		print "\nOpened: ". $file;

		# %visited maps URL -> byte offset of its latest record in THIS file.
		# It is reset per file so an offset from one file is never used to
		# seek in another.
		%visited = ();

		my $pos = tell(TRACES);				# offset where the current record starts
		while (my $line = <TRACES>) {		# takes data from $file
			my $after = tell(TRACES);		# offset just past the current record
			@list = split(" ", $line);		# global: update() reads the HTTP code from @list

			# Blank or short lines have no URL field; skipping them stops
			# them from all colliding on the empty hash key.
			if (@list > $_URL) {
				my $prev_pos = $visited{$list[$_URL]};
				if (defined $prev_pos) {
					# URL seen before: write this access time into the earlier
					# record. update() leaves the handle positioned at $after.
					update($prev_pos, $after, $list[$_Timestamp]);
				}
				$visited{$list[$_URL]} = $pos;
			}
			$pos = $after;
		}
		close(TRACES) || die "$file could not be closed: $!";
	}
	print "\nClosed: ". $s;

##########################
# rename URL to number   #
##########################
	# Replaces the URL field of the global @list with a number.
	# The first time a URL is seen it is given the current value of $acc_nr
	# (remembered in %visited) and the counter is advanced; later occurrences
	# of the same URL reuse the stored number.
	sub reURL {
		my $url = $list[$_URL];

		unless (defined $visited{$url}) {
			$visited{$url} = $acc_nr;
			++$acc_nr;
		}

		$list[$_URL] = $visited{$url};
	}

#############################################
# update info expiration date & next access # 
#############################################
#arguments: previous position in file, current position in file, access time to be written to previous access
	# Patches an earlier record of the file open on the global handle TRACES.
	#
	# Arguments:
	#   $prevpos - byte offset of the record to patch (the previous access)
	#   $pospos  - byte offset to return to afterwards
	#   $time    - access time written into the previous record
	#
	# The ">>>>>>>>>" placeholder is replaced by $time. The "<<<<<<<<<"
	# placeholder is replaced by $time-1 when the HTTP code field of the
	# global @list matches a refresh / if-modified-since result, otherwise
	# by "000000000".
	#
	# The record is rewritten in place, so it is only written back when its
	# length is unchanged; a longer or shorter record would overwrite or
	# misalign the records that follow. A missing placeholder is left alone
	# instead of letting index()'s -1 target the end of the record.
	#
	# The parameter list is unpacked from @_; the former "($prevpos, ...)"
	# after the sub name was parsed by Perl as an (illegal) prototype.
	sub update {
		my ($prevpos, $pospos, $time) = @_;

		seek(TRACES, $prevpos, 0) || die "cannot seek to previous access: $!";	#go to previous access
		my $record = <TRACES>;													#read (a lexical, so $_ is not clobbered)

		if (defined $record) {
			my $orig_len = length($record);

			my $next_at = index($record, ">>>>>>>>>");
			if ($next_at >= 0) {
				substr($record, $next_at, 9) = $time;							#add next access time
			}

			my $exp_at = index($record, "<<<<<<<<<");
			if ($exp_at >= 0) {
				if ($list[$_HTTP_code] =~ /TCP_REFRESH_HIT|TCP_REF_FAIL_HIT|TCP_REFRESH_MISS|TCP_IMS_MISS/) {
					substr($record, $exp_at, 9) = $time - 1;					#expiration date - approximation
				} else {
					substr($record, $exp_at, 9) = "000000000";					#no expiration date
				}
			}

			if (length($record) == $orig_len) {
				seek(TRACES, $prevpos, 0) || die "cannot seek to previous access: $!";
				print TRACES $record;											#update data
			} else {
				warn "record at offset $prevpos not updated: replacement would change its length\n";
			}
		}

		seek(TRACES, $pospos, 0) || die "cannot seek back to current position: $!";	#return
	}
