	#!/usr/bin/perl
	#
	# predict delays between accesses
	#
	
	#define constants
		
	$_Timestamp=0; 	$_Elapsed_Time=1;	$_HTTP_code=2;  
	$_Size=3; 	$_Method=4; 		$_URL=5;
	$_Exp_Date=6;	$_Next_Acc=7; 		$_Cacheable=8;
	$elapsed_time = 0;	#simulation time
	$dhit = 0;	#correctly predicted delays
	$dmiss = 0;	#incorrectly predicted delays
	$all_free = 100000000;		#cache size
	$pd_free = $all_free;	#predicted delay cache size
	$cur_time=904201218;


   	#open log file
	open(LOG, "+<rez_pd.txt") || die "$0: rez_pred_delays.txt will not open.";
	seek(LOG, 0,2);
	&load_ptable;
	&load_ttable;
	#&fill_que;

	#process all files in directory with extension ".new"
	foreach $file (<*.new>) {
       	open(TRACES, "<$file") || die "$file will not open.";
		print "\nOpened: ". $file. "\n";
		seek(TRACES, 0,2); $file_size = tell(TRACES); seek(TRACES, 0,0); 
		
		$pos = tell(TRACES);
		$prev_pos = tell(TRACES);
		$start_time = time();
		while(<TRACES>) { # takes data from $file
			$proc=tell(TRACES)/$file_size;
			print "\r$cur_time done: ".$proc." of 1     ";
			#if($proc>0.025){last;}
			@list = split(" ", $_);
			if($list[$_Cacheable]){
				&pd;
			}
		}
		$end_time = time();
		close(TRACES);
		$elapsed_time += ($end_time-$start_time);
		if($dmiss>0){
			print "\nHits:$dhit Misses: $dmiss hitrate: ". ($dhit/($dhit+$dmiss)*100)."\n";
		}
		print "\nClosed: ". $file."\n";
		print "\nElapsed: $elapsed_time seconds";

    }
	&std_stat;
	&log_stat;

## end ##


sub std_stat{
		print "Size = $all_free bytes, elapsed $elapsed_time seconds	atp refresh=$atp_refresh\n";
		if($pd_access){print "pd     : Acc = $pd_access, Hits = $pd_hits (".($pd_hits/$pd_access*100)."%), Miss = $pd_miss \n".
							   "		   WAcc = $pd_waccess, WHits = $pd_whits (".($pd_whits/$pd_waccess*100)."%), WMiss = $pd_wmiss \n";}
		print "Hits:$dhit Misses: $dmiss hitrate: ". ($dhit/($dhit+$dmiss)*100)."\n";
}
sub log_stat{
		print LOG "Size = $all_free, elapsed $elapsed_time seconds	atp refresh=$atp_refresh\n";
		if($pd_access){print LOG "pd     : Acc = $pd_access, Hits = $pd_hits (".($pd_hits/$pd_access*100)."%), Miss = $pd_miss \n".
										"WAcc = $pd_waccess, WHits = $pd_whits (".($pd_whits/$pd_waccess*100)."%), WMiss = $pd_wmiss \n";}
		print LOG "Hits:$dhit Misses: $dmiss hitrate: ". ($dhit/($dhit+$dmiss)*100)."\n";
}

	#&storeaa(\%stat, ">rez_pred_delays.txt");


#############################################
# algorythms
#############################################


###########
# DELAYS  #
###########

	sub predict_delay{									#tested:OK
		my($del, $pom, $prev_acc, $state, $pstate, $nstate, $prev_scd, $next_scd, $size, $pomlist);
		if(defined($pd_cache{$list[$_URL]})){			#tested:OK
														#accessed before
			$pomlist=$pd_cache{$list[$_URL]};
			#print "accessed before: @$pomlist\n";
			$prev_acc=shift(@$pomlist);					#last access time
			$prev_scd=shift(@$pomlist);					#last schedule time
			$size=shift(@$pomlist);						#size of the object
			$del = $list[$_Timestamp]-$prev_acc;		#last delay between accesses
			&det_state(\$del, \$state);					#current state
			#print "del=$del, state=$state ";
			push(@$pomlist, $state);					#record current state
			$pstate=0;									#for correct prediction
			&next_state($pomlist, \$pstate);			#predict next state
			$next_scd=$cur_time+$ttable[$pstate];		#next schedule time
			#print "pstate=$pstate ";
			unshift(@$pomlist, $size);					#record size
			unshift(@$pomlist, $next_scd);				#record schedule time
			unshift(@$pomlist, $list[$_Timestamp]);		#record next state
			$pd_cache{$list[$_URL]}=$pomlist;
			$del = $list[$_Next_Acc]-$list[$_Timestamp];#verify prediction
			&det_state(\$del, \$nstate);
			#print "nstate=$nstate next_scd=$next_scd\n";
			if($state<=$pstate && $nstate >= 0){$dhit++;}else{$dmiss++;};
		}else{											#tested:OK
														#accessed never before
			$state=11;
			push(@$pomlist, 11);
			$pstate=0;									#for correct prediction
			&next_state($pomlist, \$pstate);			#predict next state
			$next_scd=$cur_time+$ttable[$pstate];		#next schedule time
			#print "cur_time=$cur_time pstate=$pstate ";
			$del = $list[$_Next_Acc]-$list[$_Timestamp];#verify prediction
			&det_state(\$del, \$nstate);
			#print "nstate=$nstate next_scd=$next_scd\n";
			if($state<=$pstate && $nstate >= 0){$dhit++;}else{$dmiss++;};

			if($state>=0){
			  $pomlist = [$list[$_Timestamp], $next_scd, $list[$_Size], 11];
			  $pd_cache{$list[$_URL]}=$pomlist;
			}
			#print "accessed never before: @$pomlist\n";
		}
														#tested:OK
		print "hitrate: ". ($dhit/($dhit+$dmiss)*100)."      ";
																					
		$pom=$schedule[$pstate][$state];
		$pom1=$pom->{$next_scd};
		if(!defined($pom1)){ $pom1=[$list[$_URL]];
		}else{ push(@$pom1, $list[$_URL]); }
		#if(!defined($pom1)){print "jos nedef pom1 "}else{print " url: $list[$_URL] pom1: @$pom1  "}
		$pom->{$next_scd}=$pom1;
		$schedule[$pstate][$state]=$pom;
		#print "scheduled: pstate=$pstate state=$state @$pom1 \n";
	}

	sub pd{
		while($cur_time<$list[$_Timestamp]){
			$cur_time++;
			$clockup=1;
			&reschedule;
		}
		#print "$list[$_URL] $list[$_Size]\n";
		if($all_free*0.1 < $list[$_Size]){
			#too big to be cached					#tested:OK
			$pd_miss++;
			$pd_wmiss += $list[$_Size];
			$pd_access++;
			$pd_waccess+=$list[$_Size];
			return 0;
		}elsif(defined $pd_cache{$list[$_URL]}){
			#found in the cache 					#tested:OK
			#print"found in the cache\n";
			$pd_hits++;
			$pd_whits += $list[$_Size];
		}else{
			#not found in the cache					#tested:OK without &remove
			$pd_miss++;
			$pd_wmiss += $list[$_Size];
			
			# if there is no free space in the cache...
			while($pd_free < $list[$_Size]){
				#...free some space
				#$pd_free += $pd_cache{$pd_sorted_access[0]};
				#delete($pd_cache{$pd_sorted_access[0]});
				#shift(@pd_sorted_access);
				&remove($list[$_Size]);
				#print " **** \n";
			}
			
			#now there is free space in cache		#tested:OK without &remove
			$pd_free -= $list[$_Size];				#reserve some space
			#print "pd_free=$pd_free\n";
			#push(@pd_sorted_access, $list[$_URL]);	#add to the tail of the list
		}
		
		#update cache for all cases					#tested:OK without &predict delay &remove
		&predict_delay;								#predict delay, put in queue, cache...
		$pd_access++;								#statistics
		$pd_waccess+=$list[$_Size];

	}

#########################
#other functions		#
#########################

	#loads probability table
	sub load_ptable{
		my($i, $j, $pom, %apt, $key);
   		#open probability file
		open(PROB, "<probabilities.txt") || die "$0: probabilities.txt will not open.";
		
		for($i=0; $i<12; $i++){
			for($j=0; $j<12; $j++){
				$ptable[$i][$j]=<PROB>;
				chop($ptable[$i][$j]);
				$apt{$j}=$ptable[$i][$j];
				#print $ptable[$i][$j]." ";
			}
			$j=0; print "i=$i: ";
			foreach $key (sort { $apt{$a} <=> $apt{$b} } keys %apt) {
				$aptable[$i][$j++]=$key;
				print "$key ";
			}
			print "\n";
		}
		
		close(PROB);
	}

	#loads table of delays $ttable[state]=delay;
	sub load_ttable{
		$ttable[0]=32; $ttable[1]=195; $ttable[2]=1005; 
		$ttable[3]=3116; $ttable[4]=7910; $ttable[5]=17463; 
		$ttable[6]=34918; $ttable[7]=66842; $ttable[8]=116272; 
		$ttable[9]=343966; $ttable[10]=343966; $ttable[11]=343966; 
	}

	#for given delay returnes state
	sub det_state($$){
		my($lok_del, $lok_state); 
		$lok_del=shift(@_); $lok_state=shift(@_);
			if($$lok_del>=0){
			  if($$lok_del<=32){			$$lok_state=0;
			  }elsif($$lok_del<=195){		$$lok_state=1;
			  }elsif($$lok_del<=1005){		$$lok_state=2;
			  }elsif($$lok_del<=3116){		$$lok_state=3;
			  }elsif($$lok_del<=7910){		$$lok_state=4;
			  }elsif($$lok_del<=17463){		$$lok_state=5;
			  }elsif($$lok_del<=34918){		$$lok_state=6;
			  }elsif($$lok_del<=66842){		$$lok_state=7;
			  }elsif($$lok_del<=116272){	$$lok_state=8;
			  }elsif($$lok_del<=343966){	$$lok_state=9;
			  }else{						$$lok_state=10;
			  }
			}else{	$$lok_state=-1;
			}

	}
	
	#for given history returnes next state greater or equal to state found in 2nd argument
	#2nd argument should be (0 for new entrys or hits) or (previous missed state for misses)
	sub next_state($$){
		my($i, $max_prob, $lok_prevdel, $lok_nstate, $lok_laststate, $old_state, $found); 
		$lok_prevdel=shift(@_); $lok_nstate=shift(@_); $max_prob=0; 
		$lok_laststate=$lok_prevdel->[$#$lok_prevdel];
		#print "No. dels:".$#$lok_prevdel." laststate=$lok_laststate\n";
		$old_state=$$lok_nstate;												#for given history returnes next state greater or equal to state found in 2nd argument
		$$lok_nstate=11;														#default state - infidelity
		
		if($lok_laststate!=11){													#accessed more than once
			$found=0;
			for($i=$#$lok_prevdel; $i>=0; $i--){								#find state with higher probability of being the next one for given state
				if($lok_prevdel->[$i]!=11 && $lok_prevdel->[$i]>=$old_state){	#avoid going to state 11 and finding state less than given
					if($max_prob<$ptable[$lok_laststate][$lok_prevdel->[$i]]){
						$max_prob=$ptable[$lok_laststate][$lok_prevdel->[$i]];
						$$lok_nstate=$lok_prevdel->[$i];
						$found++;
					}
				}
			}
			if(!$found){														#tested:OK
				#print "Not found!\n";											#next state could not be found among previous
				for($i=9; $i>=0; $i--){											#find most possible state
					if($max_prob<$ptable[$lok_laststate][$i] && $i>=$old_state){
						$max_prob=$ptable[$lok_laststate][$i];
						$$lok_nstate=$i;
						$found++;
					}
					if($$lok_nstate==10){										#avoid state 10
						$$lok_nstate=11;
						#print "\n10n u 11\n"
					};						
				}
				#print "Found=$found max_prob=$max_prob old_state=$old_state lok_laststate=$lok_laststate\n";														#next state could not be found among previous
			}
		}else{		#tested:OK													#previously in state 11
			if($#$lok_prevdel==0){												#first reaccess to the same object
				#for($i=11; $i>=0; $i--){										#find most possible state
					#if($max_prob<$ptable[11][$i] && i>=$old_state){				#greater than current state (state eaven it is not accessed)
					#	$max_prob=$ptable[11][$i];
					#	$$lok_nstate=$i;
					#}
					#if($$lok_nstate==10){										#avoid state 10
					#	$$lok_nstate=11;
					#	#print "\n10 u 11\n"
					#};
				#}
				$$lok_nstate=($old_state<10)?$old_state:11;						#if not visited yet it is rescheduled for the next greater time
			}else{																#previously visited but not reaccessed for long time
				$$lok_nstate==11;
				#print "\n11 u 11\n"
			}
		}
		#print " nextstate=$$lok_nstate\n";
	}

	#when
	sub reschedule{														#tested:OK (hope)
		my($i, $pom, %pom, $lpom1, $lpom, @lok_list, $url);

		#print "\nReschedule ";
		for($i=11; $i>=0; $i--){
			for($j=11; $j>=0; $j--){
				$lpom=$schedule[$i][$j];
				$lpom1=$lpom->{$cur_time};								#getting from scd queue
				#if(@$lpom1){print "cur_time=$cur_time lpom1=@$lpom1 ;\n";}
				while(defined($url = shift(@$lpom1))){
					$pomlist=$pd_cache{$url};							#get data for this object
					if($pomlist->[1]==$cur_time){						#if scheduled for this time slot (not rescheduled on hit=>look at last schedule time)
						#print "Rescheduling cur_time=$cur_time $url @$pomlist\n";
						$prev_acc=shift(@$pomlist);						#last access time
						$prev_scd=shift(@$pomlist);						#last schedule time
						$size=shift(@$pomlist);							#size of the object
						$del = $cur_time-$prev_acc;						#delay between now and last accesses
						&det_state(\$del, \$state);						#current state
						#print " del=$del, state=$state ";
						push(@$pomlist, $state);						#record current state (only for prediction)
						$pstate=$state+1;								#for correct prediction need greater stete (this one is missed). Some heuristics could be added here!
						&next_state($pomlist, \$pstate);				#predict next state
						pop(@$pomlist);									#current state not needed anymore
						$next_scd=$prev_acc+$ttable[$pstate];			#next schedule time
						#print "pstate=$pstate next_scd=$next_scd\n";
						unshift(@$pomlist, $size);						#record size
						unshift(@$pomlist, $next_scd);					#record schedule time
						unshift(@$pomlist, $prev_acc);					#record next state
						$pd_cache{$url}=$pomlist;
					
						$pom=$schedule[$pstate][$state];				#update scd queue
						$pom1=$pom->{$next_scd};						#
						if(!defined($pom)||!defined($pom)){				#
							$pom->{$next_scd}=[$url];					#
						}else{											#
							push(@$pom1, $url); 						#
							$pom->{$next_scd}=$pom1;					#
						}
						$schedule[$pstate][$state]=$pom;				#
						#print " $pstate $state $cur_time $next_scd $schedule[$pstate][$state]->{$next_scd}->[-1] \n";
					}else{
						#print "Rescheduled for another time\n";
					}
				}
				delete $lpom->{$cur_time};
				$schedule[$i][$j]=$lpom;								#update scd queue
				#print "all: $i $j $cur_time $next_scd $schedule[$i][$j]->{$next_scd}->[-1] \n";

			}
		}


	}

	sub remove($){
		my($time, $howmuch, $lpom, $lpom1, @lok_list, $url, @pomlist, @probs, @pstats, $i, $j, $k, $del, $pomstate);
		$howmuch=shift @_;
		
		#print "\nRemove\n";
		while($howmuch>0){ 
		#print "howmuch $howmuch\n";
			
			for($i=11;$i>=0;$i--){										#removes those scheduled for later accesses
				for($j=0;$j<=11;$j++){									#the less probable state
					$k=$aptable[$i][$j];								#removes those with less probability of being accessed in thist interval
					$lpom=$schedule[$k][$i];							#getting from scd queue
					#print "k=$k lpom=%$lpom\n";
					#if(defined(%$lpom)){print "defined lpom ";};
					###while (($time,$lpom1) = each %$lpom) {
					foreach $time (sort{$b<=>$a}(keys %$lpom)){			#this and next line disable if enable previous line
						$lpom1=$$lpom{$time};							#
						#print "new time $time url=@$lpom1\n";
						while(defined($url = shift(@$lpom1))){
							#print "new url $url\n";
							$pomlist=$pd_cache{$url};					#get data for this object
							$prev_acc=shift(@$pomlist);					#last access time
							$prev_scd=shift(@$pomlist);					#last schedule time
							if($prev_scd==$time){						#delete only those not rescheduled
								$size=shift(@$pomlist);					#size of the object
								$howmuch-=$size;						#free space
								$pd_free+=$size;						#free space
								delete $pd_cache{$url};					#free cache tag
							}else{										#object rescheduled so not deleted
								unshift(@$pomlist, $prev_scd);			#last schedule time
								unshift(@$pomlist, $prev_acc);			#last access time
								$pd_cache{$url}=$pomlist;				#set data for this object
								#print  "\napt=$k i,j=".$i." ". $j ." prevscd=$prev_scd prevacc=$prev_acc del=$del pomstate=$pomstate\n";
								
							}
						}
						delete $lpom->{$time};
						$schedule[$i][$k]=$lpom;						#free entry in the queue
						#print "\nhowmuch=$howmuch pd_free=$pd_free\n";
					}
					if($howmuch<=0){last;}
				}
				if($howmuch<=0){last;}
			}
		}

	}

############################################
#subroutine for dumping asoc array to file #
############################################

sub storeaa($$){
	my $a=shift(@_), $f=shift(@_), $key;

	open(DMP, "$f") || die "$0: will not open.";
	
		while (($key, $val) = each (%$a) ){
			print DMP $key." ".$val."\n";
			#print DMP $$a{$key}."\n";
		}

	close DMP;
}

sub loadaa($$){
	my $a=shift(@_), $f=shift(@_), $key;

	open(DMP, "<$f") || die "$0: will not open.";
	
		while(<DMP>) { # takes data from $file
			@list = split(" ", $_);
			$$a{$list[0]}=$list[1];
		}

	close DMP;
}
