#!/usr/bin/perl

# ---------------------------------------------------------------------------
# Shared parser state.
#
# These file-scoped lexicals are filled in by readFile() (defined below) each
# time it parses a "<rootName>_<L>.txt" file; the main script then reads
# @probMatrix.  Most arrays are indexed by the two numbers that follow a
# "*Node:" marker in the input file, written here as [x][y].
# ---------------------------------------------------------------------------
my @seqNames;       # raw lines collected from the sequence-name section
my @probMatrix;     # [x][y][position][0..3]: four columns per matrix row
my @thresholds;     # [x][y]: value of the "Threshold:" line
my @hitCounts;      # [x][y]: value of the "HitCount:" line
my @softCounts;     # [x][y]: value of the "SoftCount:" line
my @randCounts;     # [x][y]: value of the "RandCount:" line
my @infos;          # [x][y]: value of the "Information:" line
my @complexity;     # [x][y]: value of the "Complexity:" line
my @consensus;      # [x][y]: value of the "Consensus:" line
my @Z_Scores;       # [x][y]: value of the "Z_Score:" line
my $minZ=0;         # smallest Z score seen by the last readFile() call (starts at 0)
my $maxZ=0;         # largest Z score seen by the last readFile() call (starts at 0)
# Hit lists: [x][y][n] holds column 0..5 of the n-th line between
# "*ListStart" and "*ListEnd" for that node.
my @hitListSeq;
my @hitListScore;
my @hitListID;
my @hitListStart;
my @hitListMark;
my @hitListStrand;
# The *_lb scalars are declared but not referenced anywhere in this file.
my $hitListSeq_lb;
my $hitListScore_lb;
my $hitListID_lb;
my $hitListStart_lb;
my $hitListMark_lb;

# ---------------------------------------------------------------------------
# Main: TopX.pl rootName X
#
# Reads "<rootName>_best.txt" (two header lines, then one entry per line:
# length, node row, node column, ...), and for the first X entries writes the
# corresponding probability matrix to "<rootName>_top<X>.txt" as
#
#     DE <row>_<col>_L<length>_R<rank>
#     <position>\t<p0>\t<p1>\t<p2>\t<p3>\tX      (one line per position)
#     XX
#
# Each entry's matrix is loaded by readFile(<length>).
# ---------------------------------------------------------------------------
if (@ARGV < 2) {
	printf("Usage: TopX.pl rootName X\n");
}
else {
	# Package variable on purpose: readFile() builds its input file name
	# from $rootName.
	our $rootName = $ARGV[0];
	my $topX = $ARGV[1];
	printf("Getting the top %d PSSMs\n", $topX);

	# Read in the info in the "Best" results file.
	my $bestName = $rootName . "_best.txt";
	open(my $bestFh, '<', $bestName) or die "Cannot open file: $!";

	my $topfileName = $rootName . "_top" . $topX . ".txt";
	open(my $topFh, '>', $topfileName)
		or die "Cannot open output file $topfileName: $!";

	my @linesBest = <$bestFh>;
	close($bestFh);

	# Lines 0 and 1 are header lines and are not needed here; the best
	# patterns start at line 2.  Blank lines are skipped so that a trailing
	# empty line does not turn into a bogus, empty record.
	my @best;    # each element: [length, node row, node column]
	for my $line (@linesBest[2 .. $#linesBest]) {
		next unless $line =~ /\S/;
		my ($len, $row, $col) = split(/\s+/, $line);
		push(@best, [$len, $row, $col]);
	}

	for (my $q = 0; $q < $topX && $q < @best; $q++) {
		my ($len, $row, $col) = @{ $best[$q] };

		# Loads the matrices for this pattern length into @probMatrix.
		readFile($len);

		my $rank = $q + 1;
		print {$topFh} "DE ${row}_${col}_L${len}_R${rank}\n";
		for (my $pos = 0; $pos < $len; $pos++) {
			my @probs = map { $probMatrix[$row][$col][$pos][$_] } 0 .. 3;
			print {$topFh} join("\t", $pos, @probs, "X"), "\n";
		}
		print {$topFh} "XX\n";
	}

	# Buffered write errors only surface at close, so check it.
	close($topFh) or die "Cannot close output file $topfileName: $!";
}
#
# ***************************** READFILE *******************************
#
# readFile($L)
#
# Parses the file "<rootName>_<L>.txt" ($rootName is a package variable set by
# the main script) and stores its contents in the shared arrays declared at
# the top of the file.  Does nothing when $L is undefined or the empty string.
#
# Layout the parser relies on:
#   * starting at line index 3, lines are copied into @seqNames until a
#     "*EndSequences" line (or the end of the file) is reached;
#   * every later line whose first field is "*Node:" starts a node record,
#     with fields 1 and 2 used as the [x][y] indices:
#       - the next $L lines are matrix rows (first four fields are stored),
#       - the next 8 lines are "Label: value" pairs,
#       - an optional "*ListStart" ... "*ListEnd" section lists hits, six
#         whitespace-separated fields per line.
#
# $minZ and $maxZ are reset to 0 and then track the smallest / largest
# "Z_Score:" value seen.  Dies if the file cannot be opened.
sub readFile {
	my ($len) = @_;
	return if !defined($len) || $len eq "";    # No arguments... do nothing

	#initialise
	$minZ = 0;
	$maxZ = 0;

	#read in the info in the current file
	my $fileName = $rootName . "_" . $len . ".txt";
	open(my $fh, '<', $fileName) or die "Cannot open file: $!";
	my @lines = <$fh>;
	close($fh);

	# Where the value of each per-node "Label: value" line is stored.
	my %storeFor = (
		"Threshold:"   => \@thresholds,
		"HitCount:"    => \@hitCounts,
		"SoftCount:"   => \@softCounts,
		"RandCount:"   => \@randCounts,
		"Information:" => \@infos,
		"Complexity:"  => \@complexity,
		"Consensus:"   => \@consensus,
		"Z_Score:"     => \@Z_Scores,
	);

	# The last two lines of the file are never used as the start of a record.
	for (my $i = 0; $i < $#lines - 1; $i++) {

		if ($i == 3) {    #sequence names
			my $n = 0;
			# Line 3 itself is always stored.  The bounds check stops the
			# loop when the end marker is missing, instead of spinning
			# forever past the end of the file.
			do {
				$seqNames[$n] = $lines[$i];
				$n++;
				$i++;
			} until ($i > $#lines || $lines[$i] =~ /^\*EndSequences\s*$/);
			next;
		}

		next if $lines[$i] eq "\n";
		my @head = split(/\s+/, $lines[$i]);
		next unless @head && $head[0] eq "*Node:";

		my ($x, $y) = @head[1, 2];

		#####ProbMatrices
		for (my $row = 0; $row < $len; $row++) {
			$i++;
			my @probs = split(/\s+/, $lines[$i]);
			for my $col (0 .. 3) {
				$probMatrix[$x][$y][$row][$col] = $probs[$col];
			}
		}

		#####8 pieces of info
		for (1 .. 8) {
			$i++;
			my ($label, $value) = split(/\s+/, $lines[$i]);
			next unless defined($label) && exists $storeFor{$label};

			$storeFor{$label}->[$x][$y] = $value;

			if ($label eq "Z_Score:") {
				if    ($value < $minZ) { $minZ = $value; }
				elsif ($value > $maxZ) { $maxZ = $value; }
			}
		}

		#####List of occurences
		$i++;
		if (defined($lines[$i]) && $lines[$i] =~ /^\*ListStart\s*$/) {
			my $hit = 0;
			$i++;
			while ($i <= $#lines && $lines[$i] !~ /^\*ListEnd\s*$/) {
				my @field = split(/\s+/, $lines[$i]);
				$hitListSeq[$x][$y][$hit]    = $field[0];
				$hitListScore[$x][$y][$hit]  = $field[1];
				$hitListID[$x][$y][$hit]     = $field[2];
				$hitListStart[$x][$y][$hit]  = $field[3];
				$hitListMark[$x][$y][$hit]   = $field[4];
				$hitListStrand[$x][$y][$hit] = $field[5];
				$hit++;
				$i++;
			}
		}
	}

	return;
}
