#!/usr/bin/perl
#
# TopX.pl - write out the matrices of the first X patterns of a "best" file.
#
# Usage: TopX.pl rootName X
#
# Reads "<rootName>_best.txt".  Its first two lines are header lines; every
# following line describes one pattern as whitespace-separated columns
# "L I J pattern".  For each of the first X patterns the script loads
# "<rootName>_<L>.txt" with readFile() and writes the four-column matrix
# stored for node (I, J) to "<rootName>_top<X>.txt".

use strict;
use warnings;

# --- State filled in by readFile() -------------------------------------
my @seqNames;
my @probMatrix;      # $probMatrix[x][y][row][0..3]
my @thresholds;      # the per-node values below are indexed [x][y]
my @hitCounts;
my @softCounts;
my @randCounts;
my @infos;
my @complexity;
my @consensus;
my @Z_Scores;
my $minZ = 0;
my $maxZ = 0;
my @hitListSeq;      # the hit lists are indexed [x][y][hit]
my @hitListScore;
my @hitListID;
my @hitListStart;
my @hitListMark;
my @hitListStrand;

# --- Declared by the original script but never used in the visible code --
# NOTE(review): presumably carried over from a sibling script - confirm
# before deleting.
my $hitListSeq_lb;
my $hitListScore_lb;
my $hitListID_lb;
my $hitListStart_lb;
my $hitListMark_lb;
my $total_pred     = 0;
my $total_known    = 0;
my $correct        = 0;
my $partial        = 0;
my $pred_no_match  = 0;
my $known_no_match = 0;
my @knowns_found   = (0);
my $num_tfbs       = 0;
my $bestPerf       = 0;
my $bestFN         = 0;
my $bestFP         = 0;
my $bestIndex      = 0;

# --- Command line -------------------------------------------------------
my $rootName;                   # also read by readFile() to build file names
my $topX       = $ARGV[1];
my $tf_dataset = $ARGV[2];      # accepted but unused
my $numBest    = 0;

if (@ARGV < 2) {
    printf("Usage: TopX.pl rootName X\n");
}
else {
    printf("Getting the top %d PSSMs\n", $topX);

    # Read in the info in the "Best" results file.
    $rootName = $ARGV[0];
    my $bestName = $rootName . "_best.txt";
    open(my $best_fh, '<', $bestName) or die "Cannot open file: $!";
    my @linesBest = <$best_fh>;
    close($best_fh);

    my $topfileName = $rootName . "_top" . $topX . ".txt";
    open(my $top_fh, '>', $topfileName)
        or die "Cannot open file $topfileName for writing: $!";

    # Header values are parsed for completeness; nothing below uses them.
    my ($som_r, $som_c, $trainTime) = _fields($linesBest[0]);
    my @L = _fields($linesBest[1]);

    # Now read in the best patterns, one per line after the two header lines.
    my (@bestLs, @bestIs, @bestJs, @bestNodes, @bestPatterns, @bestOrder);
    for my $lineNo (2 .. $#linesBest) {
        my @cols = _fields($linesBest[$lineNo]);
        next unless @cols;      # a blank line is not a pattern

        $numBest++;
        push @bestLs,       $cols[0];
        push @bestIs,       $cols[1];
        push @bestJs,       $cols[2];
        push @bestNodes,    $cols[1] . "_" . $cols[2];
        push @bestPatterns, $cols[3];
        push @bestOrder,    $numBest;
    }

    my $loadedL;                # L of the file currently held in the state arrays
    for (my $q = 0; $q < $topX && $q < $numBest; $q++) {
        my $len = $bestLs[$q];

        # readFile() overwrites the shared state, so a reload is only needed
        # when the required L differs from the one loaded last.
        if (!defined $loadedL || $loadedL ne $len) {
            readFile($len);
            $loadedL = $len;
        }

        my $rank = $q + 1;
        print {$top_fh} "DE $bestNodes[$q]_L${len}_R$rank\n";

        for (my $pos = 0; $pos < $len; $pos++) {
            my $row   = $probMatrix[ $bestIs[$q] ][ $bestJs[$q] ][$pos] || [];
            my @cells = map { defined $_ ? $_ : '' } @{$row}[0 .. 3];
            print {$top_fh} join("\t", $pos, @cells, 'X'), "\n";
        }
        print {$top_fh} "XX\n";
    }

    # Buffered write errors only surface when the handle is closed.
    close($top_fh) or die "Cannot close file $topfileName: $!";
}

#
# ***************************** HELPERS ********************************
#
# _fields($line): split a line on runs of whitespace.  Returns the empty
# list for an undefined line, so reading past the end of a file yields no
# fields instead of an "uninitialized value" warning.
sub _fields {
    my ($line) = @_;
    return defined $line ? split(/\s+/, $line) : ();
}

#
# ***************************** READFILE *******************************
#
# readFile($L): parse "<rootName>_<L>.txt" into the file-level state arrays.
#
#   $L - value used both in the file name and as the number of matrix rows
#        read for every node.  Nothing happens if it is missing or "".
#
# Layout expected by the parser:
#   lines 0-2   header lines (presumably SOM and L info)
#   line  3..   sequence names, up to a "*EndSequences" line
#   then, per node:
#       "*Node: x y"
#       $L matrix rows (the first four fields of each are stored)
#       8 "Label: value" lines
#       one more line; if it is "*ListStart", hit lines follow up to
#       "*ListEnd" (fields: seq score id start mark strand)
#
# Resets $minZ/$maxZ to 0 and then tracks the smallest/largest Z_Score seen.
# The arrays are overwritten in place, not cleared, so entries written by an
# earlier call that the new file does not touch remain.
# Dies if the file cannot be opened.  Returns nothing.
sub readFile {
    my ($currL) = @_;
    return if !defined $currL || $currL eq "";

    # Initialise.
    $minZ = 0;
    $maxZ = 0;

    # Read in the info in the current file.
    my $currName = $rootName . "_" . $currL . ".txt";
    open(my $curr_fh, '<', $currName) or die "Cannot open file: $!";
    my @linesCurr = <$curr_fh>;
    close($curr_fh);

    # "Label:" => array that receives the value at [x][y].
    my %store_for = (
        'Threshold:'   => \@thresholds,
        'HitCount:'    => \@hitCounts,
        'SoftCount:'   => \@softCounts,
        'RandCount:'   => \@randCounts,
        'Information:' => \@infos,
        'Complexity:'  => \@complexity,
        'Consensus:'   => \@consensus,
        'Z_Score:'     => \@Z_Scores,
    );

    # Hit-list columns, in the order the fields appear on a hit line.
    my @hit_columns = (
        \@hitListSeq,   \@hitListScore, \@hitListID,
        \@hitListStart, \@hitListMark,  \@hitListStrand,
    );

    # The bound is kept from the original: the last two lines of the file
    # are never examined as the start of a record.
    for (my $i = 0; $i < $#linesCurr - 1; $i++) {

        if ($i == 3) {
            # Sequence names: line 3 itself is always stored, then every line
            # up to the end marker.  The end-of-file test keeps a file without
            # the marker from looping forever.
            my $n = 0;
            do {
                $seqNames[$n] = $linesCurr[$i];
                $n++;
                $i++;
            } until ($i > $#linesCurr || $linesCurr[$i] eq "*EndSequences\n");
            next;               # the loop increment steps over the marker
        }

        # Header lines 0-2 also reach this point; they are ignored unless
        # they start with "*Node:".
        next if $linesCurr[$i] eq "\n";
        my @head = split(/\s+/, $linesCurr[$i]);
        next unless @head && $head[0] eq "*Node:";

        my ($x, $y) = @head[1, 2];

        ##### ProbMatrices
        for (my $row = 0; $row < $currL; $row++) {
            $i++;
            my @cells = _fields($linesCurr[$i]);
            $probMatrix[$x][$y][$row][$_] = $cells[$_] for 0 .. 3;
        }

        ##### 8 pieces of info
        for (1 .. 8) {
            $i++;
            my ($label, $value) = _fields($linesCurr[$i]);
            next unless defined $label;

            my $store = $store_for{$label};
            $store->[$x][$y] = $value if $store;

            if ($label eq 'Z_Score:' && defined $value) {
                if    ($value < $minZ) { $minZ = $value; }
                elsif ($value > $maxZ) { $maxZ = $value; }
            }
        }

        ##### List of occurrences
        $i++;
        if ($i <= $#linesCurr && $linesCurr[$i] eq "*ListStart\n") {
            my $hit = 0;
            $i++;
            while ($i <= $#linesCurr && $linesCurr[$i] ne "*ListEnd\n") {
                my @hitFields = split(/\s+/, $linesCurr[$i]);
                $hit_columns[$_][$x][$y][$hit] = $hitFields[$_]
                    for 0 .. $#hit_columns;
                $hit++;
                $i++;
            }
        }
    }

    return;
}