# reading predefined alphabet from file "sample_files/alphabet". # ***** sequence information from sequence set "sample_files/training.fa" ***** # 1 >seq1 (1-500) # 2 >seq2 (1-500) # 3 >seq3 (1-500) # 4 >seq4 (1-500) # 5 >seq5 (1-500) # 6 >seq6 (1-500) # 7 >seq7 (1-500) # 8 >seq8 (1-500) # 9 >seq9 (1-500) # 10 >seq10 (1-500) # 11 >seq11 (1-500) # 12 >seq12 (1-500) # 13 >seq13 (1-500) # 14 >seq14 (1-500) # 15 >seq15 (1-500) # 16 >seq16 (1-500) # 17 >seq17 (1-500) # 18 >seq18 (1-500) # 19 >seq19 (1-500) # 20 >seq20 (1-500) # 21 >seq21 (1-500) # 22 >seq22 (1-500) # 23 >seq23 (1-500) # 24 >seq24 (1-500) # 25 >seq25 (1-500) # 26 >seq26 (1-500) # 27 >seq27 (1-500) # 28 >seq28 (1-500) # 29 >seq29 (1-500) # 30 >seq30 (1-500) # # total number of sequences: 30 # total number of sequence fragments: 30 # **** information on observed frequency and occurrence of each letter. **** # total number of letters in the input sequences = 15000 # A 0.3171; observed occurrence = 4757 # C 0.1805; observed occurrence = 2707 # G 0.1834; observed occurrence = 2751 # T 0.3190; observed occurrence = 4785 # parameter settings: # -m 1, -i 2000, -R 200 # -l 10, -L 10, -P 1 # -s 3.000, -G 3.000, -d 0.942, -D 0.942, -M 0.000, -u 0.000 # |alph| 4, -c 0 # -C 1, -I 0.500, -F 0.500, -N 10000 # -S 17 -W -2.500 2.500 -H -2.500 2.500 -y 1 # -a sample_files/alphabet # -p sample_files/training.fa # -n sample_files/background.fa # symbol priors to be used: # A:0.317 C:0.180 G:0.183 T:0.319 # symbol weight ranges to be used: # A:(-2.87, 2.87) C:(-4.28, 4.28) G:(-4.24, 4.24) T:(-2.86, 2.86) # Maximum Possible Distance Between Sites: 50 nucleotides. # Search Direction for Second Site with respect to First: 0 # Partition against background sequences. 
START OF RUN: 1 # Unit1: # using random site as initial starting point # Unit2: # using random site as initial starting point # ************************* training ************************** # # itr max(UU) curr(UU) Cons 1 Cons 2 # 0 0.94 0.94 AGTGTAAACC GGGTATGTAC # 200 0.94 0.92 AGTGTAAACC GGGTATGTAC # 400 0.96 0.78 GTTATAAGTC TAGATCGGAT # 600 1.09 0.69 GTTATAAGTC AAGGTCTAAC # 800 1.09 0.68 GTTATAAGTC AAGGTCTAAC # 1000 1.09 0.90 GTTATAAGTC AAGGTCTAAC # 1200 1.09 0.76 GTTATAAGTC AAGGTCTAAC # 1400 1.09 0.94 GTTATAAGTC AAGGTCTAAC # 1600 1.09 0.87 GTTATAAGTC AAGGTCTAAC # 1800 1.09 0.78 GTTATAAGTC AAGGTCTAAC # ******************* results ******************** # # BEST_SITES UNIT ONE # n seq log_S pos site id # 1 1 12.94 139 : GGTAAAAGTT >seq1 # 2 2 16.29 151 : GGTATAAGAT >seq2 # 3 3 15.65 135 : GACATAAATC >seq3 # 4 4 18.13 31 : GTGATAAGTC >seq4 # 5 5 12.21 262 : ATTATTAAAC >seq5 # 6 6 15.45 295 : GCTATCAATC >seq6 # 7 7 14.77 359 : GTTAAACATC >seq7 # 8 8 17.74 332 : CTTATAAGTC >seq8 # 9 9 17.33 32 : CGTATAAGTC >seq9 # 10 10 17.40 316 : GTTATACCAC >seq10 # 11 11 14.79 163 : GTCATAAACC >seq11 # 12 12 21.03 475 : GTTATAAGTC >seq12 # 13 13 14.28 197 : GTTATTAGTA >seq13 # 14 14 15.36 108 : GCTATGCGAC >seq14 # 15 15 12.44 151 : TATATACATC >seq15 # 16 16 17.08 276 : GTCATAAAAC >seq16 # 17 17 17.99 52 : GGTATATGAC >seq17 # 18 18 16.21 435 : GTTCTAAATC >seq18 # 19 19 16.04 101 : GTTCTAAAAC >seq19 # 20 20 13.69 191 : GTTATATGGA >seq20 # 21 21 16.24 310 : GTTAAAAATC >seq21 # 22 22 15.01 339 : AGTATAAAAC >seq22 # 23 23 18.40 304 : GTTATATGAC >seq23 # 24 24 18.87 166 : GTTATAACAC >seq24 # 25 25 16.00 385 : GCTTTAAGTC >seq25 # 26 26 14.10 241 : TTCATAAGAC >seq26 # 27 27 15.27 135 : GCTATATCAC >seq27 # 28 28 15.18 207 : GGTATAAATT >seq28 # 29 29 19.40 243 : GTTATACGAC >seq29 # 30 30 17.32 366 : GTTATAAGAA >seq30 # ave_log_S 18.20 # # ALIGNMENT_MATRIX # 1 2 3 4 5 6 7 8 9 10 # ------------------------------------------------------------ A| 2 2 0 27 3 26 21 12 14 3 C| 2 4 4 2 0 1 5 3 1 
24 G| 24 6 1 0 0 1 0 15 1 0 T| 2 18 25 1 27 2 4 0 14 3 # INFORMATION 5.764 (base e) # # BEST_WEIGHTS UNIT ONE # 1 2 3 4 5 6 7 8 9 10 # ------------------------------------------------------------ A| -1.24 -0.82 -1.31 2.73 -1.03 2.32 1.82 0.43 0.82 -0.49 C| -0.36 -0.34 -0.31 -0.82 -1.15 -0.87 0.36 -0.29 -1.47 3.06 G| 2.93 0.37 -0.70 -0.73 -0.49 -0.59 -1.54 1.71 -0.34 -1.46 T| -1.32 0.79 2.20 -1.18 2.49 -0.89 -0.64 -1.85 0.99 -1.11 # # CONSENSUS GTTATAAGTC # # BEST_SITES UNIT TWO # n seq log_S pos site id # 1 1 6.92 100 : ATTGTCTTGC >seq1 # 2 2 9.71 187 : ACTGTCTAAT >seq2 # 3 3 6.32 95 : AGTGTCCATC >seq3 # 4 4 7.61 18 : AAGGTATTGC >seq4 # 5 5 8.50 248 : AGTGACTAAT >seq5 # 6 6 8.32 264 : AATGACCAAC >seq6 # 7 7 10.10 317 : AAGGACTAAT >seq7 # 8 8 8.92 296 : AATGTGTTAC >seq8 # 9 9 7.77 14 : CTTATCTTAC >seq9 # 10 10 8.86 278 : CGTGTCTAAT >seq10 # 11 11 9.21 145 : ACGATGTAAT >seq11 # 12 12 10.81 462 : AAGATCTGAC >seq12 # 13 13 8.06 235 : AAAATCTTAC >seq13 # 14 14 7.50 75 : AATATATTAT >seq14 # 15 15 9.50 180 : CAGATGTAAC >seq15 # 16 16 7.92 248 : AAGATAAAAC >seq16 # 17 17 6.28 78 : TGGATCTACT >seq17 # 18 18 7.53 407 : AATATCTAGT >seq18 # 19 19 8.75 117 : TCGGTCGAAC >seq19 # 20 20 9.70 168 : AAGATCGGAC >seq20 # 21 21 8.04 263 : ATGATGTGAT >seq21 # 22 22 10.79 327 : AATGTCTAAC >seq22 # 23 23 8.03 263 : CGGGAATAAC >seq23 # 24 24 9.73 136 : AAGATTTAAC >seq24 # 25 25 7.88 339 : AATAACTGAT >seq25 # 26 26 7.53 261 : CGGGACCGAC >seq26 # 27 27 6.98 153 : ACGAGCCAAT >seq27 # 28 28 6.54 239 : ATTGACAAAC >seq28 # 29 29 8.08 264 : CGTGTATAAC >seq29 # 30 30 7.91 404 : TGGATATAAC >seq30 # ave_log_S 10.37 # # ALIGNMENT_MATRIX # 1 2 3 4 5 6 7 8 9 10 # ------------------------------------------------------------ A| 21 14 1 15 7 6 2 19 25 0 C| 6 4 0 0 0 19 4 0 1 19 G| 0 8 15 15 1 4 2 5 3 0 T| 3 4 14 0 22 1 22 6 1 11 # INFORMATION 4.693 (base e) # # BEST_WEIGHTS UNIT TWO # 1 2 3 4 5 6 7 8 9 10 # ------------------------------------------------------------ A| 1.18 0.48 -1.05 0.83 0.12 -0.36 
-0.97 0.80 1.81 -1.09 C| 0.37 0.04 -0.98 -0.75 -0.62 1.07 -0.18 -1.03 -0.60 1.28 G| -1.11 -0.00 1.58 1.31 -0.78 -0.07 0.02 0.17 -0.33 -0.82 T| -0.44 -0.51 0.46 -1.40 1.28 -0.64 1.13 0.06 -0.88 0.64 # # CONSENSUS AAGGTCTAAC # # >> Best iter for two units combined: 476 # >> Maximum combined energy for both units (MaxUU): 1.08811 # >> MaxU1: 7.55905 MaxU2: 6.68793