Читать книгу Informatics and Machine Learning. From Martingales to Metaheuristics онлайн
88 страница из 101
# -------------------- prog2.py addendum 5 --------------------
# Observed that 'tag' is anomalous; want to get a sense of the distribution
# of gaps between successive 'tag' codons (still stepping in-frame).


def codon_gap_counter(seq, frame, delimiter, quant=100):
    """Histogram the in-frame gaps between successive occurrences of a codon.

    Characters of ``seq`` other than a/c/g/t (either case) are discarded,
    then the remaining bases are read as codons starting at offset ``frame``
    and advancing three bases at a time.  Every time a codon equal to
    ``delimiter`` is found, the distance (in bases) back to the previous
    match is floor-divided by ``quant`` to obtain a bin number, and that
    bin's count is incremented.  The first match only sets the reference
    position; it contributes no gap.

    One line per non-empty bin is printed, in ascending bin order.

    Args:
        seq: Sequence text to scan.
        frame: Offset into the filtered bases at which codon reading starts.
        delimiter: Three-letter codon to look for.  The comparison is
            case-sensitive, so lowercase bases never match an uppercase
            delimiter.
        quant: Bin width in bases (defaults to 100, the value that was
            previously hard-coded).

    Returns:
        Whatever ``count_to_freq`` returns for the float array of per-bin
        counts (ascending bin order).  ``count_to_freq`` is defined elsewhere
        in prog2.py; presumably it normalizes counts to frequencies --
        confirm against its definition.
    """
    bases = "".join(re.findall('[acgtACGT]', seq))
    # Bound the scan by the *filtered* length: using len(seq) overruns the
    # filtered bases whenever seq contains newlines or other non-base symbols.
    last_start = len(bases) - 2

    counts = {}
    # None (not 0) marks "no match seen yet", so a match at position 0 is a
    # valid reference point and the gap that follows it is still counted.
    previous = None
    for index in range(frame, last_start, 3):
        if bases[index:index + 3] != delimiter:
            continue
        if previous is not None:
            # Floor division keeps bin numbers integral on Python 2 and 3.
            gap_bin = (index - previous) // quant
            counts[gap_bin] = counts.get(gap_bin, 0) + 1
        previous = index

    ordered_bins = sorted(counts)
    for gap_bin in ordered_bins:
        # Single-argument print() gives the same output on Python 2 and 3.
        print("gapbin %s count = %s" % (gap_bin, counts[gap_bin]))

    npcounts = np.array([counts[b] for b in ordered_bins], dtype=float)
    return count_to_freq(npcounts)


# usage:
# All 64 codons in lexicographic order over A, C, G, T ("AAA", "AAC", ...,
# "TTG", "TTT").
delimiters = tuple(
    first + second + third
    for first in "ACGT"
    for second in "ACGT"
    for third in "ACGT"
)

for delimiter in delimiters:
    print("\n\ndelimiter is %s" % delimiter)
    codon_gap_counter(EC_sequence, 0, delimiter)

# ----------------- prog2.py addendum 5 end ------------------