在 GitHub 上編輯此頁面

簡併密碼子方法

問題

有時，我們只想針對 X 倍簡併的密碼子來統計核苷酸序列的數據。以下程式碼提供了一些函數，透過產生僅由這類密碼子組成的子序列來達成此目的。

解決方案

from Bio.Data.CodonTable import unambiguous_dna_by_id


def altcodons(codon, table):
    """List the codons that are synonymous with ``codon`` in its codon family.

    For a stop codon, every stop codon of the table is returned. For a
    coding codon, the result contains the codons that translate to the same
    amino acid AND share the first two bases with ``codon`` (i.e. they
    differ only in the third position). ``codon`` itself is part of the
    result in both cases.

    @param codon the codon to look up (three-letter string)
    @param table code table id (key into ``unambiguous_dna_by_id``)
    @return list of codons; empty if ``codon`` is neither a stop codon nor
            present in the table's forward table

    An unknown ``table`` id is not handled here; the lookup error propagates.
    """
    tab = unambiguous_dna_by_id[table]

    if codon in tab.stop_codons:
        # Return a copy so callers cannot mutate the shared codon table.
        return list(tab.stop_codons)

    try:
        aa = tab.forward_table[codon]
    except KeyError:
        # Not a codon of this table (e.g. ambiguous or truncated triplet).
        return []

    prefix = codon[:2]
    return [
        other
        for other, other_aa in tab.forward_table.items()
        if other_aa == aa and other[:2] == prefix
    ]


def degeneration(codon, table):
    """Count the codons that ``altcodons`` reports for ``codon``.

    @param codon the codon
    @param table code table id
    @return the length of the list returned by ``altcodons(codon, table)``

    """
    synonymous = altcodons(codon, table)
    return len(synonymous)


def is_x_degenerated(x, codon, table):
    """Tell whether ``codon`` is at least x-fold degenerated.

    @param x the minimum degeneration required
    @param codon the codon
    @param table code table id
    @return True if x <= the number of codons ``altcodons`` lists for
            ``codon``, False otherwise

    """
    fold = len(altcodons(codon, table))
    return x <= fold


def degenerated_subseq(seq, x, table):
    """Get a subsequence consisting of the x-fold degenerated codons only.

    The sequence is read in consecutive, non-overlapping triplets starting
    at index 0; each triplet that passes ``is_x_degenerated`` is kept, in
    its original order.

    @param seq the nucleotide sequence (anything sliceable whose slices
               convert to text with ``str()``, e.g. a Seq or a plain string)
    @param x the minimum degeneration a codon needs to be kept
    @param table code table id
    @return a string made of the concatenated codons that were kept

    """
    # str() replaces the legacy Seq.tostring() call and also accepts plain
    # strings.
    codons = (str(seq[start : start + 3]) for start in range(0, len(seq), 3))
    # join() avoids quadratic string concatenation on long sequences.
    return "".join(
        codon for codon in codons if is_x_degenerated(x, codon, table)
    )