生信算法5 - 序列比对之全局比对算法

发布时间:2023年12月23日

生信序列基本操作算法

建议在 Jupyter 中实践,Python 版本为 3.9。

1. 基础定义

记分矩阵的行和列依次对应 A、C、G、T 以及空位(gap,最后一行/列):碱基相同罚 0 分,转换(A↔G、C↔T)罚 2 分,颠换罚 4 分,空位罚 8 分。分值越低表示比对越好。

# Alphabet: a base's position in this list is its row/column index in `score`
alphabet = list('ACGT')
# Scoring matrix: lower values mean a better pairing (identical bases cost 0).
# Rows and columns 0-3 follow `alphabet`; the last row/column (index -1)
# holds the cost used when a base is paired with nothing (a gap).
score = [
    [0, 4, 2, 4, 8],
    [4, 0, 4, 2, 8],
    [2, 4, 0, 4, 8],
    [4, 2, 4, 0, 8],
    [8, 8, 8, 8, 8],
]

2. 根据字母表获取打分矩阵索引值

# Look up a base's index into the scoring matrix via the alphabet
alphabet.index('A')
# 0

alphabet.index('G')
# 2

# Score for pairing 'A' (row) with 'T' (column)
score[alphabet.index('A')][alphabet.index('T')]
# 4

# Score in the last column (index -1) of the 'C' row
score[alphabet.index('C')][-1]
# 8

3. 全局比对算法python实现

def globalAlignment(x, y):
    """Return the minimum global-alignment cost between sequences x and y.

    A dynamic-programming matrix D of size (len(x)+1) x (len(y)+1) is
    filled so that D[i][j] is the cheapest way to align the first i
    characters of x with the first j characters of y. Each step either
    pairs x[i-1] with y[j-1] (diagonal), or pairs one character with a gap
    (horizontal / vertical). Costs come from the local `score` table, and
    the cheapest of the three options is kept, so lower is better.

    As a side effect, rows 1..len(x) of D are printed (row 0 is not).

    Args:
        x: First sequence; every character must be one of 'A', 'C', 'G', 'T'.
        y: Second sequence, same restriction.

    Returns:
        The bottom-right cell of D, i.e. the total cost of the best
        end-to-end alignment of x and y.

    Raises:
        ValueError: If x or y contains a character outside the alphabet.
    """
    # Alphabet: a base's position is its row/column index in `score`
    alphabet = ['A', 'C', 'G', 'T']
    # Scoring matrix; the last row/column (index -1) is the gap cost
    score = [[0, 4, 2, 4, 8],
             [4, 0, 4, 2, 8],
             [2, 4, 0, 4, 8],
             [4, 2, 4, 0, 8],
             [8, 8, 8, 8, 8]]

    # Translate each character to its matrix index once, up front, instead
    # of re-running the linear alphabet.index() search for the same
    # characters inside the O(len(x) * len(y)) fill loop.
    base_index = {base: k for k, base in enumerate(alphabet)}

    def encode(seq):
        # Map every character of seq to its row/column index in `score`.
        indices = []
        for ch in seq:
            try:
                indices.append(base_index[ch])
            except (KeyError, TypeError):
                raise ValueError(
                    f"unsupported character {ch!r}; expected one of {alphabet}"
                ) from None
        return indices

    x_idx = encode(x)
    y_idx = encode(y)
    n_rows = len(x_idx)
    n_cols = len(y_idx)

    # Create the (len(x)+1) x (len(y)+1) matrix, initially all zeros
    D = [[0] * (n_cols + 1) for _ in range(n_rows + 1)]

    # First column: a prefix of x aligned entirely against gaps
    for i in range(1, n_rows + 1):
        D[i][0] = D[i-1][0] + score[x_idx[i-1]][-1]

    # First row: a prefix of y aligned entirely against gaps
    for j in range(1, n_cols + 1):
        D[0][j] = D[0][j-1] + score[-1][y_idx[j-1]]

    # Fill the rest of the matrix
    for i in range(1, n_rows + 1):
        xi = x_idx[i-1]
        for j in range(1, n_cols + 1):
            yj = y_idx[j-1]
            distHor = D[i][j-1] + score[-1][yj]     # y[j-1] against a gap
            distVer = D[i-1][j] + score[xi][-1]     # x[i-1] against a gap
            distDiag = D[i-1][j-1] + score[xi][yj]  # x[i-1] against y[j-1]
            D[i][j] = min(distHor, distVer, distDiag)

    # Print the matrix rows; row 0 is skipped, as in the original listing
    for row in D[1:]:
        print(row)

    # The bottom-right cell is the final alignment score
    return D[-1][-1]

4. 测试算法

# Test case 1: two short sequences (10 and 9 bases)
x = 'TATGTCATGC'
y = 'TATGGCAGC'
# globalAlignment prints matrix rows 1..len(x) itself; print() then shows the returned score
print(globalAlignment(x,y))

[8, 0, 8, 16, 24, 32, 40, 48, 56, 64]
[16, 8, 0, 8, 16, 24, 32, 40, 48, 56]
[24, 16, 8, 0, 8, 16, 24, 32, 40, 48]
[32, 24, 16, 8, 0, 8, 16, 24, 32, 40]
[40, 32, 24, 16, 8, 4, 10, 18, 26, 34]
[48, 40, 32, 24, 16, 12, 4, 12, 20, 26]
[56, 48, 40, 32, 24, 18, 12, 4, 12, 20]
[64, 56, 48, 40, 32, 26, 20, 12, 8, 14]
[72, 64, 56, 48, 40, 32, 28, 20, 12, 12]
[80, 72, 64, 56, 48, 40, 32, 28, 20, 12]
# 最终得分为 12(即矩阵右下角的值;以上打印的是矩阵第 1 行起的各行,不含第 0 行)


# Test case 2: two longer sequences (50 bases each)
x = 'CGGGAACAGGCCATGACACACTTGGGCACACCCACCCACTCCAAGGCACA'
y = 'CATACGCACACGTGCACACAGATGTGCCCGCCCGCACAGGTGGGCATCGG'
# globalAlignment prints matrix rows 1..len(x) itself; print() then shows the returned score
print(globalAlignment(x,y))
[8, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320, 328, 336, 344, 352, 360, 368, 376, 384, 392]
[16, 8, 2, 10, 18, 26, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320, 328, 336, 344, 352, 360, 368, 376, 384]
[24, 16, 10, 6, 12, 20, 26, 34, 42, 50, 58, 66, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320, 328, 336, 344, 352, 360, 368, 376]
[32, 24, 18, 14, 8, 16, 20, 28, 36, 44, 52, 60, 66, 74, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320, 328, 336, 344, 352, 360, 368]
[40, 32, 24, 22, 14, 12, 18, 24, 28, 36, 44, 52, 60, 68, 76, 84, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320, 328, 336, 344, 352, 360]
[48, 40, 32, 28, 22, 18, 14, 22, 24, 32, 36, 44, 52, 60, 68, 76, 84, 92, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320, 328, 336, 344, 352]
[56, 48, 40, 34, 30, 22, 22, 14, 22, 24, 32, 36, 44, 52, 60, 68, 76, 84, 92, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320, 328, 336, 344]
[64, 56, 48, 42, 34, 30, 24, 22, 14, 22, 24, 32, 38, 46, 54, 62, 68, 76, 84, 92, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320, 328, 336]
[72, 64, 56, 50, 42, 38, 30, 28, 22, 18, 24, 28, 32, 40, 46, 54, 62, 70, 78, 86, 94, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320, 328]
[80, 72, 64, 58, 50, 46, 38, 34, 30, 26, 20, 28, 28, 36, 40, 48, 56, 64, 72, 80, 88, 94, 98, 106, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320]
[88, 80, 72, 66, 58, 50, 46, 38, 38, 30, 28, 20, 28, 30, 38, 40, 48, 56, 64, 72, 80, 88, 96, 100, 108, 114, 122, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312]
[96, 88, 80, 74, 66, 58, 54, 46, 42, 38, 34, 28, 24, 30, 34, 38, 44, 48, 56, 64, 72, 80, 88, 96, 104, 110, 118, 122, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304]
[104, 96, 88, 82, 74, 66, 60, 54, 46, 46, 38, 36, 30, 28, 32, 38, 38, 46, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 126, 132, 138, 146, 154, 162, 170, 178, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296]
[112, 104, 96, 88, 82, 74, 68, 62, 54, 48, 46, 40, 38, 30, 32, 34, 42, 40, 48, 50, 58, 66, 74, 80, 88, 96, 104, 112, 120, 128, 136, 140, 148, 156, 164, 172, 180, 186, 194, 202, 210, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288]
[120, 112, 104, 96, 90, 82, 74, 70, 62, 56, 50, 48, 40, 38, 30, 36, 36, 44, 42, 50, 52, 58, 66, 74, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 156, 164, 172, 180, 188, 194, 202, 210, 216, 224, 232, 240, 248, 256, 264, 272, 280]
[128, 120, 112, 104, 96, 90, 82, 78, 70, 64, 56, 54, 48, 44, 38, 34, 36, 40, 44, 46, 50, 54, 58, 66, 74, 82, 90, 98, 106, 114, 122, 130, 138, 146, 154, 160, 164, 172, 180, 188, 196, 204, 212, 218, 226, 234, 240, 248, 256, 264, 272]
[136, 128, 120, 112, 104, 96, 90, 82, 78, 70, 64, 56, 56, 50, 46, 38, 38, 36, 44, 44, 50, 54, 58, 60, 68, 76, 84, 90, 98, 106, 114, 122, 130, 138, 146, 154, 162, 164, 172, 180, 188, 196, 204, 212, 220, 226, 234, 242, 248, 256, 264]
[144, 136, 128, 120, 112, 104, 98, 90, 82, 78, 70, 64, 58, 58, 52, 46, 38, 42, 36, 44, 44, 52, 54, 62, 62, 70, 78, 86, 94, 102, 108, 116, 124, 132, 140, 148, 154, 162, 164, 172, 180, 188, 196, 204, 212, 220, 226, 234, 242, 250, 258]
[152, 144, 136, 128, 120, 112, 106, 98, 90, 82, 78, 70, 66, 60, 60, 52, 46, 38, 44, 36, 44, 48, 56, 56, 64, 64, 72, 78, 86, 94, 102, 108, 116, 124, 132, 140, 148, 154, 162, 168, 176, 182, 190, 198, 206, 212, 220, 228, 234, 242, 250]
[160, 152, 144, 136, 128, 120, 114, 106, 98, 90, 82, 78, 72, 68, 62, 60, 52, 46, 38, 44, 36, 44, 48, 56, 58, 66, 66, 74, 82, 90, 96, 104, 112, 120, 126, 134, 140, 148, 154, 162, 170, 178, 184, 192, 200, 208, 212, 220, 228, 236, 244]
[168, 160, 152, 144, 136, 128, 122, 114, 106, 98, 90, 82, 80, 74, 70, 62, 60, 52, 46, 38, 44, 40, 48, 50, 58, 60, 68, 66, 74, 82, 90, 96, 104, 112, 120, 126, 134, 140, 148, 156, 164, 172, 180, 188, 196, 200, 208, 214, 220, 228, 236]
[176, 168, 160, 152, 144, 136, 130, 122, 114, 106, 98, 90, 86, 80, 78, 70, 66, 60, 54, 46, 42, 48, 44, 48, 54, 58, 64, 70, 68, 76, 84, 92, 98, 106, 114, 122, 130, 136, 144, 152, 160, 164, 172, 180, 188, 196, 204, 208, 216, 224, 232]
[184, 176, 168, 160, 152, 144, 138, 130, 122, 114, 106, 98, 94, 86, 84, 78, 74, 68, 62, 54, 50, 46, 52, 44, 52, 54, 62, 66, 72, 70, 78, 86, 94, 100, 108, 116, 124, 132, 140, 148, 156, 160, 168, 176, 184, 190, 198, 204, 210, 218, 226]
[192, 184, 176, 168, 160, 152, 144, 138, 130, 122, 114, 106, 98, 94, 86, 86, 80, 76, 70, 62, 56, 50, 48, 52, 44, 52, 54, 62, 70, 76, 70, 78, 86, 94, 100, 108, 116, 124, 132, 140, 148, 156, 160, 168, 176, 184, 192, 200, 208, 210, 218]
[200, 192, 184, 176, 168, 160, 152, 146, 138, 130, 122, 114, 106, 102, 94, 90, 88, 84, 78, 70, 64, 56, 52, 52, 52, 48, 52, 58, 66, 74, 76, 74, 82, 90, 94, 102, 110, 118, 126, 132, 140, 148, 156, 160, 168, 176, 184, 192, 200, 208, 210]
[208, 200, 192, 184, 176, 168, 160, 154, 146, 138, 130, 122, 114, 110, 102, 98, 92, 92, 86, 78, 72, 64, 58, 56, 52, 56, 48, 56, 62, 70, 74, 80, 78, 86, 90, 98, 104, 112, 120, 126, 132, 140, 148, 156, 160, 168, 176, 184, 192, 200, 208]
[216, 208, 200, 192, 184, 176, 168, 160, 154, 146, 138, 130, 122, 116, 110, 102, 100, 92, 94, 86, 80, 72, 66, 60, 60, 54, 56, 48, 56, 62, 70, 74, 80, 78, 86, 90, 98, 104, 112, 120, 128, 134, 142, 150, 158, 160, 168, 176, 184, 192, 200]
[224, 216, 208, 200, 192, 184, 176, 168, 160, 154, 146, 138, 130, 124, 118, 110, 102, 100, 92, 94, 86, 80, 72, 68, 62, 62, 56, 56, 52, 60, 64, 72, 78, 84, 80, 88, 90, 98, 104, 112, 120, 128, 136, 144, 152, 160, 160, 168, 176, 184, 192]
[232, 224, 216, 208, 200, 192, 184, 176, 168, 160, 154, 146, 138, 132, 126, 118, 110, 102, 100, 92, 94, 88, 80, 74, 70, 64, 64, 56, 56, 52, 60, 64, 72, 78, 86, 80, 88, 90, 98, 106, 114, 122, 130, 138, 146, 152, 160, 162, 168, 176, 184]
[240, 232, 224, 216, 208, 200, 192, 184, 176, 168, 160, 154, 146, 140, 134, 126, 118, 110, 102, 100, 92, 96, 88, 82, 76, 72, 66, 64, 60, 60, 54, 62, 68, 76, 80, 88, 80, 88, 90, 98, 106, 114, 122, 130, 138, 146, 152, 160, 166, 170, 178]
[248, 240, 232, 224, 216, 208, 200, 192, 184, 176, 168, 160, 154, 148, 142, 134, 126, 118, 110, 102, 100, 96, 96, 90, 84, 78, 74, 66, 64, 60, 62, 54, 62, 68, 76, 80, 88, 80, 88, 94, 102, 108, 116, 124, 132, 138, 146, 154, 160, 168, 174]
[256, 248, 240, 232, 224, 216, 208, 200, 192, 184, 176, 168, 162, 156, 150, 142, 134, 126, 118, 110, 106, 104, 100, 98, 92, 86, 82, 74, 66, 64, 64, 62, 54, 62, 70, 76, 84, 88, 84, 92, 98, 104, 112, 120, 128, 132, 140, 148, 154, 162, 170]
[264, 256, 248, 240, 232, 224, 216, 208, 200, 192, 184, 176, 170, 164, 158, 150, 142, 134, 126, 118, 114, 110, 108, 102, 100, 94, 90, 82, 74, 66, 68, 64, 62, 54, 62, 70, 78, 84, 92, 88, 96, 100, 108, 116, 124, 128, 136, 142, 148, 156, 164]
[272, 264, 256, 248, 240, 232, 224, 216, 208, 200, 192, 184, 178, 172, 166, 158, 150, 142, 134, 126, 118, 116, 110, 110, 104, 102, 96, 90, 82, 74, 68, 72, 68, 62, 56, 64, 70, 78, 84, 92, 90, 98, 102, 110, 118, 126, 128, 136, 144, 150, 158]
[280, 272, 264, 256, 248, 240, 232, 224, 216, 208, 200, 192, 186, 180, 174, 166, 158, 150, 142, 134, 126, 122, 118, 112, 112, 106, 104, 96, 90, 82, 76, 68, 72, 68, 64, 56, 64, 70, 78, 86, 94, 92, 100, 106, 114, 118, 126, 130, 136, 144, 152]
[288, 280, 272, 264, 256, 248, 240, 232, 224, 216, 208, 200, 194, 188, 182, 174, 166, 158, 150, 142, 134, 130, 126, 120, 116, 114, 110, 104, 96, 90, 84, 76, 68, 72, 72, 64, 60, 64, 72, 80, 88, 96, 96, 104, 110, 114, 122, 128, 130, 138, 146]
[296, 288, 280, 272, 264, 256, 248, 240, 232, 224, 216, 208, 202, 196, 190, 182, 174, 166, 158, 150, 142, 138, 134, 128, 124, 118, 118, 110, 104, 96, 92, 84, 76, 68, 76, 72, 68, 60, 68, 76, 84, 90, 98, 100, 108, 110, 118, 124, 128, 134, 142]
[304, 296, 288, 280, 272, 264, 256, 248, 240, 232, 224, 216, 210, 204, 198, 190, 182, 174, 166, 158, 150, 144, 138, 136, 130, 126, 120, 118, 112, 104, 98, 92, 84, 76, 70, 78, 72, 68, 60, 68, 76, 84, 92, 100, 102, 110, 110, 118, 126, 130, 136]
[312, 304, 296, 288, 280, 272, 264, 256, 248, 240, 232, 224, 218, 212, 206, 198, 190, 182, 174, 166, 158, 152, 146, 140, 138, 132, 128, 120, 118, 112, 106, 98, 92, 84, 78, 70, 78, 72, 68, 64, 72, 78, 86, 94, 102, 102, 110, 112, 118, 126, 134]
[320, 312, 304, 296, 288, 280, 272, 264, 256, 248, 240, 232, 226, 218, 214, 206, 198, 190, 182, 174, 166, 160, 154, 146, 144, 138, 136, 128, 122, 120, 114, 106, 100, 92, 86, 78, 74, 80, 76, 72, 68, 72, 80, 88, 96, 104, 106, 110, 114, 122, 130]
[328, 320, 312, 304, 296, 288, 280, 272, 264, 256, 248, 240, 234, 226, 222, 214, 206, 198, 190, 182, 174, 168, 162, 154, 150, 146, 142, 136, 128, 122, 122, 114, 106, 100, 94, 86, 82, 74, 82, 80, 76, 70, 76, 84, 92, 96, 104, 108, 110, 118, 126]
[336, 328, 320, 312, 304, 296, 288, 280, 272, 264, 256, 248, 242, 234, 230, 222, 214, 206, 198, 190, 182, 176, 170, 162, 158, 152, 150, 142, 136, 128, 126, 122, 114, 106, 102, 94, 90, 82, 78, 86, 84, 78, 74, 80, 88, 92, 100, 106, 108, 114, 122]
[344, 336, 328, 320, 312, 304, 296, 288, 280, 272, 264, 256, 250, 242, 236, 230, 222, 214, 206, 198, 190, 184, 176, 170, 164, 160, 154, 150, 144, 136, 130, 130, 122, 114, 108, 102, 94, 90, 82, 80, 88, 86, 80, 76, 82, 90, 92, 100, 108, 110, 116]
[352, 344, 336, 328, 320, 312, 304, 296, 288, 280, 272, 264, 258, 250, 244, 238, 230, 222, 214, 206, 198, 192, 184, 178, 172, 168, 162, 158, 152, 144, 138, 134, 130, 122, 116, 110, 102, 98, 90, 84, 82, 90, 88, 82, 78, 86, 90, 96, 104, 110, 112]
[360, 352, 344, 336, 328, 320, 312, 304, 296, 288, 280, 272, 264, 258, 250, 246, 238, 230, 222, 214, 206, 198, 192, 186, 178, 176, 168, 166, 160, 152, 144, 142, 138, 130, 122, 118, 110, 106, 98, 90, 84, 86, 90, 88, 82, 82, 88, 94, 100, 104, 110]
[368, 360, 352, 344, 336, 328, 320, 312, 304, 296, 288, 280, 272, 266, 258, 254, 246, 238, 230, 222, 214, 206, 200, 194, 186, 182, 176, 172, 168, 160, 152, 148, 146, 138, 130, 126, 118, 114, 106, 98, 90, 88, 86, 90, 88, 86, 84, 92, 98, 100, 104]
[376, 368, 360, 352, 344, 336, 328, 320, 312, 304, 296, 288, 280, 274, 266, 258, 254, 246, 238, 230, 222, 214, 208, 202, 194, 188, 184, 176, 172, 168, 160, 152, 148, 146, 138, 130, 126, 118, 114, 106, 98, 92, 92, 90, 94, 88, 90, 86, 92, 100, 104]
[384, 376, 368, 360, 352, 344, 336, 328, 320, 312, 304, 296, 288, 282, 274, 266, 258, 254, 246, 238, 230, 222, 214, 210, 202, 196, 190, 184, 180, 176, 168, 160, 156, 152, 146, 138, 130, 126, 118, 114, 106, 100, 94, 94, 92, 96, 88, 94, 90, 94, 102]
[392, 384, 376, 368, 360, 352, 344, 336, 328, 320, 312, 304, 296, 290, 282, 274, 266, 258, 254, 246, 238, 230, 222, 216, 210, 204, 198, 190, 184, 180, 176, 168, 160, 156, 154, 146, 138, 130, 126, 122, 114, 108, 102, 98, 98, 92, 96, 90, 94, 94, 98]
[400, 392, 384, 376, 368, 360, 352, 344, 336, 328, 320, 312, 304, 298, 290, 282, 274, 266, 258, 254, 246, 238, 230, 224, 218, 212, 206, 198, 192, 188, 182, 176, 168, 164, 158, 154, 146, 138, 130, 128, 122, 116, 110, 104, 100, 100, 92, 98, 94, 96, 96]
# 最终得分为 96(即矩阵右下角的值)

生信算法文章

生信算法1 - DNA测序算法实践之序列操作
生信算法2 - DNA测序算法实践之序列统计
生信算法3 - 基于k-mer算法获取序列比对索引
生信算法4 - 获取overlap序列索引和序列的算法

文章来源:https://blog.csdn.net/LittleComputerRobot/article/details/134975093
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。