Break it down for us user, I'm listening, explain like we're all retarded, because we are, thanks!
Hunter Edwards
Run this script if you don't believe me:
import os import sys import numpy as np
def prepSequence(filename):
    """Read a sequence file and return its non-numeric tokens in file order.

    Each line is stripped of surrounding whitespace and split on single
    spaces.  Empty strings (produced by runs of spaces or blank lines) and
    purely numeric tokens (the position numbering in the listing) are
    dropped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of the remaining tokens, in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    tokens = []
    with open(filename, 'r') as sequence:
        for line in sequence:
            for token in line.strip().split(' '):
                # Truthiness rejects '' by value.  The previous `is not ''`
                # compared object identity, which only worked by accident of
                # string interning and raises a SyntaxWarning on Python 3.8+.
                if token and not token.isdigit():
                    # Appending in one pass avoids the quadratic list
                    # copying of `sum(list_of_lists, [])`.
                    tokens.append(token)
    return tokens
def LCSLength(A, B):
    """Return the length of the longest common subsequence of A and B.

    A subsequence keeps the relative order of elements but need not be
    contiguous.  The result is an exact ``int``.

    Args:
        A: First sequence (anything supporting ``len`` and iteration, e.g. str).
        B: Second sequence, compared element-wise with ``==`` against A.

    Returns:
        The LCS length; 0 when either sequence is empty.
    """
    # LCS length is symmetric, so keep the shorter sequence on the inner
    # axis: only two rows of that length are ever held in memory, instead of
    # a full len(A) x len(B) table.
    if len(B) > len(A):
        A, B = B, A
    n = len(B)

    # prev[j] is the LCS length of the elements of A consumed so far and
    # B[:j].  Column 0 (empty prefix of B) is always 0; this extra
    # row/column is what lets the first element of each sequence take part
    # in the comparison.
    prev = [0] * (n + 1)
    for a in A:
        curr = [0]
        for j, b in enumerate(B, start=1):
            if a == b:
                curr.append(prev[j - 1] + 1)
            else:
                curr.append(max(curr[j - 1], prev[j]))
        prev = curr
    return prev[n]
def main():
    """Print both sequence lengths, then the value returned by LCSLength.

    The sequences are read from the files 'covid19' and 'hiv' (paths
    relative to the current working directory); the tokens returned by
    prepSequence are concatenated into one string per file.
    """
    # Load in the same order as the output: covid19 first, then hiv.
    genomes = [''.join(prepSequence(source)) for source in ('covid19', 'hiv')]

    for genome in genomes:
        print(len(genome))

    first, second = genomes
    print(LCSLength(first, second))
# Run the comparison only when this file is executed as a script, not when it is imported.
if __name__ == '__main__': main()
It's pretty slow since it's not really optimized, but it does the job.