#!/usr/bin/env python ############################################################################ #This is a simple script made to brute force the closest-mass fragmentation# #from some digestion and mass spec. Made to automate biochemlab project # #for pectate lyase, but this should (slowly) work for big proteins, too # # This version of the script has no pre-requisite modules to be installed # ############################################################################ #import readline #Only uncomment this line if you have PyReadLine installed #readline.parse_and_bind("control-v: paste") #This is only necessary if you're going to compile the program AAweight = {'A':71.0779, #uses average mass, not monoisotopic 'R':156.1857, 'N':114.1026, 'D':115.0874, 'C':103.1429, 'E':129.114, 'Q':128.1292, 'G':57.0513, 'H':137.1393, 'I':113.1576, 'L':113.1576, 'K':128.1723, 'M':131.1961, 'F':147.1739, 'P':97.1152, 'S':87.0773, 'T':101.1039, 'U':150.0379, 'W':186.2099, 'Y':163.1733, 'V':99.1311, 'X':110.0000 #X stands for any unknown AA; used 110 because average AA mass is ~110 } exitnow = 0 #Used when we're checking if we want to exit def search(positions,sequence,desired): #Get weight of selected fragment prevweight = 0 flagged = 0 #Used for when we're looking for worse masses for first in positions: first+=1 for second in positions: second+=1 weight = 0 if int(first) >= int(second): #Because fragments are real-world things continue fragment = sequence[first:second] while len(fragment) > 0: residue = fragment[-1] fragment = fragment[:-1] weight += AAweight[residue] weight += 18 #Accounts for the -OH on C and -H on N termini weight = int(weight) if weight in foundmasses: flagged = 1 if flagged !=1: # print 'Found '+str(weight)+' Da from '+str(first+1)+'-'+str(second+1)+'\n' #Uncomment the line to see every weight found if abs(weight - desired) < abs(prevweight - desired): prevweight = weight savefirst = first savesecond = second-1 # print '^^^BETTER WEIGHT^^^\n' #Uncomment this line and the above commented line to mark when we get a closer weight flagged = 0 print 'Best weight= '+str(prevweight)+' Da from '+str(savefirst+1)+'-'+str(savesecond+1) foundmasses.append(prevweight) return [prevweight,savefirst+1,savesecond+1] def positionfix(stringlist): #machine uses 0 as first number, not 1 positions = stringlist positions = positions.split() positions = [int(n) for n in positions] positions[:] = [x-1 for x in positions] if positions[0] != -1: positions.insert(0,-1) #pop a 0 in there so we start from the beginning if positions[-1] !=(len(sequence)-1): positions.insert(-1,(len(sequence)-1)) #make sure we can go all the way to the last residue return positions def checkexit(): done = 0 while done == 0: check = str(inputwrapper('Type "Continue" to continue, otherwise type "done" to exit\n')) if check.lower() == 'done': return 1 done = 1 elif check.lower() == 'continue': return 0 done = 1 def checkdone(): done = 0 while done == 0: check = str(inputwrapper('Type "Continue" find next-best fragment, otherwise type "Stop" if you have enough fragments\n')) if check.lower() == 'stop': return 0 done = 1 elif check.lower() == 'continue': return 1 done = 1 def inputwrapper(request): print request value = '' while value == '': value = raw_input() return value while exitnow == 0: weight = 0 #Protein weight prevweight = 0 #Used in search function foundproteins = [['MW in Da','first residue','last residue']] foundmasses=[] cutpositions = inputwrapper('From ExPasy, paste only the column labeled "Positions of Cleavage Sites" here: ') sequence = inputwrapper('Paste the protein\'s entire, uncut sequence here: ') desired = int(inputwrapper('Paste the MW you\'re looking for, in Daltons, here: ')) positions = positionfix(cutpositions) keepgoing = 1 while keepgoing == 1: foundproteins.append(search(positions,sequence,desired)) keepgoing = checkdone() print 'List of found masses:\n' for item in foundproteins: print item exitnow = checkexit()