#!/usr/bin/env python ############################################################################ #This is a simple script made to brute force the closest-mass fragmentation# #from some digestion and mass spec. Made to automate biochemlab project # #for pectate lyase, but this should (slowly) work for big proteins, too # ############################################################################ #!!!This script requires Biopython, PyReadLine, and possibly Numpy!!! from Bio.SeqUtils.ProtParam import ProteinAnalysis import readline readline.parse_and_bind("control-v: paste") exitnow = 0 #Used when we're checking if we want to exit def search(positions,sequence,desired): #Get weight of selected fragment prevweight = 0 flagged = 0 #Used for when we're looking for worse masses for first in positions: first+=1 for second in positions: second+=1 weight = 0 if int(first) >= int(second): #Because fragments are real-world things continue fragment = sequence[first:second] prot = ProteinAnalysis(fragment.upper()) weight = int(prot.molecular_weight()) if weight in foundmasses: flagged = 1 if flagged !=1: # print 'Found '+str(weight)+' Da from '+str(first+1)+'-'+str(second+1)+'\n' #Uncomment this line to see every weight found if abs(weight - desired) < abs(prevweight - desired): prevweight = weight savefirst = first savesecond = second-1 # print '^^^BETTER WEIGHT^^^\n' flagged = 0 print 'Best weight= '+str(prevweight)+' Da from '+str(savefirst+1)+'-'+str(savesecond+1) #Uncomment this line and the above commented line to mark when we get a closer weight foundmasses.append(prevweight) return [prevweight,savefirst+1,savesecond+1] def positionfix(stringlist): #machine uses 0 as first number, not 1 positions = stringlist positions = positions.split() positions = [int(n) for n in positions] positions[:] = [x-1 for x in positions] if positions[0] != -1: positions.insert(0,-1) #pop a 0 in there so we start from the beginning if positions[-1] !=(len(sequence)-1): positions.insert(-1,(len(sequence)-1)) #make sure we can go all the way to the last residue return positions def checkexit(): done = 0 while done == 0: check = str(inputwrapper('Type "Continue" to continue, otherwise type "done" to exit\n')) if check.lower() == 'done': return 1 done = 1 elif check.lower() == 'continue': return 0 done = 1 def checkdone(): done = 0 while done == 0: check = str(inputwrapper('Type "Continue" find next-best fragment, otherwise type "Stop" if you have enough fragments\n')) if check.lower() == 'stop': return 0 done = 1 elif check.lower() == 'continue': return 1 done = 1 def inputwrapper(request): print request value = '' while value == '': value = raw_input() return value while exitnow == 0: prevweight = 0 #Used in search function foundproteins = [['MW in Da','first residue','last residue']] foundmasses=[] cutpositions = inputwrapper('From ExPasy, paste only the column labeled "Positions of Cleavage Sites" here: ') sequence = inputwrapper('Paste the protein\'s entire, uncut sequence here: ') desired = int(inputwrapper('Paste the MW you\'re looking for, in Daltons, here: ')) positions = positionfix(cutpositions) keepgoing = 1 while keepgoing == 1: foundproteins.append(search(positions,sequence,desired)) keepgoing = checkdone() print 'List of found masses:\n' for item in foundproteins: print item exitnow = checkexit()