Easy Python Function Question

I need to create a program called extractGenes.py

The program needs to accept either 2 or 3 command-line parameters:

1. -s: this is an optional parameter, or switch, indicating that the user wants the spliced gene sequence (introns removed). The user does not have to provide it (omitting it means he wants the entire gene sequence), but if he does provide it, then it must be the first parameter.

2. input file (with the genes)

3. output file (which the program will create to store the FASTA output)

The input file contains lines like this:

NM_001003443 chr11 + 5925152 592608098 2 5925152,5925652, 5925404,5926898,

However, I am not sure how to handle the optional -s parameter in the program's starting function.

Any help would be amazing!!


  • import MySQLdb as mdb
    import os, sys
    import argparse as ap
    from Bio import SeqIO
    import re

    class Gene:
        """A single gene record: name, strand, two coordinates and a synonym."""

        def __init__(self, name, strand, l, r, s):
            """Store the gene's fields on the instance.

            name   -- stored as ``geneName``
            strand -- stored as ``geneStrand``
            l, r   -- stored as ``left`` / ``right``; they must be numeric
                      because ``__str__`` formats them with ``%d``
            s      -- stored as ``synonym``
            """
            self.geneName = name
            self.geneStrand = strand
            self.left = l
            self.right = r
            self.synonym = s

        def __str__(self):
            """Return the five fields separated by single spaces."""
            return "%s %s %d %d %s" % (
                self.geneName,
                self.geneStrand,
                self.left,
                self.right,
                self.synonym,
            )

    class DBInteract:
        """Small wrapper around one MySQLdb connection and two cursors."""

        def __init__(self, h, u, p, s):
            """Connect to the database and open two cursors.

            h, u, p, s are forwarded positionally to ``mdb.connect``; the
            caller in this file passes host, user, password and database
            name in that order.

            A connection failure is reported on stdout and not re-raised.
            In that case ``conn``, ``c1`` and ``c2`` are never set, so any
            later method call fails with AttributeError.
            """
            # The posted code had the ``except`` clause but had lost its
            # ``try:`` line, which is a syntax error.
            try:
                self.conn = mdb.connect(h, u, p, s)
                self.c1 = self.conn.cursor()
                self.c2 = self.conn.cursor()
            except mdb.Error:
                # Single-argument print(...) emits the same text on Python 2 and 3.
                print("Error connecting to the database.")
                print('Please verify the parameters are the same as the configuration of the database')

        def addCorr(self, oid, oname):
            """Insert one (organism id, organism name) row into organism_id_lookup.

            The values are passed as query parameters, not formatted into the
            SQL text. A database error is reported on stdout and not re-raised.
            """
            # NOTE(review): no commit is issued here; confirm the connection
            # autocommits or that a commit happens elsewhere.
            try:
                self.c1.execute('''INSERT INTO organism_id_lookup VALUES(%s, %s);''', (oid, oname,))
            except mdb.Error:
                # The original literal nested single quotes inside single
                # quotes, which does not parse; double quotes keep the
                # intended message text.
                print("Unable to insert data into table 'organism_id_lookup'")

        def createGeneTable(self, oid, genes):
            """Placeholder: the body of this method was missing from the posted code."""
            # A ``def`` with no body is a syntax error; fail loudly instead of
            # silently doing nothing until the real implementation is restored.
            raise NotImplementedError("createGeneTable body was not included in the posted code")

    def addCorrespondence(filename, db):
        """Derive an organism id and name from a file path and record them via db.

        The path is split on '/'. The organism id is the last component up to
        its first '.', and the organism name is the second-to-last component
        (the containing directory). Both are printed and then passed to
        ``db.addCorr(organism_id, organism_name)``.

        Raises IndexError if ``filename`` contains no '/' (there is no parent
        component to use as the organism name).
        """
        tokens = filename.split('/')
        id_name = tokens[-1].split('.')
        organism_id = id_name[0]
        organism_name = tokens[-2]
        # Python 2's ``print organism_id,' id'`` put one space between the two
        # items, giving two spaces before "id". The single-argument calls
        # below emit the same text on Python 2 and Python 3.
        print("%s  id" % organism_id)
        print(organism_name)
        db.addCorr(organism_id, organism_name)

    def processFile(filename, db):
        """Read one GenBank file and print a running count of its 'gene' features.

        filename -- path handed to ``SeqIO.read(filename, 'genbank')``
        db       -- not used by the code visible here

        NOTE(review): the posted code appears to have had lines removed. It
        printed ``len(genes)`` but never defined ``genes`` (NameError). This
        version only collects the matching features so the count is
        meaningful; restore the original per-gene handling if it did more.
        """
        genome = SeqIO.read(filename, 'genbank')

        genes = []
        for feat in genome.features:
            if feat.type == 'gene':
                genes.append(feat)
                print(len(genes))

    def findGBKFile(sdir, db):
        """Return the paths of the .gbk files found in the sub-directories of sdir.

        sdir -- directory to walk with ``os.walk``
        db   -- not used by the code visible here

        Files directly inside ``sdir`` are skipped, as in the original: the
        first entry yielded by ``os.walk`` is ``sdir`` itself. The paths are
        returned in the order they are encountered.

        NOTE(review): the posted code built each full path and then discarded
        it. Whatever was done per file appears to have been removed from the
        post; returning the paths lets the caller do that work.
        """
        # The original pattern '.+.gbk' left the dot unescaped and had no end
        # anchor, so names such as 'abgbk' or 'x.gbk.bak' matched too.
        gbk_name = re.compile(r'.+\.gbk$')
        found = []
        for i, (root, dirs, files) in enumerate(os.walk(sdir)):
            if i == 0:
                continue
            for filename in files:
                if gbk_name.match(filename):
                    found.append(os.path.join(root, filename))
        return found

    if __name__ == "__main__":
        # Command line: one required positional argument, the directory that
        # holds the GenBank files.
        parser = ap.ArgumentParser(description='Extract genes from genbank files.')
        parser.add_argument('sdir', nargs=1, help='the source directory where the gbk files are located')
        args = parser.parse_args()
        # nargs=1 makes argparse store a one-element list, hence the [0].
        sdir = args.sdir[0]
        # NOTE(review): the database credentials are hard-coded in the source
        # (and were posted publicly); move them to configuration or the
        # environment and rotate the password.
        db = DBInteract('localhost', 'root', 'cf3231383', 'genes')
        findGBKFile(sdir, db)
Sign In or Register to comment.

Howdy, Stranger!

It looks like you're new here. If you want to get involved, click one of these buttons!


In this Discussion