Skip to content

Commit a652987

Browse files
committed
v 1.0.3 polymut biopython fix
1 parent 1ac67f8 commit a652987

File tree

1 file changed

+46
-43
lines changed

1 file changed

+46
-43
lines changed

cmseq/cmseq.py

Lines changed: 46 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -104,55 +104,58 @@ def rev_comp(string):
104104
bases = [complement[base] for base in bases]
105105
bases.reverse()
106106
return ''.join(bases)
107-
108-
in_file = inputGFF
109-
in_handle = open(in_file)
110-
#gene_locations = {}
107+
111108
try:
112-
parsed_gff = GFF.parse(in_handle)
109+
with open(inputGFF) as in_handle:
110+
_ = next(GFF.parse(in_handle))
113111
except:
114112
print ('Parsing of GFF failed. This is probably because your biopython version is too new. Try downgrading to 1.76 or older')
115-
for rec in GFF.parse(in_handle):
116-
tmp = []
117-
for r in rec.features:
118-
if "minced" in r.qualifiers['source'][0] or "Minced" in r.qualifiers['source'][0]:
119-
# This catches CRISPR repeats.
120-
continue
121-
if r.sub_features:
122-
prodigal_bool = 'Prodigal' in r.sub_features[0].qualifiers['source'][0] or 'prodigal' in r.sub_features[0].qualifiers['source'][0]
123-
else:
124-
prodigal_bool = 'Prodigal' in r.qualifiers['source'][0] or 'prodigal' in r.qualifiers['source'][0]
125-
126-
if prodigal_bool:
127-
# Prokka not only finds protein sequences, but also t-/r-RNA sequences. In order to only parse protein coding sequences,
128-
# I search for Prodigal/Prodigal in the source entry of the sub_features attribute.
129-
130-
# the sub_features attribute of a seq_record object is apparently deprecated. I couldn't find any other way to access
131-
# the required information, though. Should probably be fixed when I can.
132-
indices = str(r.location).split('[')[1].split(']')[0].split(':')
133-
indices = [int(x) for x in indices]
134-
sense = str(r.location).split('(')[1].split(')')[0]
135-
if sense == "-":
136-
gene_seq = rev_comp(rec.seq[indices[0]:indices[1]])
137-
else:
138-
gene_seq = rec.seq[indices[0]:indices[1]]
113+
sys.exit(1)
139114

140-
if (str(gene_seq[0:3]) == "ATG" or str(gene_seq[0:3]) == "GTG" or str(gene_seq[0:3]) == "TTG"):
141-
pass
115+
with open(inputGFF) as in_handle:
116+
117+
for rec in GFF.parse(in_handle):
118+
tmp = []
119+
for r in rec.features:
120+
if "minced" in r.qualifiers['source'][0] or "Minced" in r.qualifiers['source'][0]:
121+
# This catches CRISPR repeats.
122+
continue
123+
if r.sub_features:
124+
prodigal_bool = 'Prodigal' in r.sub_features[0].qualifiers['source'][0] or 'prodigal' in r.sub_features[0].qualifiers['source'][0]
142125
else:
143-
warnings.warn(str(r.id) + " doesn't start with a common start codon. Beware. Continuing.")
126+
prodigal_bool = 'Prodigal' in r.qualifiers['source'][0] or 'prodigal' in r.qualifiers['source'][0]
127+
128+
if prodigal_bool:
129+
# Prokka not only finds protein sequences, but also t-/r-RNA sequences. In order to only parse protein coding sequences,
130+
# I search for Prodigal/Prodigal in the source entry of the sub_features attribute.
131+
132+
# the sub_features attribute of a seq_record object is apparently deprecated. I couldn't find any other way to access
133+
# the required information, though. Should probably be fixed when I can.
134+
indices = str(r.location).split('[')[1].split(']')[0].split(':')
135+
indices = [int(x) for x in indices]
136+
sense = str(r.location).split('(')[1].split(')')[0]
137+
if sense == "-":
138+
gene_seq = rev_comp(rec.seq[indices[0]:indices[1]])
139+
else:
140+
gene_seq = rec.seq[indices[0]:indices[1]]
141+
142+
if (str(gene_seq[0:3]) == "ATG" or str(gene_seq[0:3]) == "GTG" or str(gene_seq[0:3]) == "TTG"):
143+
pass
144+
else:
145+
warnings.warn(str(r.id) + " doesn't start with a common start codon. Beware. Continuing.")
146+
147+
if (str(gene_seq[-3:]) == "TAG" or str(gene_seq[-3:]) == "TAA" or str(gene_seq[-3:]) == "TGA"):
148+
pass
149+
else:
150+
warnings.warn(str(r.id) + " doesn't stop with a usual stop codon. Beware. Continuing.")
151+
tmp.append((indices, sense))
152+
153+
if str(rec.id) in self.contigs:
154+
self.contigs[str(rec.id)].annotations.append(tmp)
155+
else:
156+
warnings.warn(str(rec.id) + " is not tracked by the BAMFile.")
144157

145-
if (str(gene_seq[-3:]) == "TAG" or str(gene_seq[-3:]) == "TAA" or str(gene_seq[-3:]) == "TGA"):
146-
pass
147-
else:
148-
warnings.warn(str(r.id) + " doesn't stop with a usual stop codon. Beware. Continuing.")
149-
tmp.append((indices, sense))
150-
151-
if str(rec.id) in self.contigs:
152-
self.contigs[str(rec.id)].annotations.append(tmp)
153-
else:
154-
warnings.warn(str(rec.id) + " is not tracked by the BAMFile.")
155-
in_handle.close()
158+
156159

157160

158161
def parallel_reference_free_consensus(self,ncores=4,**kwargs):

0 commit comments

Comments
 (0)