Skip to content

Commit ff271bf

Browse files
Steps towards changing how we work with mapped reads and sam format
1 parent 8483985 commit ff271bf

File tree

1 file changed

+12
-78
lines changed

1 file changed

+12
-78
lines changed

htslib_wrapper.cpp

Lines changed: 12 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "htslib_wrapper.hpp"
2525
#include "smithlab_utils.hpp"
2626
#include "MappedRead.hpp"
27+
#include "cigar_utils.hpp"
2728

2829
using std::string;
2930
using std::vector;
@@ -108,62 +109,6 @@ operator>>(SAMReader &reader, SAMRecord &aln) {
108109
return reader;
109110
}
110111

111-
/////////////////////////////////////////////
112-
//// general facility for SAM format
113-
/////////////////////////////////////////////
114-
115-
static void
116-
apply_CIGAR(const string &seq, const string &qual,
117-
const string &CIGAR, string &new_seq, string &new_qual) {
118-
assert(seq.size() == qual.size());
119-
assert(new_seq.size() == 0 && new_qual.size() == 0);
120-
size_t n;
121-
char op;
122-
size_t i = 0;
123-
124-
std::istringstream iss(CIGAR);
125-
while (iss >> n >> op) {
126-
switch (op)
127-
{
128-
case 'M':
129-
new_seq += seq.substr(i, n);
130-
new_qual += qual.substr(i, n);
131-
i += n;
132-
break;
133-
case 'I':
134-
i += n;
135-
break;
136-
case 'D':
137-
new_seq += string(n, 'N');
138-
new_qual += string(n, 'B');
139-
break;
140-
case 'S':
141-
i += n;
142-
break;
143-
case 'H':
144-
;
145-
break;
146-
case 'P':
147-
;
148-
break;
149-
case '=':
150-
new_seq += seq.substr(i, n);
151-
new_qual += qual.substr(i, n);
152-
i += n;
153-
break;
154-
case 'X':
155-
new_seq += seq.substr(i, n);
156-
new_qual += qual.substr(i, n);
157-
i += n;
158-
break;
159-
}
160-
}
161-
// Sum of lengths of the M/I/S/=/X operations
162-
// shall equal the length of seq.
163-
164-
assert(i == seq.length());
165-
assert(new_seq.size() == new_qual.size());
166-
}
167112

168113
class FLAG {
169114
public:
@@ -228,12 +173,11 @@ SAMReader::get_SAMRecord_bsmap(const string &str, SAMRecord &samr) {
228173
bsmap_get_strand(strand_str, strand, bs_forward);
229174
samr.mr.r.set_strand(strand[0]);
230175

231-
string new_seq, new_qual;
232-
apply_CIGAR(seq, qual, CIGAR, new_seq, new_qual);
176+
string new_seq(seq);
177+
apply_cigar(CIGAR, new_seq);
233178

234179
samr.mr.r.set_end(samr.mr.r.get_start() + new_seq.size());
235180
samr.mr.seq = new_seq;
236-
samr.mr.scr = new_qual;
237181

238182
samr.is_Trich = Flag.is_Trich();
239183
samr.is_mapping_paired = Flag.is_mapping_paired();
@@ -297,17 +241,14 @@ SAMReader::get_SAMRecord_bismark(const string &str, SAMRecord &samr) {
297241
samr.mr.r.set_score(get_mismatch_bismark(edit_distance_str, meth_call_str));
298242
samr.mr.r.set_strand(Flag.is_revcomp() ? '-' : '+');
299243

300-
string new_seq, new_qual;
301-
apply_CIGAR(seq, qual, CIGAR, new_seq, new_qual);
244+
string new_seq(seq);
245+
apply_cigar(CIGAR, new_seq);
302246

303-
if (Flag.is_revcomp()) {
247+
if (Flag.is_revcomp())
304248
revcomp_inplace(new_seq);
305-
std::reverse(new_qual.begin(), new_qual.end());
306-
}
307249

308250
samr.mr.r.set_end(samr.mr.r.get_start() + new_seq.size());
309251
samr.mr.seq = new_seq;
310-
samr.mr.scr = new_qual;
311252

312253
samr.is_Trich = Flag.is_Trich();
313254
samr.is_mapping_paired = Flag.is_mapping_paired();
@@ -369,17 +310,14 @@ SAMReader::get_SAMRecord_bsseeker(const string &str, SAMRecord &samr) {
369310
samr.mr.r.set_score(atoi(mismatch_str.substr(5).c_str()));
370311
samr.mr.r.set_strand(Flag.is_revcomp() ? '-' : '+');
371312

372-
string new_seq, new_qual;
373-
apply_CIGAR(seq, qual, CIGAR, new_seq, new_qual);
313+
string new_seq(seq);
314+
apply_cigar(CIGAR, new_seq);
374315

375-
if (Flag.is_revcomp()) {
316+
if (Flag.is_revcomp())
376317
revcomp_inplace(new_seq);
377-
std::reverse(new_qual.begin(), new_qual.end());
378-
}
379318

380319
samr.mr.r.set_end(samr.mr.r.get_start() + new_seq.size());
381320
samr.mr.seq = new_seq;
382-
samr.mr.scr = new_qual;
383321

384322
samr.is_Trich = Flag.is_Trich();
385323
samr.is_mapping_paired = Flag.is_mapping_paired();
@@ -423,19 +361,15 @@ SAMReader::get_SAMRecord_general(const string &str, SAMRecord &samr) {
423361
samr.mr.r.set_score(0);
424362
samr.mr.r.set_strand(Flag.is_revcomp() ? '-' : '+');
425363

426-
string new_seq, new_qual;
427-
apply_CIGAR(seq, qual, CIGAR, new_seq, new_qual);
428-
364+
string new_seq(seq);
365+
apply_cigar(CIGAR, new_seq);
429366

430-
if (Flag.is_revcomp()) {
367+
if (Flag.is_revcomp())
431368
revcomp_inplace(new_seq);
432-
std::reverse(new_qual.begin(), new_qual.end());
433-
}
434369

435370
samr.mr.r.set_end(samr.mr.r.get_start() + new_seq.size());
436371

437372
samr.mr.seq = new_seq;
438-
samr.mr.scr = new_qual;
439373

440374
}
441375
samr.is_Trich = Flag.is_Trich();

0 commit comments

Comments
 (0)