|
24 | 24 | #include "htslib_wrapper.hpp" |
25 | 25 | #include "smithlab_utils.hpp" |
26 | 26 | #include "MappedRead.hpp" |
| 27 | +#include "cigar_utils.hpp" |
27 | 28 |
|
28 | 29 | using std::string; |
29 | 30 | using std::vector; |
@@ -108,62 +109,6 @@ operator>>(SAMReader &reader, SAMRecord &aln) { |
108 | 109 | return reader; |
109 | 110 | } |
110 | 111 |
|
111 | | -///////////////////////////////////////////// |
112 | | -//// general facility for SAM format |
113 | | -///////////////////////////////////////////// |
114 | | - |
/**
 * Project a read and its quality string onto reference coordinates
 * according to a SAM CIGAR string.
 *
 * @param seq      read bases as stored in the SAM record
 * @param qual     quality string; must have the same length as seq
 * @param CIGAR    CIGAR string, a sequence of <count><op> pairs
 * @param new_seq  output; must be empty on entry; receives the bases
 *                 laid out along the reference
 * @param new_qual output; must be empty on entry; receives the
 *                 qualities laid out along the reference
 *
 * Handling of each operation:
 *   M, =, X  consume query and reference: bases/qualities are copied
 *   I, S     consume query only: the bases are dropped
 *   D, N     consume reference only: padded with 'N' bases and 'B'
 *            qualities so the output keeps the reference footprint
 *   H, P     consume neither: ignored
 * Any other operation character is ignored.
 *
 * The size checks are asserts, so they vanish when NDEBUG is defined.
 * Copying from a position past the end of seq/qual raises
 * std::out_of_range (thrown by std::string::append).
 */
static void
apply_CIGAR(const std::string &seq, const std::string &qual,
            const std::string &CIGAR,
            std::string &new_seq, std::string &new_qual) {
  assert(seq.size() == qual.size());
  assert(new_seq.size() == 0 && new_qual.size() == 0);

  size_t n = 0;         // length of the current operation
  char op = '\0';       // current operation character
  size_t query_pos = 0; // number of query bases consumed so far

  std::istringstream iss(CIGAR);
  while (iss >> n >> op) {
    switch (op) {
    case 'M':
    case '=':
    case 'X':
      // aligned columns: copy straight from the read
      new_seq.append(seq, query_pos, n);
      new_qual.append(qual, query_pos, n);
      query_pos += n;
      break;
    case 'I':
    case 'S':
      // present in the read but not on the reference: skip
      query_pos += n;
      break;
    case 'D':
    case 'N':
      // present on the reference but not in the read: pad.
      // 'N' (skipped region) consumes reference exactly like 'D';
      // leaving it out would make the output shorter than the span
      // the alignment covers on the reference.
      new_seq.append(n, 'N');
      new_qual.append(n, 'B');
      break;
    case 'H':
    case 'P':
      // hard clipping and padding touch neither query nor reference
      break;
    }
  }

  // Sum of lengths of the M/I/S/=/X operations
  // shall equal the length of seq.
  assert(query_pos == seq.length());
  assert(new_seq.size() == new_qual.size());
}
167 | 112 |
|
168 | 113 | class FLAG { |
169 | 114 | public: |
@@ -228,12 +173,11 @@ SAMReader::get_SAMRecord_bsmap(const string &str, SAMRecord &samr) { |
228 | 173 | bsmap_get_strand(strand_str, strand, bs_forward); |
229 | 174 | samr.mr.r.set_strand(strand[0]); |
230 | 175 |
|
231 | | - string new_seq, new_qual; |
232 | | - apply_CIGAR(seq, qual, CIGAR, new_seq, new_qual); |
| 176 | + string new_seq(seq); |
| 177 | + apply_cigar(CIGAR, new_seq); |
233 | 178 |
|
234 | 179 | samr.mr.r.set_end(samr.mr.r.get_start() + new_seq.size()); |
235 | 180 | samr.mr.seq = new_seq; |
236 | | - samr.mr.scr = new_qual; |
237 | 181 |
|
238 | 182 | samr.is_Trich = Flag.is_Trich(); |
239 | 183 | samr.is_mapping_paired = Flag.is_mapping_paired(); |
@@ -297,17 +241,14 @@ SAMReader::get_SAMRecord_bismark(const string &str, SAMRecord &samr) { |
297 | 241 | samr.mr.r.set_score(get_mismatch_bismark(edit_distance_str, meth_call_str)); |
298 | 242 | samr.mr.r.set_strand(Flag.is_revcomp() ? '-' : '+'); |
299 | 243 |
|
300 | | - string new_seq, new_qual; |
301 | | - apply_CIGAR(seq, qual, CIGAR, new_seq, new_qual); |
| 244 | + string new_seq(seq); |
| 245 | + apply_cigar(CIGAR, new_seq); |
302 | 246 |
|
303 | | - if (Flag.is_revcomp()) { |
| 247 | + if (Flag.is_revcomp()) |
304 | 248 | revcomp_inplace(new_seq); |
305 | | - std::reverse(new_qual.begin(), new_qual.end()); |
306 | | - } |
307 | 249 |
|
308 | 250 | samr.mr.r.set_end(samr.mr.r.get_start() + new_seq.size()); |
309 | 251 | samr.mr.seq = new_seq; |
310 | | - samr.mr.scr = new_qual; |
311 | 252 |
|
312 | 253 | samr.is_Trich = Flag.is_Trich(); |
313 | 254 | samr.is_mapping_paired = Flag.is_mapping_paired(); |
@@ -369,17 +310,14 @@ SAMReader::get_SAMRecord_bsseeker(const string &str, SAMRecord &samr) { |
369 | 310 | samr.mr.r.set_score(atoi(mismatch_str.substr(5).c_str())); |
370 | 311 | samr.mr.r.set_strand(Flag.is_revcomp() ? '-' : '+'); |
371 | 312 |
|
372 | | - string new_seq, new_qual; |
373 | | - apply_CIGAR(seq, qual, CIGAR, new_seq, new_qual); |
| 313 | + string new_seq(seq); |
| 314 | + apply_cigar(CIGAR, new_seq); |
374 | 315 |
|
375 | | - if (Flag.is_revcomp()) { |
| 316 | + if (Flag.is_revcomp()) |
376 | 317 | revcomp_inplace(new_seq); |
377 | | - std::reverse(new_qual.begin(), new_qual.end()); |
378 | | - } |
379 | 318 |
|
380 | 319 | samr.mr.r.set_end(samr.mr.r.get_start() + new_seq.size()); |
381 | 320 | samr.mr.seq = new_seq; |
382 | | - samr.mr.scr = new_qual; |
383 | 321 |
|
384 | 322 | samr.is_Trich = Flag.is_Trich(); |
385 | 323 | samr.is_mapping_paired = Flag.is_mapping_paired(); |
@@ -423,19 +361,15 @@ SAMReader::get_SAMRecord_general(const string &str, SAMRecord &samr) { |
423 | 361 | samr.mr.r.set_score(0); |
424 | 362 | samr.mr.r.set_strand(Flag.is_revcomp() ? '-' : '+'); |
425 | 363 |
|
426 | | - string new_seq, new_qual; |
427 | | - apply_CIGAR(seq, qual, CIGAR, new_seq, new_qual); |
428 | | - |
| 364 | + string new_seq(seq); |
| 365 | + apply_cigar(CIGAR, new_seq); |
429 | 366 |
|
430 | | - if (Flag.is_revcomp()) { |
| 367 | + if (Flag.is_revcomp()) |
431 | 368 | revcomp_inplace(new_seq); |
432 | | - std::reverse(new_qual.begin(), new_qual.end()); |
433 | | - } |
434 | 369 |
|
435 | 370 | samr.mr.r.set_end(samr.mr.r.get_start() + new_seq.size()); |
436 | 371 |
|
437 | 372 | samr.mr.seq = new_seq; |
438 | | - samr.mr.scr = new_qual; |
439 | 373 |
|
440 | 374 | } |
441 | 375 | samr.is_Trich = Flag.is_Trich(); |
|
0 commit comments