|
| 1 | +#include <iostream> |
| 2 | +#include <vector> |
| 3 | +#include <string> |
| 4 | +#include <sstream> |
| 5 | +#include "htslib/sam.h" |
| 6 | +#include <cmath> |
| 7 | + |
| 8 | +using namespace std; |
| 9 | + |
| 10 | +int main(int argc, char* argv[]) { |
| 11 | + string filename = argv[1]; |
| 12 | + |
| 13 | + htsFile *htsfp; |
| 14 | + bam_hdr_t *samHeader; |
| 15 | + |
| 16 | + |
| 17 | + htsfp = hts_open(filename.c_str(),"r"); |
| 18 | + const htsFormat *fmt = hts_get_format(htsfp); |
| 19 | + |
| 20 | + |
| 21 | + samHeader = sam_hdr_read(htsfp); |
| 22 | + |
| 23 | + |
| 24 | + vector<vector<int> > covBins; |
| 25 | + for (int i =0; i < samHeader->n_targets; i++) { |
| 26 | + covBins.push_back(vector<int>() ); |
| 27 | + int last=covBins.size()-1; |
| 28 | + covBins[last].resize(samHeader->target_len[i]/100+1); |
| 29 | + } |
| 30 | + |
| 31 | + |
| 32 | + bam1_t *b = bam_init1(); |
| 33 | + int res=1; |
| 34 | + res= sam_read1(htsfp, samHeader, b); |
| 35 | + long readIndex=0; |
| 36 | + long nCounted=0; |
| 37 | + while (res > 0) { |
| 38 | + readIndex+=1; |
| 39 | + long alnPos = b->core.pos; |
| 40 | + int tid=b->core.tid; |
| 41 | + if (alnPos >= 0) { |
| 42 | + covBins[tid][alnPos/100]+=1; |
| 43 | + nCounted+=1; |
| 44 | + uint8_t *xaData = bam_aux_get(b, "XA"); |
| 45 | + if (xaData != 0) { |
| 46 | + char *xaString=bam_aux2Z(xaData); |
| 47 | + stringstream auxStrm((char*)xaString); |
| 48 | + string aln; |
| 49 | + int index=0; |
| 50 | + while(std::getline(auxStrm, aln, ';')) { |
| 51 | + |
| 52 | + size_t pos=0; |
| 53 | + while ((pos=aln.find(',',pos)) != string::npos) { |
| 54 | + aln[pos] = '\t'; |
| 55 | + } |
| 56 | + stringstream elemStrm(aln); |
| 57 | + string token; |
| 58 | + int ap=0; |
| 59 | + string chrom; |
| 60 | + long signedPos; |
| 61 | + string cigar; |
| 62 | + int mapq; |
| 63 | + elemStrm >> chrom >> signedPos >> cigar >> mapq; |
| 64 | + int tid =sam_hdr_name2tid(samHeader, chrom.c_str()); |
| 65 | + covBins[tid][abs(signedPos)/100]+=1; |
| 66 | + nCounted+=1; |
| 67 | + index+=1; |
| 68 | + } |
| 69 | + } |
| 70 | + } |
| 71 | + res = sam_read1(htsfp, samHeader, b); |
| 72 | + if (readIndex %1000000 == 0) { |
| 73 | + cerr << "proc " << readIndex /1000000 << "M\t" << nCounted << endl; |
| 74 | + } |
| 75 | + } |
| 76 | + |
| 77 | + for (int i=0; i < covBins.size(); i++) { |
| 78 | + string name=samHeader->target_name[i]; |
| 79 | + for (int j=0; j < covBins[i].size(); j++) { |
| 80 | + if (covBins[i][j] > 0) { |
| 81 | + cout << name << "\t" << j*100 << "\t" << (j+1)*100 << "\t" << covBins[i][j] << endl; |
| 82 | + } |
| 83 | + } |
| 84 | + } |
| 85 | +} |
0 commit comments