Skip to content

Commit 1d606b8

Browse files
committed
Add snp-pileup example script
1 parent 9821c6e commit 1d606b8

File tree

2 files changed

+218
-0
lines changed

2 files changed

+218
-0
lines changed

Gemfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,7 @@ gem "minitest"
99
gem "rake"
1010
gem "simplecov"
1111

12+
gem "csv"
13+
1214
gem "colorize"
1315
gem "diffy"

examples/snp_pileup.rb

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
#!/usr/bin/env ruby
2+
# frozen_string_literal: true
3+
4+
# A tiny, simplified snp-pileup-like tool implemented in Ruby using ruby-htslib.
5+
# It reads a VCF/BCF and a list of BAM/CRAM files, and for each biallelic SNP
6+
# position writes per-file ref/alt/error/deletion counts to a CSV.
7+
#
8+
# Usage:
9+
# examples/snp_pileup.rb [options] <vcf> <out.csv> <bam1> [bam2 ...]
10+
#
11+
# Options:
12+
# -q INT Minimum mapping quality (default: 0)
13+
# -Q INT Minimum base quality (default: 0)
14+
# -d INT Maximum per-file depth (default: 4000)
15+
# -x Ignore read-pair overlap adjustment (default: enabled)
16+
#
17+
# Notes:
18+
# - This is intentionally simplified for clarity and as an example. It does not
19+
# implement all of snp-pileup's features nor advanced performance tricks.
20+
# - When regions are used, BAM/CRAM index files are required.
21+
22+
require "optparse"
23+
require "csv"
24+
require "htslib"
25+
26+
# Helper class to manage streaming mpileup state per contig
27+
class ContigPileup
28+
attr_reader :chrom, :mpileup, :enumerator, :current_columns, :current_pos
29+
30+
def initialize(chrom, bams, opts)
31+
@chrom = chrom
32+
@mpileup = HTS::Bam::Mpileup.new(
33+
bams,
34+
region: chrom,
35+
maxcnt: opts[:max_depth],
36+
overlaps: !opts[:ignore_overlaps]
37+
)
38+
@enumerator = @mpileup.each
39+
advance_once
40+
end
41+
42+
def advance_to(target_pos)
43+
advance_once while @current_pos && @current_pos < target_pos
44+
end
45+
46+
def close
47+
@mpileup&.close
48+
rescue StandardError
49+
nil
50+
end
51+
52+
private
53+
54+
def advance_once
55+
@current_columns = @enumerator.next
56+
@current_pos = @current_columns.first&.pos
57+
rescue StopIteration
58+
@current_columns = nil
59+
@current_pos = nil
60+
end
61+
end
62+
63+
# Calculate per-file ref/alt/error/deletion tallies for a pileup column
64+
def compute_tallies(columns, ref, alt, min_mapq:, min_baseq:)
65+
columns.map do |col|
66+
r = 0
67+
a = 0
68+
e = 0
69+
d = 0
70+
71+
col.alignments.each do |aln|
72+
rec_bam = aln.record
73+
next if rec_bam.mapq < min_mapq
74+
75+
if aln.del?
76+
d += 1
77+
next
78+
end
79+
next if aln.refskip?
80+
81+
qpos = aln.query_position
82+
bq = rec_bam.base_qual(qpos)
83+
next if bq < min_baseq
84+
85+
base = rec_bam.base(qpos)
86+
case base
87+
when ref then r += 1
88+
when alt then a += 1
89+
else e += 1
90+
end
91+
end
92+
93+
[r, a, e, d]
94+
end
95+
end
96+
97+
# Check if a VCF record is a biallelic single-base SNP
98+
def biallelic_snp?(record)
99+
alleles = record.alleles
100+
alleles.length == 2 && alleles[0].length == 1 && alleles[1].length == 1
101+
end
102+
103+
opts = {
104+
min_mapq: 0,
105+
min_baseq: 0,
106+
max_depth: 4000,
107+
ignore_overlaps: false
108+
}
109+
110+
op = OptionParser.new do |o|
111+
o.banner = "Usage: #{$PROGRAM_NAME} [options] <vcf> <out.csv> <bam1> [bam2 ...]"
112+
o.on("-q INT", Integer, "Minimum mapping quality (default: #{opts[:min_mapq]})") { |v| opts[:min_mapq] = v }
113+
o.on("-Q INT", Integer, "Minimum base quality (default: #{opts[:min_baseq]})") { |v| opts[:min_baseq] = v }
114+
o.on("-d INT", Integer, "Maximum per-file depth (default: #{opts[:max_depth]})") { |v| opts[:max_depth] = v }
115+
o.on("-x", "Disable overlap detection (default: enabled)") { opts[:ignore_overlaps] = true }
116+
o.on("-h", "Show help") do
117+
puts o
118+
exit 0
119+
end
120+
end
121+
122+
begin
123+
op.order!
124+
rescue OptionParser::ParseError => e
125+
warn e.message
126+
warn op
127+
exit 1
128+
end
129+
130+
if ARGV.length < 3
131+
warn op
132+
exit 1
133+
end
134+
135+
vcf_path = ARGV.shift
136+
out_path = ARGV.shift
137+
bam_paths = ARGV
138+
139+
# Open BAM/CRAM inputs
140+
bams = bam_paths.map { |p| HTS::Bam.open(p) }
141+
142+
begin
143+
# Prepare CSV
144+
CSV.open(out_path, "w") do |csv|
145+
# Header
146+
header = %w[Chromosome Position Ref Alt]
147+
bams.each_with_index do |_b, i|
148+
idx = i + 1
149+
header += ["File#{idx}R", "File#{idx}A", "File#{idx}E", "File#{idx}D"]
150+
end
151+
csv << header
152+
153+
# Build a chrom->tid map from the first BAM header for quick checks
154+
bam0_hdr = bams.first.header
155+
156+
# Iterate VCF/BCF and stream a single Mpileup per contig
157+
HTS::Bcf.open(vcf_path) do |bcf|
158+
contig_pileup = nil
159+
160+
begin
161+
bcf.each do |rec|
162+
next unless biallelic_snp?(rec)
163+
164+
chrom = rec.chrom
165+
pos0 = rec.pos # 0-based
166+
ref = rec.alleles[0]
167+
alt = rec.alleles[1]
168+
169+
# Switch contig => reinitialize mpileup state
170+
if !contig_pileup || contig_pileup.chrom != chrom
171+
contig_pileup&.close
172+
tid = bam0_hdr.get_tid(chrom)
173+
if tid < 0
174+
contig_pileup = nil
175+
next
176+
end
177+
contig_pileup = ContigPileup.new(chrom, bams, opts)
178+
end
179+
180+
next unless contig_pileup
181+
182+
# Advance mpileup to this SNP position
183+
contig_pileup.advance_to(pos0)
184+
185+
# Compute tallies if coverage exactly at this position
186+
next unless contig_pileup.current_pos == pos0
187+
188+
tallies = compute_tallies(
189+
contig_pileup.current_columns,
190+
ref,
191+
alt,
192+
min_mapq: opts[:min_mapq],
193+
min_baseq: opts[:min_baseq]
194+
)
195+
196+
# Emit CSV row only if some counts exist
197+
next unless tallies.any? { |raed| raed.any?(&:positive?) }
198+
199+
row = [chrom, pos0 + 1, ref, alt]
200+
tallies.each { |raed| row.concat(raed) }
201+
csv << row
202+
end
203+
ensure
204+
contig_pileup&.close
205+
end
206+
end
207+
end
208+
ensure
209+
bams.each do |b|
210+
b.close
211+
rescue StandardError
212+
nil
213+
end
214+
end
215+
216+
warn "Done: wrote #{out_path}"

0 commit comments

Comments
 (0)