Skip to content

Commit 9821c6e

Browse files
committed
Improve performance of pileup iterator
1 parent 42d7e85 commit 9821c6e

File tree

2 files changed

+75
-14
lines changed

2 files changed

+75
-14
lines changed

lib/hts/bam/pileup.rb

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -129,25 +129,43 @@ def each
129129
pos_ptr = FFI::MemoryPointer.new(:long_long) # hts_pos_t
130130
n_ptr = FFI::MemoryPointer.new(:int)
131131

132+
# Micro-optimizations:
133+
# - Compute constant struct size once
134+
# - Hoist header reference outside the loop
135+
plp1_size = HTS::LibHTS::BamPileup1.size
136+
header_local = @header
137+
132138
begin
133-
while (base_ptr = HTS::LibHTS.bam_plp64_auto(@plp, tid_ptr, pos_ptr, n_ptr)) && !base_ptr.null?
139+
loop do
140+
base_ptr = HTS::LibHTS.bam_plp64_auto(@plp, tid_ptr, pos_ptr, n_ptr)
141+
142+
# When base_ptr is NULL, check n to distinguish EOF (n == 0) from error (n < 0)
143+
if base_ptr.null?
144+
n = n_ptr.read_int
145+
raise "HTSlib pileup error (bam_plp64_auto)" if n < 0
146+
147+
break
148+
end
149+
134150
tid = tid_ptr.read_int
135151
pos = pos_ptr.read_long_long
136152
n = n_ptr.read_int
137153

138-
# Construct alignment entries
139-
alignments = if n.zero?
140-
[]
141-
else
142-
size = HTS::LibHTS::BamPileup1.size
143-
n.times.map do |i|
144-
e_ptr = base_ptr + (i * size)
145-
entry = HTS::LibHTS::BamPileup1.new(e_ptr)
146-
PileupRecord.new(entry, @header)
147-
end
148-
end
149-
150-
yield PileupColumn.new(tid:, pos:, alignments:)
154+
# Construct alignment entries with minimal allocations
155+
if n.zero?
156+
alignments = []
157+
else
158+
alignments = Array.new(n)
159+
i = 0
160+
while i < n
161+
e_ptr = base_ptr + (i * plp1_size)
162+
entry = HTS::LibHTS::BamPileup1.new(e_ptr)
163+
alignments[i] = PileupRecord.new(entry, header_local)
164+
i += 1
165+
end
166+
end
167+
168+
yield PileupColumn.new(tid: tid, pos: pos, alignments: alignments)
151169
end
152170
ensure
153171
close

test/bam/pileup_test.rb

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@ def test_pileup_yields_columns
1717
assert_kind_of Array, got.alignments
1818
assert_kind_of Integer, got.depth
1919
assert_operator got.depth, :>=, 0
20+
21+
# depth must equal the number of alignments in the column
22+
assert_equal got.alignments.length, got.depth
23+
24+
# tid must be a valid reference id within the header target count
25+
assert_operator got.tid, :>=, 0
26+
assert_operator got.tid, :<, bam.header.target_count
2027
end
2128
end
2229

@@ -57,4 +64,40 @@ def test_pileup_record_persists_beyond_step
5764
assert_includes %w[= A C G T M R S V W Y H K D B N], kept.base(0)
5865
end
5966
end
67+
68+
# Calling record multiple times must return the same instance (idempotent lazy copy).
69+
def test_record_idempotent
70+
HTS::Bam.open(Fixtures["moo.bam"]) do |bam|
71+
r1 = r2 = nil
72+
bam.pileup do |col|
73+
next if col.alignments.empty?
74+
75+
aln = col.alignments.first
76+
r1 = aln.record
77+
r2 = aln.record
78+
break
79+
end
80+
refute_nil r1
81+
assert_same r1, r2
82+
end
83+
end
84+
85+
# HTSlib contract: if is_refskip is set, is_del must also be set.
86+
def test_refskip_implies_del
87+
HTS::Bam.open(Fixtures["moo.bam"]) do |bam|
88+
seen_refskip = false
89+
checked = 0
90+
bam.pileup do |col|
91+
col.alignments.each do |aln|
92+
if aln.refskip?
93+
seen_refskip = true
94+
assert aln.del?, "refskip implies del in bam_pileup1_t"
95+
end
96+
end
97+
checked += 1
98+
break if seen_refskip || checked >= 200
99+
end
100+
# If no refskip appears in the inspected window, we don't fail the test.
101+
end
102+
end
60103
end

0 commit comments

Comments
 (0)