Skip to content

Commit 1f50b35

Browse files
committed
Add auxiliary tag manipulation examples and tests
1 parent 5df82e5 commit 1f50b35

File tree

4 files changed

+457
-0
lines changed

4 files changed

+457
-0
lines changed

TUTORIAL.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,51 @@ in.close
254254
out.close
255255
```
256256

257+
Writing and modifying auxiliary tags
258+
259+
```ruby
260+
# Reading auxiliary tags
261+
bam = HTS::Bam.open("input.bam")
262+
record = bam.first
263+
aux = record.aux
264+
265+
# Read tags
266+
alignment_score = aux["AS"] # Auto-detect type
267+
mc_cigar = aux.get_string("MC") # Type-specific getter
268+
edit_distance = aux.get_int("NM") # Type-specific getter
269+
270+
# Writing/updating auxiliary tags
271+
in_bam = HTS::Bam.open("input.bam")
272+
out_bam = HTS::Bam.open("output.bam", "wb")
273+
out_bam.write_header(in_bam.header)
274+
275+
in_bam.each do |record|
276+
aux = record.aux
277+
278+
# Update or add tags using type-specific methods
279+
aux.update_int("AS", 100) # Integer tag
280+
aux.update_float("ZQ", 0.95) # Float tag
281+
aux.update_string("RG", "sample1") # String tag
282+
aux.update_array("BC", [25, 30, 28, 32]) # Array tag
283+
284+
# Or use the []= operator (auto-detects type)
285+
aux["NM"] = 2 # Integer
286+
aux["ZS"] = "modified" # String
287+
aux["ZF"] = 3.14 # Float
288+
aux["ZA"] = [1, 2, 3, 4] # Array
289+
290+
# Check if tag exists
291+
if aux.key?("XS")
292+
aux.delete("XS") # Delete tag
293+
end
294+
295+
out_bam.write(record)
296+
end
297+
298+
in_bam.close
299+
out_bam.close
300+
```
301+
257302
Create index
258303

259304
```ruby

examples/bam-write.rb

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
#!/usr/bin/env ruby
2+
# frozen_string_literal: true
3+
4+
# Example: Writing BAM files
5+
#
6+
# This example demonstrates how to:
7+
# - Read BAM files and write modified records to a new BAM file
8+
# - Modify record fields (MAPQ, flags, positions, etc.)
9+
# - Add, update, and delete auxiliary tags
10+
# - Filter records based on criteria
11+
#
12+
# Usage: bam-write.rb <input.bam> <output.bam>
13+
14+
require "htslib"
15+
16+
def main
17+
if ARGV.size < 2
18+
warn "Usage: #{$PROGRAM_NAME} <input.bam> <output.bam>"
19+
warn ""
20+
warn "This example reads a BAM file, modifies records, and writes to a new file."
21+
warn ""
22+
warn "Examples:"
23+
warn " #{$PROGRAM_NAME} input.bam output.bam"
24+
exit 1
25+
end
26+
27+
input_path = ARGV[0]
28+
output_path = ARGV[1]
29+
30+
# Open input BAM file
31+
input_bam = HTS::Bam.new(input_path)
32+
header = input_bam.header
33+
34+
# Open output BAM file for writing
35+
output_bam = HTS::Bam.new(output_path, "wb")
36+
output_bam.write_header(header)
37+
38+
written_count = 0
39+
filtered_count = 0
40+
41+
# Process each record
42+
input_bam.each do |record|
43+
# Example 1: Filter records by mapping quality
44+
# Skip records with low mapping quality
45+
if record.mapq < 10
46+
filtered_count += 1
47+
next
48+
end
49+
50+
# Example 2: Modify record fields
51+
# Adjust mapping quality for high-confidence alignments
52+
if record.mapq > 50
53+
record.mapq = 60 # Cap at 60
54+
end
55+
56+
# Example 3: Modify flags
57+
# Mark duplicates (example logic)
58+
if record.pos % 100 == 0 # Arbitrary example
59+
record.flag
60+
# This is just an example - real duplicate marking is more complex
61+
# flag |= 1024 # Set duplicate flag
62+
end
63+
64+
# Example 4: Work with auxiliary tags
65+
aux = record.aux
66+
67+
# Add or update tags using type-specific methods
68+
aux.update_int("AS", record.mapq * 2) # Alignment score based on MAPQ
69+
aux.update_string("PG", "bam-write") # Program name
70+
71+
# Or use the []= operator (auto-detects type)
72+
aux["NM"] = 0 # Reset edit distance (example)
73+
aux["ZP"] = "processed" # Custom processing flag
74+
75+
# Add custom quality metrics
76+
if record.mapq > 30
77+
aux.update_float("ZQ", 0.95) # High quality score
78+
aux["HQ"] = 1 # High quality flag
79+
else
80+
aux.update_float("ZQ", 0.50) # Medium quality score
81+
end
82+
83+
# Delete tags if needed (example: remove XS tag)
84+
aux.delete("XS") if aux.key?("XS")
85+
86+
# Example 5: Add array tags
87+
# Add custom base quality distribution (mock data)
88+
if record.seq && record.seq.length > 0
89+
# Count ACGT bases (simplified example)
90+
seq = record.seq
91+
base_counts = [
92+
seq.count("A"),
93+
seq.count("C"),
94+
seq.count("G"),
95+
seq.count("T")
96+
]
97+
aux.update_array("BC", base_counts)
98+
end
99+
100+
# Write modified record to output
101+
output_bam.write(record)
102+
written_count += 1
103+
104+
# Print progress every 10000 records
105+
warn "Processed: #{written_count} written, #{filtered_count} filtered..." if (written_count % 10_000).zero?
106+
end
107+
108+
# Clean up
109+
input_bam.close
110+
output_bam.close
111+
112+
puts "Done!"
113+
puts " Records written: #{written_count}"
114+
puts " Records filtered: #{filtered_count}"
115+
puts " Total processed: #{written_count + filtered_count}"
116+
puts " Output: #{output_path}"
117+
end
118+
119+
main if $PROGRAM_NAME == __FILE__

lib/hts/bam/auxi.rb

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,124 @@ def [](key)
5555
get(key)
5656
end
5757

58+
# Set auxiliary tag value (auto-detects type from value)
59+
# For compatibility with HTS.cr.
60+
# @param key [String] tag name (2 characters)
61+
# @param value [Integer, Float, String, Array] tag value
62+
def []=(key, value)
63+
case value
64+
when Integer
65+
update_int(key, value)
66+
when Float
67+
update_float(key, value)
68+
when String
69+
update_string(key, value)
70+
when Array
71+
update_array(key, value)
72+
else
73+
raise ArgumentError, "Unsupported type: #{value.class}"
74+
end
75+
end
76+
77+
# Update or add an integer tag
78+
# For compatibility with HTS.cr.
79+
# @param key [String] tag name (2 characters)
80+
# @param value [Integer] integer value
81+
def update_int(key, value)
82+
ret = LibHTS.bam_aux_update_int(@record.struct, key, value.to_i)
83+
raise "Failed to update integer tag '#{key}': errno #{FFI.errno}" if ret < 0
84+
85+
value
86+
end
87+
88+
# Update or add a floating-point tag
89+
# For compatibility with HTS.cr.
90+
# @param key [String] tag name (2 characters)
91+
# @param value [Float] floating-point value
92+
def update_float(key, value)
93+
ret = LibHTS.bam_aux_update_float(@record.struct, key, value.to_f)
94+
raise "Failed to update float tag '#{key}': errno #{FFI.errno}" if ret < 0
95+
96+
value
97+
end
98+
99+
# Update or add a string tag
100+
# For compatibility with HTS.cr.
101+
# @param key [String] tag name (2 characters)
102+
# @param value [String] string value
103+
def update_string(key, value)
104+
ret = LibHTS.bam_aux_update_str(@record.struct, key, -1, value.to_s)
105+
raise "Failed to update string tag '#{key}': errno #{FFI.errno}" if ret < 0
106+
107+
value
108+
end
109+
110+
# Update or add an array tag
111+
# For compatibility with HTS.cr.
112+
# @param key [String] tag name (2 characters)
113+
# @param value [Array] array of integers or floats
114+
# @param type [String, nil] element type ('c', 'C', 's', 'S', 'i', 'I', 'f'). Auto-detected if nil.
115+
def update_array(key, value, type: nil)
116+
raise ArgumentError, "Array cannot be empty" if value.empty?
117+
118+
# Auto-detect type if not specified
119+
if type.nil?
120+
if value.all? { |v| v.is_a?(Integer) }
121+
# Use 'i' for signed 32-bit integers by default
122+
type = "i"
123+
elsif value.all? { |v| v.is_a?(Float) || v.is_a?(Integer) }
124+
type = "f"
125+
else
126+
raise ArgumentError, "Array must contain only integers or floats"
127+
end
128+
end
129+
130+
# Convert array to appropriate C type
131+
case type
132+
when "c", "C", "s", "S", "i", "I"
133+
# Integer types
134+
ptr = FFI::MemoryPointer.new(:int32, value.size)
135+
ptr.write_array_of_int32(value.map(&:to_i))
136+
ret = LibHTS.bam_aux_update_array(@record.struct, key, type.ord, value.size, ptr)
137+
when "f"
138+
# Float type
139+
ptr = FFI::MemoryPointer.new(:float, value.size)
140+
ptr.write_array_of_float(value.map(&:to_f))
141+
ret = LibHTS.bam_aux_update_array(@record.struct, key, type.ord, value.size, ptr)
142+
else
143+
raise ArgumentError, "Invalid array type: #{type}"
144+
end
145+
146+
raise "Failed to update array tag '#{key}': errno #{FFI.errno}" if ret < 0
147+
148+
value
149+
end
150+
151+
# Delete an auxiliary tag
152+
# For compatibility with HTS.cr.
153+
# @param key [String] tag name (2 characters)
154+
# @return [Boolean] true if tag was deleted, false if tag was not found
155+
def delete(key)
156+
aux_ptr = LibHTS.bam_aux_get(@record.struct, key)
157+
return false if aux_ptr.null?
158+
159+
ret = LibHTS.bam_aux_del(@record.struct, aux_ptr)
160+
raise "Failed to delete tag '#{key}': errno #{FFI.errno}" if ret < 0
161+
162+
true
163+
end
164+
165+
# Check if a tag exists
166+
# For compatibility with HTS.cr.
167+
# @param key [String] tag name (2 characters)
168+
# @return [Boolean] true if tag exists
169+
def key?(key)
170+
aux_ptr = LibHTS.bam_aux_get(@record.struct, key)
171+
!aux_ptr.null?
172+
end
173+
174+
alias include? key?
175+
58176
def first
59177
aux_ptr = first_pointer
60178
return nil if aux_ptr.null?

0 commit comments

Comments
 (0)