Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions header.c
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,8 @@ int sam_hdr_update_target_arrays(sam_hdr_t *bh, const sam_hrecs_t *hrecs,
// hrecs->refs_changed is the first ref that has been updated, so ones
// before that can be skipped.
int i;
khint_t k;
khash_t(s2i) *long_refs = (khash_t(s2i) *) bh->sdict;
for (i = refs_changed; i < hrecs->nref; i++) {
if (i >= bh->n_targets
|| strcmp(bh->target_name[i], hrecs->ref[i].name) != 0) {
Expand All @@ -927,13 +929,40 @@ int sam_hdr_update_target_arrays(sam_hdr_t *bh, const sam_hrecs_t *hrecs,
}
if (hrecs->ref[i].len < UINT32_MAX) {
bh->target_len[i] = hrecs->ref[i].len;

if (!long_refs)
continue;

// Check if we have an old length, if so remove it.
k = kh_get(s2i, long_refs, bh->target_name[i]);
if (k < kh_end(long_refs))
kh_del(s2i, long_refs, k);
} else {
bh->target_len[i] = UINT32_MAX;
if (bh->hrecs != hrecs) {
// Called from sam_hdr_dup; need to add sdict entries
if (!long_refs) {
if (!(bh->sdict = long_refs = kh_init(s2i)))
return -1;
}

// Add / update length
int absent;
k = kh_put(s2i, long_refs, bh->target_name[i], &absent);
if (absent < 0)
return -1;
kh_val(long_refs, k) = hrecs->ref[i].len;
}
}
}

// Free up any names that have been removed
for (; i < bh->n_targets; i++) {
if (long_refs) {
k = kh_get(s2i, long_refs, bh->target_name[i]);
if (k < kh_end(long_refs))
kh_del(s2i, long_refs, k);
}
free(bh->target_name[i]);
}

Expand Down
31 changes: 31 additions & 0 deletions sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,33 @@ void sam_hdr_destroy(sam_hdr_t *bh)
free(bh);
}

// Copy the sam_hdr_t::sdict hash, used to store the real lengths of long
// references before sam_hdr_t::hrecs is populated
int sam_hdr_dup_sdict(const sam_hdr_t *h0, sam_hdr_t *h)
{
const khash_t(s2i) *src_long_refs = (khash_t(s2i) *) h0->sdict;
khash_t(s2i) *dest_long_refs = kh_init(s2i);
int i;
if (!dest_long_refs) return -1;

for (i = 0; i < h->n_targets; i++) {
int ret;
khiter_t ksrc, kdest;
if (h->target_len[i] < UINT32_MAX) continue;
ksrc = kh_get(s2i, src_long_refs, h->target_name[i]);
if (ksrc == kh_end(src_long_refs)) continue;
kdest = kh_put(s2i, dest_long_refs, h->target_name[i], &ret);
if (ret < 0) {
kh_destroy(s2i, dest_long_refs);
return -1;
}
kh_val(dest_long_refs, kdest) = kh_val(src_long_refs, ksrc);
}

h->sdict = dest_long_refs;
return 0;
}

sam_hdr_t *sam_hdr_dup(const sam_hdr_t *h0)
{
if (h0 == NULL) return NULL;
Expand All @@ -157,6 +184,10 @@ sam_hdr_t *sam_hdr_dup(const sam_hdr_t *h0)
}
h->n_targets = i;
if (i < h0->n_targets) goto fail;

if (h0->sdict) {
if (sam_hdr_dup_sdict(h0, h) < 0) goto fail;
}
}

if (h0->hrecs) {
Expand Down
50 changes: 40 additions & 10 deletions test/sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -1226,6 +1226,23 @@ static void samrecord_layout(void)
"test/sam_alignment.tmp.sam_", "w", NULL);
}

static int check_ref_lengths(const sam_hdr_t *header,
const hts_pos_t *expected_lengths,
int num_refs, const char *hdr_name)
{
int i;
for (i = 0; i < num_refs; i++) {
hts_pos_t ln = sam_hdr_tid2len(header, i);
if (ln != expected_lengths[i]) {
fail("Wrong length for %s ref %d : "
"expected %"PRIhts_pos" got %"PRIhts_pos"\n",
hdr_name, i, expected_lengths[i], ln);
return -1;
}
}
return 0;
}

static void check_big_ref(int parse_header)
{
static const char sam_text[] = "data:,"
Expand Down Expand Up @@ -1257,7 +1274,7 @@ static void check_big_ref(int parse_header)
-1, -1, -1, 9223372034707292150LL - 1, 1LL - 1, -1
};
samFile *in = NULL, *out = NULL;
sam_hdr_t *header = NULL;
sam_hdr_t *header = NULL, *dup_header = NULL;
bam1_t *aln = bam_init1();
const int num_refs = sizeof(expected_lengths) / sizeof(expected_lengths[0]);
const int num_align = sizeof(expected_tids) / sizeof(expected_tids[0]);
Expand Down Expand Up @@ -1288,21 +1305,33 @@ static void check_big_ref(int parse_header)
goto cleanup;
}
if (parse_header) {
// This will force the reader to be parsed
// This will force the header to be parsed
if (sam_hdr_count_lines(header, "SQ") != num_refs) {
fail("Wrong number of SQ lines in header");
goto cleanup;
}
}
for (i = 0; i < num_refs; i++) {
hts_pos_t ln = sam_hdr_tid2len(header, i);
if (ln != expected_lengths[i]) {
fail("Wrong length for ref %d : "
"expected %"PRIhts_pos" got %"PRIhts_pos"\n",
i, expected_lengths[i], ln);
goto cleanup;
}
if (check_ref_lengths(header, expected_lengths, num_refs, "header") < 0)
goto cleanup;

dup_header = sam_hdr_dup(header);
if (!dup_header) {
fail("Failed to duplicate header");
}

if (check_ref_lengths(dup_header, expected_lengths,
num_refs, "duplicate header") < 0)
goto cleanup;

if (sam_hdr_count_lines(dup_header, "SQ") != num_refs) {
fail("Wrong number of SQ lines in duplicate header");
goto cleanup;
}

if (check_ref_lengths(dup_header, expected_lengths,
num_refs, "parsed duplicate header") < 0)
goto cleanup;

if (sam_hdr_write(out, header) < 0) {
fail("Failed to write SAM header");
goto cleanup;
Expand Down Expand Up @@ -1379,6 +1408,7 @@ static void check_big_ref(int parse_header)
cleanup:
bam_destroy1(aln);
sam_hdr_destroy(header);
sam_hdr_destroy(dup_header);
if (in) sam_close(in);
if (out) sam_close(out);
if (inf) fclose(inf);
Expand Down
6 changes: 6 additions & 0 deletions test/test.pl
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,12 @@ sub test_view
testv $opts, "./test_view $tv_args -p longrefs/longref.tmp.sam_ longrefs/longref.tmp.sam.gz";
testv $opts, "./compare_sam.pl longrefs/longref.sam longrefs/longref.tmp.sam_";

# CRAM disabled for now as the positions cannot be 32-bit. (These tests are useful for
# checking SQ headers only.)
# testv $opts, "./test_view $tv_args -C -o no_ref -p longrefs/longref.tmp.cram longrefs/longref.sam";
# testv $opts, "./test_view $tv_args -p longrefs/longref.tmp.sam_ longrefs/longref.tmp.cram";
# testv $opts, "./compare_sam.pl longrefs/longref.sam longrefs/longref.tmp.sam_";

# Build index and compare with on-the-fly one made earlier.
test_compare $opts, "$$opts{path}/test_index -c longrefs/longref.tmp.sam.gz", "longrefs/longref.tmp.sam.gz.csi.otf", "longrefs/longref.tmp.sam.gz.csi", gz=>1;

Expand Down