Skip to content
Open
9 changes: 9 additions & 0 deletions test/merge.10.a.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
##fileformat=VCFv4.1
##contig=<ID=1,assembly=b37,length=249250621>
##reference=file:///ref.fa
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotypes">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
1 10 . C G . . AN=4;AC=2 GT 0/1
1 12 . T A . . AN=4;AC=2 GT 0/1
8 changes: 8 additions & 0 deletions test/merge.10.b.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
##fileformat=VCFv4.1
##contig=<ID=1,assembly=b37,length=249250621>
##reference=file:///ref.fa
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotypes">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT B
1 10 . CGT CGA,AGT . . AN=4;AC=1 GT 1/2
11 changes: 11 additions & 0 deletions test/merge.10.none.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##contig=<ID=1,assembly=b37,length=249250621>
##reference=file:///ref.fa
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotypes">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
1 10 . C G . . AN=2;AC=1 GT 0/1 ./.
1 10 . CGT CGA,AGT . . AN=2;AC=1,1 GT ./. 1/2
1 12 . T A . . AN=2;AC=1 GT 0/1 ./.
1 change: 1 addition & 0 deletions test/test.pl
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@
test_vcf_merge($opts,in=>['merge.8.a','merge.8.b'],out=>'merge.8.out',args=>'-i AN:sum,AC:sum');
test_vcf_merge($opts,in=>['merge.9.a','merge.9.b'],out=>'merge.9.1.out',args=>'');
test_vcf_merge($opts,in=>['merge.9.a','merge.9.b'],out=>'merge.9.2.out',args=>'-i AN:sum,AC:sum');
test_vcf_merge($opts,in=>['merge.10.a','merge.10.b'],out=>'merge.10.none.out',args=>'-m none');
# test_vcf_merge_big($opts,in=>'merge_big.1',out=>'merge_big.1.1',nsmpl=>79000,nfiles=>79,nalts=>486,args=>''); # commented out for speed
test_vcf_query($opts,in=>'query.string',out=>'query.string.1.out',args=>q[-f '%CHROM\\t%POS\\t%CLNREVSTAT\\n' -i'CLNREVSTAT="criteria_provided,_conflicting_interpretations"']);
test_vcf_query($opts,in=>'query.string',out=>'query.string.1.out',args=>q[-f '%CHROM\\t%POS\\t%CLNREVSTAT\\n' -i'CLNREVSTAT="criteria_provided" || CLNREVSTAT="_conflicting_interpretations"']);
Expand Down
12 changes: 8 additions & 4 deletions vcfmerge.c
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ typedef struct
maux_t *maux;
regidx_t *regs; // apply regions only after the blocks are expanded
regitr_t *regs_itr;
int header_only, collapse, output_type, force_samples, merge_by_id, do_gvcf, filter_logic, missing_to_ref, no_index;
int header_only, collapse, output_type, force_samples, merge_by_id, do_gvcf, filter_logic, missing_to_ref, no_index, non_normalize_alleles;
char *header_fname, *output_fname, *regions_list, *info_rules, *file_list;
faidx_t *gvcf_fai;
info_rule_t *rules;
Expand Down Expand Up @@ -973,7 +973,7 @@ void merge_chrom2qual(args_t *args, bcf1_t *out)
for (i=0; i<ma->nals; i++)
if ( i==0 || al_idxs[i] ) ma->out_als[k++] = strdup(ma->als[i]);
assert( k==ma->nout_als );
normalize_alleles(ma->out_als, ma->nout_als);
if (args->non_normalize_alleles != 1) normalize_alleles(ma->out_als, ma->nout_als);
bcf_update_alleles(out_hdr, out, (const char**) ma->out_als, ma->nout_als);
free(al_idxs);
for (i=0; i<ma->nout_als; i++) free(ma->out_als[i]);
Expand Down Expand Up @@ -2799,7 +2799,7 @@ int can_merge(args_t *args)
// All alleles of the tested record must be present in the
// selected maux record plus variant types must be the same
if ( (maux->var_types & line_type) != line_type ) continue;
if ( vcmp_set_ref(args->vcmp,maux->als[0],line->d.allele[0]) < 0 ) continue; // refs not compatible
if ( vcmp_set_ref(args->vcmp,maux->als[0],line->d.allele[0]) <= 0 ) continue; // refs not perfect match
for (k=1; k<line->n_allele; k++)
{
if ( vcmp_find_allele(args->vcmp,maux->als+1,maux->nals-1,line->d.allele[k])>=0 ) break;
Expand Down Expand Up @@ -3134,6 +3134,7 @@ static void usage(void)
fprintf(stderr, " -R, --regions-file FILE Restrict to regions listed in a file\n");
fprintf(stderr, " --regions-overlap 0|1|2 Include if POS in the region (0), record overlaps (1), variant overlaps (2) [1]\n");
fprintf(stderr, " --threads INT Use multithreading with <int> worker threads [0]\n");
fprintf(stderr, " -N, --non_normalize_alleles Do not normalize_alleles\n");
fprintf(stderr, "\n");
exit(1);
}
Expand All @@ -3150,6 +3151,7 @@ int main_vcfmerge(int argc, char *argv[])
args->record_cmd_line = 1;
args->collapse = COLLAPSE_BOTH;
args->clevel = -1;
args->non_normalize_alleles = 0;
int regions_is_file = 0;
int regions_overlap = 1;

Expand All @@ -3175,11 +3177,13 @@ int main_vcfmerge(int argc, char *argv[])
{"no-version",no_argument,NULL,8},
{"no-index",no_argument,NULL,10},
{"filter-logic",required_argument,NULL,'F'},
{"non_normalize_alleles",no_argument,NULL,'N'},
{NULL,0,NULL,0}
};
char *tmp;
while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:g:F:0L:",loptions,NULL)) >= 0) {
while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:g:F:0L:N",loptions,NULL)) >= 0) {
switch (c) {
case 'N': args->non_normalize_alleles = 1; break;
case 'L':
args->local_alleles = strtol(optarg,&tmp,10);
if ( *tmp ) error("Could not parse argument: --local-alleles %s\n", optarg);
Expand Down
25 changes: 13 additions & 12 deletions vcmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ int vcmp_set_ref(vcmp_t *vcmp, char *ref1, char *ref2)

char *a = ref1, *b = ref2;
while ( *a && *b && toupper(*a)==toupper(*b) ) { a++; b++; }
if ( !*a && !*b ) return 0;
if ( !*a && !*b ) return 1; // perfect match
if ( *a && *b ) return -1; // refs not compatible

int i;
Expand All @@ -70,18 +70,19 @@ int vcmp_set_ref(vcmp_t *vcmp, char *ref1, char *ref2)
hts_expand(char,vcmp->ndref+1,vcmp->mdref,vcmp->dref);
for (i=0; i<vcmp->ndref; i++) vcmp->dref[i] = toupper(ref1[vcmp->nmatch+i]);
vcmp->dref[vcmp->ndref] = 0;
return 0;
return 0; // compatible
} else if ( *b ) { // ref2 is longer
vcmp->nmatch = a-ref1;
while ( *b ) b++;
vcmp->ndref = (b-ref2) - vcmp->nmatch;
hts_expand(char,vcmp->ndref+1,vcmp->mdref,vcmp->dref);
for (i=0; i<vcmp->ndref; i++) vcmp->dref[i] = toupper(ref2[vcmp->nmatch+i]);
vcmp->dref[vcmp->ndref] = 0;
vcmp->ndref *= -1;
return 0; // compatible
} else {
return -1; // should not happen
}

// ref2 is longer
vcmp->nmatch = a-ref1;
while ( *b ) b++;
vcmp->ndref = (b-ref2) - vcmp->nmatch;
hts_expand(char,vcmp->ndref+1,vcmp->mdref,vcmp->dref);
for (i=0; i<vcmp->ndref; i++) vcmp->dref[i] = toupper(ref2[vcmp->nmatch+i]);
vcmp->dref[vcmp->ndref] = 0;
vcmp->ndref *= -1;
return 0;
}

int vcmp_find_allele(vcmp_t *vcmp, char **als1, int nals1, char *al2)
Expand Down