From 2fa28e5178436747264389648fe3e1a064acf6e7 Mon Sep 17 00:00:00 2001 From: StarSkyZheng Date: Wed, 8 Dec 2021 19:14:01 +0800 Subject: [PATCH 01/14] Update vcmp.c --- vcmp.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/vcmp.c b/vcmp.c index dbdc4b7ac..c1d2daf33 100644 --- a/vcmp.c +++ b/vcmp.c @@ -58,7 +58,7 @@ int vcmp_set_ref(vcmp_t *vcmp, char *ref1, char *ref2) char *a = ref1, *b = ref2; while ( *a && *b && toupper(*a)==toupper(*b) ) { a++; b++; } - if ( !*a && !*b ) return 0; + if ( !*a && !*b ) return 1; // perfect match if ( *a && *b ) return -1; // refs not compatible int i; @@ -70,18 +70,19 @@ int vcmp_set_ref(vcmp_t *vcmp, char *ref1, char *ref2) hts_expand(char,vcmp->ndref+1,vcmp->mdref,vcmp->dref); for (i=0; indref; i++) vcmp->dref[i] = toupper(ref1[vcmp->nmatch+i]); vcmp->dref[vcmp->ndref] = 0; - return 0; + return 0; // compatible + } else if ( *b ) { // ref2 is longer + vcmp->nmatch = a-ref1; + while ( *b ) b++; + vcmp->ndref = (b-ref2) - vcmp->nmatch; + hts_expand(char,vcmp->ndref+1,vcmp->mdref,vcmp->dref); + for (i=0; indref; i++) vcmp->dref[i] = toupper(ref2[vcmp->nmatch+i]); + vcmp->dref[vcmp->ndref] = 0; + vcmp->ndref *= -1; + return 0; // compatible + } else { + return -1; // should not happen } - - // ref2 is longer - vcmp->nmatch = a-ref1; - while ( *b ) b++; - vcmp->ndref = (b-ref2) - vcmp->nmatch; - hts_expand(char,vcmp->ndref+1,vcmp->mdref,vcmp->dref); - for (i=0; indref; i++) vcmp->dref[i] = toupper(ref2[vcmp->nmatch+i]); - vcmp->dref[vcmp->ndref] = 0; - vcmp->ndref *= -1; - return 0; } int vcmp_find_allele(vcmp_t *vcmp, char **als1, int nals1, char *al2) From ea93943f88ceabcfe2a82813b45c699c0d0809a9 Mon Sep 17 00:00:00 2001 From: StarSkyZheng Date: Wed, 8 Dec 2021 19:14:49 +0800 Subject: [PATCH 02/14] Update vcfmerge.c --- vcfmerge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcfmerge.c b/vcfmerge.c index 3556c5458..7eefeb604 100644 --- a/vcfmerge.c +++ b/vcfmerge.c @@ -2799,7 +2799,7 @@ int can_merge(args_t *args) // All alleles of the tested record must be present in the // selected maux record plus variant types must be the same if ( (maux->var_types & line_type) != line_type ) continue; - if ( vcmp_set_ref(args->vcmp,maux->als[0],line->d.allele[0]) < 0 ) continue; // refs not compatible + if ( vcmp_set_ref(args->vcmp,maux->als[0],line->d.allele[0]) <= 0 ) continue; // refs not perfect match for (k=1; kn_allele; k++) { if ( vcmp_find_allele(args->vcmp,maux->als+1,maux->nals-1,line->d.allele[k])>=0 ) break; From 62ee321575ece1d41d852566c1183013c659af01 Mon Sep 17 00:00:00 2001 From: StarSkyZheng Date: Wed, 8 Dec 2021 19:50:14 +0800 Subject: [PATCH 03/14] Create merge.10.a.vcf --- test/merge.10.a.vcf | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 test/merge.10.a.vcf diff --git a/test/merge.10.a.vcf b/test/merge.10.a.vcf new file mode 100644 index 000000000..2b9be9c0b --- /dev/null +++ b/test/merge.10.a.vcf @@ -0,0 +1,9 @@ +##fileformat=VCFv4.1 +##contig= +##reference=file:///ref.fa +##INFO= +##INFO= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A +1 10 . C G . . AN=4;AC=2 GT 0/1 +1 12 . T A . . AN=4;AC=2 GT 0/1 From 8d79d8280dc499a11e87927d462f454727a001dd Mon Sep 17 00:00:00 2001 From: StarSkyZheng Date: Wed, 8 Dec 2021 19:50:43 +0800 Subject: [PATCH 04/14] Create merge.10.b.vcf --- test/merge.10.b.vcf | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 test/merge.10.b.vcf diff --git a/test/merge.10.b.vcf b/test/merge.10.b.vcf new file mode 100644 index 000000000..b8b853394 --- /dev/null +++ b/test/merge.10.b.vcf @@ -0,0 +1,8 @@ +##fileformat=VCFv4.1 +##contig= +##reference=file:///ref.fa +##INFO= +##INFO= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT B +1 10 . CGT CGA,AGT . . AN=4;AC=1 GT 1/2 From f2af59598cb9cc007460e155e3b5ba198653ae6b Mon Sep 17 00:00:00 2001 From: StarSkyZheng Date: Wed, 8 Dec 2021 19:52:49 +0800 Subject: [PATCH 05/14] Create merge.10.none.out --- test/merge.10.none.out | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 test/merge.10.none.out diff --git a/test/merge.10.none.out b/test/merge.10.none.out new file mode 100644 index 000000000..804e8a491 --- /dev/null +++ b/test/merge.10.none.out @@ -0,0 +1,9 @@ +##fileformat=VCFv4.1 +##contig= +##reference=file:///ref.fa +##INFO= +##INFO= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B +1 10 . CGT GGT,CGA,AGT . . AN=4;AC=1,1,1 GT 0/1 2/3 +1 12 . T A . . AN=2;AC=1 GT 0/1 ./. From d531717a69cca621b37a768ee1e06ed3fb56f633 Mon Sep 17 00:00:00 2001 From: StarSkyZheng Date: Wed, 8 Dec 2021 19:54:13 +0800 Subject: [PATCH 06/14] Update test.pl --- test/test.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test.pl b/test/test.pl index a3e7840d2..e910aae2c 100755 --- a/test/test.pl +++ b/test/test.pl @@ -97,6 +97,7 @@ test_vcf_merge($opts,in=>['merge.8.a','merge.8.b'],out=>'merge.8.out',args=>'-i AN:sum,AC:sum'); test_vcf_merge($opts,in=>['merge.9.a','merge.9.b'],out=>'merge.9.1.out',args=>''); test_vcf_merge($opts,in=>['merge.9.a','merge.9.b'],out=>'merge.9.2.out',args=>'-i AN:sum,AC:sum'); +test_vcf_merge($opts,in=>['merge.10.a','merge.10.b'],out=>'merge.10.none.out',args=>'-m none'); # test_vcf_merge_big($opts,in=>'merge_big.1',out=>'merge_big.1.1',nsmpl=>79000,nfiles=>79,nalts=>486,args=>''); # commented out for speed test_vcf_query($opts,in=>'query.string',out=>'query.string.1.out',args=>q[-f '%CHROM\\t%POS\\t%CLNREVSTAT\\n' -i'CLNREVSTAT="criteria_provided,_conflicting_interpretations"']); test_vcf_query($opts,in=>'query.string',out=>'query.string.1.out',args=>q[-f '%CHROM\\t%POS\\t%CLNREVSTAT\\n' -i'CLNREVSTAT="criteria_provided" || CLNREVSTAT="_conflicting_interpretations"']); From 29540ec2a0d86a908a736f0e58c2d2b01c61db18 Mon Sep 17 00:00:00 2001 From: StarSkyZheng Date: Wed, 8 Dec 2021 19:58:49 +0800 Subject: [PATCH 07/14] Update merge.10.none.out --- test/merge.10.none.out | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/merge.10.none.out b/test/merge.10.none.out index 804e8a491..48b0400ee 100644 --- a/test/merge.10.none.out +++ b/test/merge.10.none.out @@ -5,5 +5,6 @@ ##INFO= ##FORMAT= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B -1 10 . CGT GGT,CGA,AGT . . AN=4;AC=1,1,1 GT 0/1 2/3 +1 10 . C G . . AN=2;AC=1 GT 0/1 ./. +1 10 . CGT CGA,AGT . . AN=2;AC=1,1 GT ./. 1/2 1 12 . T A . . AN=2;AC=1 GT 0/1 ./. From 3ec651e0fe57c3ad989d6edfcfead65d523fbc5d Mon Sep 17 00:00:00 2001 From: StarSkyZheng Date: Wed, 8 Dec 2021 20:01:49 +0800 Subject: [PATCH 08/14] Update merge.10.none.out --- test/merge.10.none.out | 1 + 1 file changed, 1 insertion(+) diff --git a/test/merge.10.none.out b/test/merge.10.none.out index 48b0400ee..773da6351 100644 --- a/test/merge.10.none.out +++ b/test/merge.10.none.out @@ -1,4 +1,5 @@ ##fileformat=VCFv4.1 +##FILTER= ##contig= ##reference=file:///ref.fa ##INFO= From 076f70db65f1323effba5d414d397b7e86b85c60 Mon Sep 17 00:00:00 2001 From: StarSkyZheng <12579814+starskyzheng@users.noreply.github.com> Date: Thu, 10 Nov 2022 23:58:40 +0800 Subject: [PATCH 09/14] add option: non_normalize_alleles --- vcfmerge.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vcfmerge.c b/vcfmerge.c index d581b5a3d..65899a4b2 100644 --- a/vcfmerge.c +++ b/vcfmerge.c @@ -973,7 +973,7 @@ void merge_chrom2qual(args_t *args, bcf1_t *out) for (i=0; inals; i++) if ( i==0 || al_idxs[i] ) ma->out_als[k++] = strdup(ma->als[i]); assert( k==ma->nout_als ); - normalize_alleles(ma->out_als, ma->nout_als); + if (args->non_normalize_alleles != 1) normalize_alleles(ma->out_als, ma->nout_als); bcf_update_alleles(out_hdr, out, (const char**) ma->out_als, ma->nout_als); free(al_idxs); for (i=0; inout_als; i++) free(ma->out_als[i]); @@ -3134,6 +3134,7 @@ static void usage(void) fprintf(stderr, " -R, --regions-file FILE Restrict to regions listed in a file\n"); fprintf(stderr, " --regions-overlap 0|1|2 Include if POS in the region (0), record overlaps (1), variant overlaps (2) [1]\n"); fprintf(stderr, " --threads INT Use multithreading with worker threads [0]\n"); + fprintf(stderr, " -N, --non_normalize_alleles Do not normalize_alleles\n"); fprintf(stderr, "\n"); exit(1); } @@ -3175,11 +3176,14 @@ int main_vcfmerge(int argc, char *argv[]) {"no-version",no_argument,NULL,8}, {"no-index",no_argument,NULL,10}, {"filter-logic",required_argument,NULL,'F'}, + {"non_normalize_alleles",required_argument,NULL,'N'}, {NULL,0,NULL,0} }; char *tmp; - while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:g:F:0L:",loptions,NULL)) >= 0) { + while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:g:F:0L:N",loptions,NULL)) >= 0) { switch (c) { + case 'N': + args->non_normalize_alleles = 1; case 'L': args->local_alleles = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: --local-alleles %s\n", optarg); From ea064d0cd71c9cf386be91059a60b377e61cb4aa Mon Sep 17 00:00:00 2001 From: StarSkyZheng <12579814+starskyzheng@users.noreply.github.com> Date: Thu, 10 Nov 2022 23:59:37 +0800 Subject: [PATCH 10/14] Update vcfmerge.c --- vcfmerge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcfmerge.c b/vcfmerge.c index 65899a4b2..68ca2fc81 100644 --- a/vcfmerge.c +++ b/vcfmerge.c @@ -149,7 +149,7 @@ typedef struct maux_t *maux; regidx_t *regs; // apply regions only after the blocks are expanded regitr_t *regs_itr; - int header_only, collapse, output_type, force_samples, merge_by_id, do_gvcf, filter_logic, missing_to_ref, no_index; + int header_only, collapse, output_type, force_samples, merge_by_id, do_gvcf, filter_logic, missing_to_ref, no_index, non_normalize_alleles; char *header_fname, *output_fname, *regions_list, *info_rules, *file_list; faidx_t *gvcf_fai; info_rule_t *rules; From 6b4f22eb10c673cf8456553da15f8a7e1d504d28 Mon Sep 17 00:00:00 2001 From: StarSkyZheng <12579814+starskyzheng@users.noreply.github.com> Date: Fri, 11 Nov 2022 01:32:33 +0800 Subject: [PATCH 11/14] Update vcfmerge.c --- vcfmerge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcfmerge.c b/vcfmerge.c index 68ca2fc81..d96c0fc4e 100644 --- a/vcfmerge.c +++ b/vcfmerge.c @@ -3176,7 +3176,7 @@ int main_vcfmerge(int argc, char *argv[]) {"no-version",no_argument,NULL,8}, {"no-index",no_argument,NULL,10}, {"filter-logic",required_argument,NULL,'F'}, - {"non_normalize_alleles",required_argument,NULL,'N'}, + {"non_normalize_alleles",no_argument,NULL,'N'}, {NULL,0,NULL,0} }; char *tmp; From 28d6e52736bc16df53c67b2f6173892df84ff863 Mon Sep 17 00:00:00 2001 From: StarSkyZheng <12579814+starskyzheng@users.noreply.github.com> Date: Fri, 11 Nov 2022 01:37:22 +0800 Subject: [PATCH 12/14] Update vcfmerge.c --- vcfmerge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcfmerge.c b/vcfmerge.c index d96c0fc4e..4c680c7d2 100644 --- a/vcfmerge.c +++ b/vcfmerge.c @@ -3183,7 +3183,7 @@ int main_vcfmerge(int argc, char *argv[]) while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:g:F:0L:N",loptions,NULL)) >= 0) { switch (c) { case 'N': - args->non_normalize_alleles = 1; + args->non_normalize_alleles = 1; break; case 'L': args->local_alleles = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: --local-alleles %s\n", optarg); From 5707436ae404bdf19a0ed68e2c0453ce6d771bd3 Mon Sep 17 00:00:00 2001 From: StarSkyZheng Date: Fri, 11 Nov 2022 01:43:56 +0800 Subject: [PATCH 13/14] fix --- Makefile | 2 +- vcfmerge.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index ad87f022b..7bbf6ccb4 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ CC = gcc AR = ar RANLIB = ranlib CPPFLAGS = -CFLAGS = -g -Wall -O2 +CFLAGS = -g -Wall -O2 -std=c99 LDFLAGS = LIBS = diff --git a/vcfmerge.c b/vcfmerge.c index 68ca2fc81..528294cf4 100644 --- a/vcfmerge.c +++ b/vcfmerge.c @@ -3151,6 +3151,7 @@ int main_vcfmerge(int argc, char *argv[]) args->record_cmd_line = 1; args->collapse = COLLAPSE_BOTH; args->clevel = -1; + args->non_normalize_alleles = 0; int regions_is_file = 0; int regions_overlap = 1; @@ -3176,14 +3177,13 @@ int main_vcfmerge(int argc, char *argv[]) {"no-version",no_argument,NULL,8}, {"no-index",no_argument,NULL,10}, {"filter-logic",required_argument,NULL,'F'}, - {"non_normalize_alleles",required_argument,NULL,'N'}, + {"non_normalize_alleles",no_argument,NULL,'N'}, {NULL,0,NULL,0} }; char *tmp; while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:g:F:0L:N",loptions,NULL)) >= 0) { switch (c) { - case 'N': - args->non_normalize_alleles = 1; + case 'N': args->non_normalize_alleles = 1; break; case 'L': args->local_alleles = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: --local-alleles %s\n", optarg); From 921d4d5fcf296ca837fd44c49e07d974b73d8d8b Mon Sep 17 00:00:00 2001 From: StarSkyZheng <12579814+starskyzheng@users.noreply.github.com> Date: Fri, 11 Nov 2022 02:03:26 +0800 Subject: [PATCH 14/14] Update Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7bbf6ccb4..ad87f022b 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ CC = gcc AR = ar RANLIB = ranlib CPPFLAGS = -CFLAGS = -g -Wall -O2 -std=c99 +CFLAGS = -g -Wall -O2 LDFLAGS = LIBS =