22
33
44__author__ = 'Francesco Asnicar (f.asnicar@unitn.it)'
5- __version__ = '0.2.3 '
6- __date__ = '11 November 2019'
5+ __version__ = '0.2.4 '
6+ __date__ = '30 December 2019'
77
88
99import os
@@ -51,6 +51,8 @@ def read_params():
5151 p .add_argument ('-i' , '--input_dir' , required = True , type = str , help = "Path to input directory" )
5252 p .add_argument ('-e' , '--extension' , required = False , default = ".fastq.gz" ,
5353 choices = [".fastq.gz" , ".fq.gz" , ".fastq.bz2" , ".fq.bz2" ], help = "The extension of the raw input files" )
54+ p .add_argument ('-s' , '--samplename' , required = False , default = "" , help = "Specify the sample name" )
55+
5456 p .add_argument ('-f' , '--forward' , required = False , default = "R1" ,
5557 help = "Identifier to distinguish forward reads in the input folder" )
5658 p .add_argument ('-r' , '--reverse' , required = False , default = "R2" ,
@@ -114,12 +116,12 @@ def preflight_check(dry_run=False, verbose=False):
114116 error ('preflight_check()\n {}\n {}' .format (cmd , e ), exit = True )
115117
116118
117- def get_inputs (input_dir , fwd , rev , ext , verbose = False ):
119+ def get_inputs (input_dir , fwd , rev , sn , ext , verbose = False ):
118120 if verbose :
119121 info ('get_inputs()\n ' , init_new_line = True )
120122
121- R1 = sorted (glob . glob ( os .path .join (input_dir , '*{}*{}' . format ( fwd , ext ))) )
122- R2 = sorted (glob . glob ( os .path .join (input_dir , '*{}*{}' . format ( rev , ext ))) )
123+ R1 = sorted ([ os .path .join (input_dir , i ) for i in os . listdir ( input_dir ) if ( fwd in i . replace ( sn , '' )) and i . endswith ( ext )] )
124+ R2 = sorted ([ os .path .join (input_dir , i ) for i in os . listdir ( input_dir ) if ( rev in i . replace ( sn , '' )) and i . endswith ( ext )] )
123125
124126 return (R1 , R2 )
125127
@@ -197,7 +199,7 @@ def concatenate_reads_mp(x):
197199 terminating .set ()
198200
199201
200- def quality_control (input_dir , merged_r1_r2 , keep_intermediate , nproc = 1 , dry_run = False , verbose = False ):
202+ def quality_control (input_dir , merged_r1_r2 , keep_intermediate , sn , nproc = 1 , dry_run = False , verbose = False ):
201203 if dry_run or verbose :
202204 info ('quality_control()\n ' , init_new_line = True )
203205
@@ -213,8 +215,8 @@ def quality_control(input_dir, merged_r1_r2, keep_intermediate, nproc=1, dry_run
213215 except Exception as e :
214216 error ('quality_control()\n tasks: {}\n e: {}' .format (tasks , e ), init_new_line = True , exit = True )
215217
216- r1 = [i for i in qc if "R1" in i ]
217- r2 = [i for i in qc if "R2" in i ]
218+ r1 = [i for i in qc if "R1" in i . replace ( sn , '' ) ]
219+ r2 = [i for i in qc if "R2" in i . replace ( sn , '' ) ]
218220
219221 if len (r1 ) > 1 :
220222 error ('quality_control(): more than one R1 detected: [{}]' .format (', ' .join (r1 )), exit = True )
@@ -478,7 +480,10 @@ def remove(to_remove, keep_intermediate, folder=None, dry_run=False, verbose=Fal
478480
479481 check_params (args )
480482 preflight_check (dry_run = args .dry_run , verbose = args .verbose )
481- inputs_r1s_r2s = get_inputs (args .input_dir , args .forward , args .reverse , args .extension , verbose = args .verbose )
483+ inputs_r1s_r2s = get_inputs (args .input_dir , args .forward , args .reverse , args .samplename , args .extension , verbose = args .verbose )
484+
485+ if (len (inputs_r1s_r2s [0 ]) == 0 ) or (len (inputs_r1s_r2s [1 ]) == 0 ):
486+ error ('No input files detected!\n R1s: {}\n R2s: {}' .format (inputs_r1s_r2s [0 ], inputs_r1s_r2s [1 ]), exit = True )
482487
483488 if args .dry_run or args .verbose :
484489 info ('inputs_r1s: {}\n ' .format ('\n ' .join (inputs_r1s_r2s [0 ])), init_new_line = True )
@@ -490,7 +495,7 @@ def remove(to_remove, keep_intermediate, folder=None, dry_run=False, verbose=Fal
490495 info ('merged_r1: {}\n ' .format (merged_r1_r2 [0 ]), init_new_line = True )
491496 info ('merged_r2: {}\n ' .format (merged_r1_r2 [1 ]))
492497
493- qced_r1_r2 = quality_control (args .input_dir , merged_r1_r2 , args .keep_intermediate ,
498+ qced_r1_r2 = quality_control (args .input_dir , merged_r1_r2 , args .keep_intermediate , args . samplename ,
494499 nproc = args .nproc , dry_run = args .dry_run , verbose = args .verbose )
495500 remove (merged_r1_r2 , args .keep_intermediate , folder = args .input_dir , dry_run = args .dry_run , verbose = args .verbose )
496501
0 commit comments