From 050482ef778fed56411b945f7836a689a2ba5209 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 13 Nov 2014 19:52:47 -0500 Subject: [PATCH] fine tune --- README-alt.md | 2 +- extras/run-bwamem | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/README-alt.md b/README-alt.md index cccad4f..40fb9d1 100644 --- a/README-alt.md +++ b/README-alt.md @@ -13,7 +13,7 @@ wget -O- http://sourceforge.net/projects/bio-bwa/files/bwakit-0.7.11_x64-linux.t bwa.kit/run-gen-hs38d6 # download GRCh38 and write hs38d6.fa bwa.kit/bwa index hs38d6.fa # create BWA index # mapping -bwa.kit/run-bwamem hs38d6.fa read1.fq read2.fq +bwa.kit/run-bwamem hs38d6.fa read1.fq read2.fq | sh ``` In the final alignment, a read may be placed on the [primary assembly][grcdef] diff --git a/extras/run-bwamem b/extras/run-bwamem index c01fc5d..30e682d 100755 --- a/extras/run-bwamem +++ b/extras/run-bwamem @@ -5,7 +5,7 @@ use warnings; use Getopt::Std; my %opts = (t=>1, n=>64); -getopts("SpADso:R:x:t:", \%opts); +getopts("SpADsko:R:x:t:", \%opts); die(' Usage: run-bwamem [options] [file2] @@ -13,6 +13,9 @@ Usage: run-bwamem [options] [file2] Options: -o STR prefix for output files [inferred from input] -R STR read group header line such as \'@RG\tID:foo\tSM:bar\' [null] -x STR read type: pacbio, ont2d or intractg [default] + intractg: intra-species contig (kb query, highly similar) + pacbio: pacbio subreads (~10kb query, high error rate) + ont2d: Oxford Nanopore reads (~10kb query, higher error rate) -t INT number of threads [1] -p input are paired-end reads if file2 absent @@ -20,6 +23,7 @@ Options: -o STR prefix for output files [inferred from -D skip duplicate marking (via samblaster) -S for SAM/BAM input, don\'t shuffle -s sort the output alignment (more RAM) + -k keep temporary files generated by typeHLA Note: File type is determined by the file extension of the first input sequence file. Recognized file extensions: fasta, fa, fastq, fq, fasta.gz, fa.gz, fastq.gz, @@ -77,6 +81,11 @@ my $is_pe = (defined($opts{p}) || @ARGV >= 3)? 1 : 0; my $is_sam = $ARGV[1] =~ /\.(sam|sam\.gz)$/? 1 : 0; my $is_bam = $ARGV[1] =~ /\.bam$/? 1 : 0; +if (defined($opts{x})) { + $opts{A} = $opts{D} = 1; + delete $opts{p}; +} + my $cmd = ''; if ($is_sam || $is_bam) { my $cmd_sam2bam = $is_sam? "$root/htsbox samview -uS $ARGV[1] \\\n" : "cat $ARGV[1] \\\n"; @@ -86,13 +95,13 @@ if ($is_sam || $is_bam) { $cmd_bam2fq = " | $root/htsbox bam2fq -O " . (defined($opts{p})? "-s $prefix.out.se.fq.gz " : "") . "- \\\n"; $cmd = $cmd_sam2bam . $cmd_shuf . $cmd_bam2fq; } elsif (@ARGV >= 3) { - $cmd = " | $root/seqtk mergepe $ARGV[1] $ARGV[2] \\\n"; + $cmd = "$root/seqtk mergepe $ARGV[1] $ARGV[2] \\\n"; } else { $cmd = "cat $ARGV[1] \\\n"; } $cmd .= " | $root/trimadap \\\n" unless defined($opts{A}); -$cmd .= " | $root/bwa mem -t$opts{t} " . (defined($opts{x})? "-x $opts{x} " : "") . ($is_pe? "-P " : "") . (defined($opts{R})? "-R'$opts{R}' " : "") . "$ARGV[0] - 2> $prefix.log.bwamem \\\n"; +$cmd .= " | $root/bwa mem -t$opts{t} " . (defined($opts{x})? "-x $opts{x} " : "") . ($is_pe? "-p " : "") . (defined($opts{R})? "-R'$opts{R}' " : "") . "$ARGV[0] - 2> $prefix.log.bwamem \\\n"; $cmd .= " | $root/samblaster 2> $prefix.log.dedup \\\n" unless defined($opts{D}); my $has_hla = 0; @@ -109,7 +118,12 @@ if (-f "$ARGV[0].alt") { my $t_sort = $opts{t} < 4? $opts{t} : 4; $cmd .= defined($opts{s})? " | $root/samtools sort -@ $t_sort -m1G - $prefix.out;\n" : " | gzip -1 > $prefix.out.sam.gz;\n"; -$cmd .= "$root/run-HLA $prefix.out;\n" if ($has_hla); + +if ($has_hla && (!defined($opts{x}) || $opts{x} eq 'intractg')) { + $cmd .= "$root/run-HLA ". ($opts{x} eq 'intractg'? "-A " : "") . "$prefix.out > $prefix.hla.top 2> $prefix.log.hla;\n"; + $cmd .= "cat $prefix.out.HLA*.gt | grep ^GT | cut -f2- > $prefix.hla.all;\n"; + $cmd .= "rm -f $prefix.out.HLA*;\n" unless defined($opts{k}); +} print $cmd;