Add a --tmp argument to sortByRef.pl for specifying the directory in which its temporary sort files are written (defaults to /tmp).

Update the liftover script (liftOverVCF.pl) to reflect this addition: its -tmp directory is now passed to both sort (-T) and sortByRef.pl (--tmp).


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3323 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-05-07 13:53:26 +00:00
parent bfc58c70fa
commit 7cab994d88
2 changed files with 16 additions and 15 deletions

View File

@ -21,7 +21,7 @@ GetOptions( "vcf=s" => \$in,
"tmp=s" => \$tmp);
if ( !$in || !$gatk || !$chain || !$newRef || !$oldRef || !$out ) {
print "Usage: liftOverVCF.pl\n\t-vcf \t\t<input vcf>\n\t-gatk \t\t<path to gatk trunk>\n\t-chain \t\t<chain file>\n\t-newRef \t<path to new reference prefix; we will need newRef.dict, .fasta, and .fasta.fai>\n\t-oldRef \t<path to old reference prefix; we will need oldRef.fasta>\n\t-out \t\t<output vcf>\n\t-tmp <temp file location; defaults to /tmp>\n";
print "Usage: liftOverVCF.pl\n\t-vcf \t\t<input vcf>\n\t-gatk \t\t<path to gatk trunk>\n\t-chain \t\t<chain file>\n\t-newRef \t<path to new reference prefix; we will need newRef.dict, .fasta, and .fasta.fai>\n\t-oldRef \t<path to old reference prefix; we will need oldRef.fasta>\n\t-out \t\t<output vcf>\n\t-tmp \t\t<temp file location; defaults to /tmp>\n";
print "Example: ./liftOverVCF.pl\n\t-vcf /humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/1kg_snp_validation/all_validation_batches.b36.vcf\n\t-chain b36ToHg19.broad.over.chain\n\t-out lifted.hg19.vcf\n\t-gatk /humgen/gsa-scr1/ebanks/Sting_dev\n\t-newRef /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19\n\t-oldRef /broad/1KG/reference/human_b36_both\n";
exit(1);
}
@ -55,7 +55,7 @@ while ( $inHeader == 1 ) {
}
close(UNSORTED);
$cmd = "grep \"^#\" -v $unsorted_vcf | sort -n +1 | $gatk/perl/sortByRef.pl - $newRef.fasta.fai";
$cmd = "grep \"^#\" -v $unsorted_vcf | sort -n +1 -T $tmp | $gatk/perl/sortByRef.pl --tmp $tmp - $newRef.fasta.fai";
print SORTED `$cmd`;
close(SORTED);

View File

@ -6,7 +6,7 @@ use Getopt::Long;
sub usage {
print "\nUsage:\n";
print "sortByRef.pl [--k POS] INPUT REF_DICT\n\n";
print "sortByRef.pl [--k POS] [--tmp dir] INPUT REF_DICT\n\n";
print " Sorts lines of the input file INFILE according\n";
print " to the reference contig order specified by the\n";
@ -14,18 +14,21 @@ sub usage {
print " The sort is stable. If -k option is not specified,\n";
print " it is assumed that the contig name is the first\n";
print " field in each line.\n\n";
print " INPUT input file to sort. If '-' is specified, \n";
print " then reads from STDIN.\n";
print " REF_DICT .fai file, or ANY file that has contigs, in the\n";
print " desired soting order, as its first column.\n";
print " --k POS : contig name is in the field POS (1-based)\n";
print " of input lines.\n\n";
print " INPUT input file to sort. If '-' is specified, \n";
print " then reads from STDIN.\n";
print " REF_DICT .fai file, or ANY file that has contigs, in the\n";
print " desired soting order, as its first column.\n";
print " --k POS : contig name is in the field POS (1-based)\n";
print " of input lines.\n\n";
print " --tmp DIR : temp directory [default=/tmp]\n\n";
exit(1);
}
my $pos = 1;
GetOptions( "k:i" => \$pos );
my $tmp = "/tmp";
GetOptions( "k:i" => \$pos,
"tmp=s" => \$tmp);
$pos--;
@ -92,7 +95,7 @@ while ( <$INPUT> ) {
if ( defined $temp_outputs{$order} ) { $fhandle = $temp_outputs{$order} }
else {
#print "opening $order $$ $_\n";
open( $fhandle, " > /tmp/sortByRef.$$.$order.tmp" ) or
open( $fhandle, " > $tmp/sortByRef.$$.$order.tmp" ) or
die ( "Can not open temporary file $order: $!");
$temp_outputs{$order} = $fhandle;
}
@ -115,11 +118,9 @@ for ( my $i = 0 ; $i < $n ; $i++ ) {
next if ( ! defined $temp_outputs{$i} ) ;
my $f;
open ( $f, "< /tmp/sortByRef.$$.$i.tmp" );
open ( $f, "< $tmp/sortByRef.$$.$i.tmp" );
while ( <$f> ) { print ; }
close $f;
unlink "/tmp/sortByRef.$$.$i.tmp";
unlink "$tmp/sortByRef.$$.$i.tmp";
}