2010-06-28 22:30:50 +08:00
|
|
|
#!/usr/bin/perl -w
|
|
|
|
|
|
|
|
|
|
use strict;
|
2010-06-28 23:46:41 +08:00
|
|
|
use FindBin;
|
|
|
|
|
use lib "$FindBin::Bin/../../";
|
|
|
|
|
|
2010-06-28 22:30:50 +08:00
|
|
|
use StingArgs;
|
|
|
|
|
use DistributedMake;
|
|
|
|
|
use DataTable;
|
|
|
|
|
use Data::Dumper;
|
|
|
|
|
use File::Basename;
|
|
|
|
|
|
|
|
|
|
# Command-line arguments and their default values.
# NOTE(review): getCommandArguments is presumably exported by StingArgs, and an
# undef default presumably marks the argument as required -- confirm there.
my %args = getCommandArguments(
    # Input list file (read below as whitespace-separated POP, UG and QCALL
    # columns) and the directory that receives all outputs.
    "LIST"    => undef,
    "OUT_DIR" => undef,

    # Reference sequence (-R) and dbSNP track (-D) handed to the GATK commands.
    "REFERENCE" => "/broad/1KG/reference/human_b36_both.fasta",
    "DBSNP"     => "/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod",

    # Sequence and alignment index files.
    "SEQUENCE_INDEX"  => "/broad/1KG/DCC/ftp/sequence.index",
    "ALIGNMENT_INDEX" => "/broad/1KG/DCC/ftp/alignment.index",

    # Execution settings: dry-run flag, queue name, job count, keep-going flag.
    "DRY_RUN"    => 1,
    "QUEUE"      => "gsa",
    "NUM_JOBS"   => "",
    "KEEP_GOING" => 1,
);
|
|
|
|
|
|
|
|
|
|
# Job runner that collects the rules added below and runs them on execute().
# NUM_JOBS and KEEP_GOING are forwarded from the command line; their defaults
# ("" and 1) are the values that used to be hard-coded here, so runs that do
# not set them behave as before.
my $dm = DistributedMake->new(
    "dryRun"     => $args{'DRY_RUN'},
    "queue"      => $args{'QUEUE'},
    "numJobs"    => $args{'NUM_JOBS'},
    "keepGoing"  => $args{'KEEP_GOING'},
    "outputFile" => "$args{'OUT_DIR'}/GenerateDataProcessingReport2.log",
);
|
|
|
|
|
|
|
|
|
|
# Read the LIST file into @entries: one hash reference per line, holding the
# first three whitespace-separated columns under the keys POP, UG and QCALL.
# Any further columns on a line are ignored.
my @entries;

# A three-argument open() with an undef path opens an anonymous temporary file
# instead of failing, so reject a missing LIST explicitly.
defined $args{'LIST'}
    or die "The LIST argument is required\n";

open(my $listFh, '<', $args{'LIST'})
    or die "Cannot open LIST file '$args{'LIST'}': $!\n";

while (my $line = <$listFh>) {
    chomp($line);

    # Blank lines carry no entry; skipping them avoids all-undef records.
    next unless $line =~ /\S/;

    # split(' ') discards leading whitespace, so an indented line still maps
    # its columns onto the right keys.
    my ($pop, $ug, $qcall) = split(' ', $line);

    # Every later command needs all three fields; stop here instead of
    # generating commands with empty paths.
    defined $qcall
        or die "Malformed line $. in '$args{'LIST'}': expected POP, UG and QCALL columns\n";

    push(@entries, { 'POP' => $pop, 'UG' => $ug, 'QCALL' => $qcall });
}

close($listFh);
|
|
|
|
|
|
|
|
|
|
# Targets that the final assembled report depends on (one per list entry).
my @slides;

# The GATK jar and every helper script are addressed relative to STING_DIR.
# Fail up front instead of emitting commands rooted at "/".
my $stingDir = $ENV{'STING_DIR'};
die "The STING_DIR environment variable must be set\n"
    unless defined $stingDir && length $stingDir;

# Loop-invariant command prefixes.
my $gatk       = "java -Xmx2048m -jar $stingDir/dist/GenomeAnalysisTK.jar";
my $reportsDir = "$stingDir/perl/reports/1KG";

# Register the chain of rules for each population:
# combined VCF -> VariantEval reports / per-sample table -> plots -> slides.
foreach my $entry (@entries) {
    my ($pop, $ug, $qcall) = @{$entry}{'POP', 'UG', 'QCALL'};

    # Combine VCFs
    # NOTE(review): "-L 1" presumably restricts the GATK run to a single
    # interval -- confirm this is still intended.
    my $vcfCombineOut    = "$args{'OUT_DIR'}/intermediate/$pop.combined.vcf";
    my $vcfCombineOutCmd = join(' ',
        $gatk,
        "-T VCFCombine",
        "-R $args{'REFERENCE'}",
        "-type UNION",
        "-B UG,VCF,$ug",
        "-B QCALL,VCF,$qcall",
        "-A",
        "-priority 'UG,QCALL'",
        "-o $vcfCombineOut",
        "-l INFO",
        "-L 1",
    );
    $dm->addRule($vcfCombineOut, [$ug, $qcall], $vcfCombineOutCmd);

    # Evaluate variants; the Count_Variants CSV is the file used as the
    # rule's target.
    my $evalRoot   = "$vcfCombineOut.eval/eval";
    my $evalCounts = "$evalRoot.Count_Variants.csv";
    my $evalCmd    = join(' ',
        $gatk,
        "-T VariantEval",
        "-R $args{'REFERENCE'}",
        "-D $args{'DBSNP'}",
        "-B eval,VCF,$vcfCombineOut",
        "-select 'set==\"Intersection\"' -selectName Intersection",
        "-select 'set==\"UG-filteredInOther\"' -selectName InUG-FilteredInQCALL",
        "-select 'set==\"QCALL-filteredInOther\"' -selectName InQCALL-FilteredInUG",
        "-select 'set==\"UG\"' -selectName UG",
        "-select 'set==\"QCALL\"' -selectName QCALL",
        "-reportType R",
        "-reportLocation $evalRoot",
        "-l INFO",
        "-L 1",
    );
    $dm->addRule($evalCounts, $vcfCombineOut, $evalCmd);

    # Get SNPs per sample
    my $snpsPerSample    = "$evalRoot.persample.table";
    my $snpsPerSampleCmd = "$reportsDir/GetSNPsPerSample.pl VCF=$vcfCombineOut OUT=$snpsPerSample";
    $dm->addRule($snpsPerSample, $vcfCombineOut, $snpsPerSampleCmd);

    my $slideDir = "$args{'OUT_DIR'}/intermediate/$pop/slides";

    # Create topsheet slide
    # Disabled: this rule was already commented out. The whole stanza is kept
    # as a comment so it can be restored without building unused strings.
    #my $topSheetRoot     = "$slideDir/$pop.000.topsheet";
    #my $topSheetSlide    = "$topSheetRoot.pdf";
    #my $topSheetSlideCmd = "$reportsDir/MakeTopSheetSlide.pl VCF=$vcfCombineOut SEQUENCE_INDEX=$args{'SEQUENCE_INDEX'} ALIGNMENT_INDEX=$args{'ALIGNMENT_INDEX'} EVAL_ROOT=$evalRoot OUT=$topSheetRoot";
    #$dm->addRule($topSheetSlide, [$vcfCombineOut, $args{'SEQUENCE_INDEX'}, $args{'ALIGNMENT_INDEX'}, $evalCounts], $topSheetSlideCmd);

    # Create Venn diagram slide of variants
    # NOTE(review): $vennPdf is never pushed onto @slides, so the assembled
    # report does not list it as a dependency -- confirm that is intended.
    my $vennRoot   = "$slideDir/$pop.010.venn";
    my $vennPdf    = "$vennRoot.pdf";
    my $vennPdfCmd = "$reportsDir/MakeVennSlide.pl EVAL_ROOT=$evalRoot OUT_ROOT=$vennRoot POP=$pop";
    $dm->addRule($vennPdf, $evalCounts, $vennPdfCmd);

    # Create plots for other slides. A single PDF path stands in as the
    # rule's target; this rule overrides the queue with an empty value.
    my $plotRoot  = "$evalRoot.plot";
    my $plotDummy = "$plotRoot.venn_per_ac.norm.pdf";
    my $plotCmd   = "Rscript $reportsDir/MakeDataProcessingReportPlots.R $evalRoot $plotRoot $pop";
    $dm->addRule($plotDummy, [$evalCounts, $snpsPerSample], $plotCmd, 'queue' => "");

    # Create other slides; again a single PDF path is used as the target.
    my $dprSlideDummy = "$slideDir/$pop.050.eval.plot.titv.pdf";
    my $dprSlidesCmd  = "$reportsDir/MakeDPRSlides.pl PLOT_ROOT=$plotRoot OUT_DIR=$slideDir POP=$pop";
    $dm->addRule($dprSlideDummy, $plotDummy, $dprSlidesCmd);

    push(@slides, $dprSlideDummy);
}
|
|
|
|
|
|
|
|
|
|
# Final target: the assembled report PDF, which depends on every slide target
# collected in @slides.
my $assembledSlides    = $args{'OUT_DIR'} . "/dpr.pdf";
my $assembledSlidesCmd = $ENV{'STING_DIR'} . "/perl/reports/1KG/AssembleSlides.pl"
                       . " DIR=" . $args{'OUT_DIR'}
                       . " OUT=" . $assembledSlides;

$dm->addRule($assembledSlides, \@slides, $assembledSlidesCmd);

# Hand the accumulated rules to DistributedMake to run.
$dm->execute();
|