Obsoleted by VariantReport.R
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4418 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
cf01f6d58a
commit
a15757b8e8
|
|
@ -1,18 +0,0 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
use FindBin;
|
||||
use lib "$FindBin::Bin/../../";
|
||||
|
||||
use StingArgs;
|
||||
|
||||
my %args = &getCommandArguments("DIR" => undef, "OUT" => undef);
|
||||
|
||||
my $outfile = $args{'OUT'};
|
||||
chomp(my @files = qx(find $args{'DIR'} -path \*slide\*.pdf | sort -n));
|
||||
|
||||
print join("\n", @files) . "\n";
|
||||
|
||||
my $cmd = "gs -q -sPAPERSIZE=letter -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -dAutoRotatePages=/All -sOutputFile=$outfile " . join(' ', @files);
|
||||
|
||||
system($cmd);
|
||||
|
|
@ -1,66 +0,0 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
use FindBin;
|
||||
use lib "$FindBin::Bin/../../";
|
||||
|
||||
use StingArgs;
|
||||
use DistributedMake;
|
||||
use DataTable;
|
||||
use Data::Dumper;
|
||||
|
||||
my %args = &getCommandArguments(
|
||||
"OUT_DIR" => undef,
|
||||
"SEQUENCE_INDEX" => "/broad/1KG/DCC/ftp/sequence.index",
|
||||
"ALIGNMENT_INDEX" => "/broad/1KG/DCC/ftp/alignment.index"
|
||||
);
|
||||
|
||||
my @sequenceIndex = &readTable($args{'SEQUENCE_INDEX'}, "header" => 1, "delimiter" => "\t");
|
||||
my @alignmentIndex = &readTable($args{'ALIGNMENT_INDEX'}, "header" => 1, "delimiter" => "\t");
|
||||
|
||||
my %results;
|
||||
|
||||
foreach my $sequenceEntry (@sequenceIndex) {
|
||||
my %sequenceEntry = %{$sequenceEntry};
|
||||
|
||||
if ($sequenceEntry{'STUDY_NAME'} =~ /1000/ && $sequenceEntry{'ANALYSIS_GROUP'} =~ /low coverage/ && $sequenceEntry{'FASTQ_FILE'} !~ /_\d.filt.fastq.gz/) {
|
||||
${${$results{'all'}}{'sequencer'}}{$sequenceEntry{'INSTRUMENT_PLATFORM'}}++;
|
||||
${${$results{$sequenceEntry{'STUDY_ID'}}}{'sequencer'}}{$sequenceEntry{'INSTRUMENT_PLATFORM'}}++;
|
||||
|
||||
${$results{'all'}}{'population'} = "all";
|
||||
${$results{$sequenceEntry{'STUDY_ID'}}}{'population'} = $sequenceEntry{'POPULATION'};
|
||||
|
||||
${$results{'all'}}{'used_lanes'} += ($sequenceEntry{'WITHDRAWN'} == 0);
|
||||
${$results{$sequenceEntry{'STUDY_ID'}}}{'used_lanes'} += ($sequenceEntry{'WITHDRAWN'} == 0);
|
||||
|
||||
${$results{'all'}}{'unused_lanes'} += ($sequenceEntry{'WITHDRAWN'} != 0);
|
||||
${$results{$sequenceEntry{'STUDY_ID'}}}{'unused_lanes'} += ($sequenceEntry{'WITHDRAWN'} != 0);
|
||||
|
||||
if ($sequenceEntry{'WITHDRAWN'} == 0) {
|
||||
${${${$results{'all'}}{'lanes_per_samples'}}}{$sequenceEntry{'SAMPLE_NAME'}}++;
|
||||
${${${$results{$sequenceEntry{'STUDY_ID'}}}{'lanes_per_samples'}}}{$sequenceEntry{'SAMPLE_NAME'}}++;
|
||||
} else {
|
||||
${${$results{'all'}}{'withdrawn_comment'}}{$sequenceEntry{'COMMENT'}}++;
|
||||
${${$results{$sequenceEntry{'STUDY_ID'}}}{'withdrawn_comment'}}{$sequenceEntry{'COMMENT'}}++;
|
||||
}
|
||||
|
||||
${${$results{'all'}}{'lane_parities'}}{$sequenceEntry{'LIBRARY_LAYOUT'}}++;
|
||||
${${$results{$sequenceEntry{'STUDY_ID'}}}{'lane_parities'}}{$sequenceEntry{'LIBRARY_LAYOUT'}}++;
|
||||
|
||||
${${$results{'all'}}{'submission_date'}}{$sequenceEntry{'SUBMISSION_DATE'}}++;
|
||||
${${$results{$sequenceEntry{'STUDY_ID'}}}{'submission_date'}}{$sequenceEntry{'SUBMISSION_DATE'}}++;
|
||||
|
||||
${${$results{'all'}}{'center_names'}}{$sequenceEntry{'CENTER_NAME'}}++;
|
||||
${${$results{$sequenceEntry{'STUDY_ID'}}}{'center_names'}}{$sequenceEntry{'CENTER_NAME'}}++;
|
||||
}
|
||||
}
|
||||
|
||||
foreach my $studyid (keys(%results)) {
|
||||
my %study = %{${${$results{$studyid}}{'lanes_per_samples'}}};
|
||||
my @samples = keys(%study);
|
||||
my $numSamples = scalar(@samples);
|
||||
|
||||
${$results{$studyid}}{'num_samples'} = $numSamples;
|
||||
}
|
||||
|
||||
print Dumper(\%results);
|
||||
|
|
@ -1,96 +0,0 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
use FindBin;
|
||||
use lib "$FindBin::Bin/../../";
|
||||
|
||||
use StingArgs;
|
||||
use DistributedMake;
|
||||
use DataTable;
|
||||
use Data::Dumper;
|
||||
use File::Basename;
|
||||
|
||||
my %args = &getCommandArguments(
|
||||
"LIST" => undef,
|
||||
"OUT_DIR" => undef,
|
||||
|
||||
"REFERENCE" => "/broad/1KG/reference/human_b36_both.fasta",
|
||||
"DBSNP" => "/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod",
|
||||
|
||||
"SEQUENCE_INDEX" => "/broad/1KG/DCC/ftp/sequence.index",
|
||||
"ALIGNMENT_INDEX" => "/broad/1KG/DCC/ftp/alignment.index",
|
||||
|
||||
"DRY_RUN" => 1,
|
||||
"QUEUE" => "gsa",
|
||||
"NUM_JOBS" => "",
|
||||
"KEEP_GOING" => 1,
|
||||
);
|
||||
|
||||
my $dm = new DistributedMake("dryRun" => $args{'DRY_RUN'}, "queue" => $args{'QUEUE'}, "numJobs" => "", "keepGoing" => 1, "outputFile" => "$args{'OUT_DIR'}/GenerateDataProcessingReport2.log");
|
||||
|
||||
my @entries;
|
||||
|
||||
open(LIST, $args{'LIST'});
|
||||
while (my $entry = <LIST>) {
|
||||
chomp($entry);
|
||||
|
||||
my %entry;
|
||||
($entry{'POP'}, $entry{'UG'}, $entry{'QCALL'}) = split(/\s+/, $entry);
|
||||
|
||||
push(@entries, \%entry);
|
||||
}
|
||||
close(LIST);
|
||||
|
||||
my @slides;
|
||||
|
||||
foreach my $entry (@entries) {
|
||||
my %entry = %{$entry};
|
||||
|
||||
# Combine VCFs
|
||||
my $vcfCombineOut = "$args{'OUT_DIR'}/intermediate/$entry{'POP'}.combined.vcf";
|
||||
my $vcfCombineOutCmd = "java -Xmx2048m -jar $ENV{'STING_DIR'}/dist/GenomeAnalysisTK.jar -T VCFCombine -R $args{'REFERENCE'} -type UNION -B UG,VCF,$entry{'UG'} -B QCALL,VCF,$entry{'QCALL'} -A -priority 'UG,QCALL' -o $vcfCombineOut -l INFO -L 1";
|
||||
$dm->addRule($vcfCombineOut, [$entry{'UG'}, $entry{'QCALL'}], $vcfCombineOutCmd);
|
||||
|
||||
# Evaluate variants
|
||||
my $evalRoot = "$vcfCombineOut.eval/eval";
|
||||
my $evalCounts = "$evalRoot.Count_Variants.csv";
|
||||
my $evalCmd = "java -Xmx2048m -jar $ENV{'STING_DIR'}/dist/GenomeAnalysisTK.jar -T VariantEval -R $args{'REFERENCE'} -D $args{'DBSNP'} -B eval,VCF,$vcfCombineOut -select 'set==\"Intersection\"' -selectName Intersection -select 'set==\"UG-filteredInOther\"' -selectName InUG-FilteredInQCALL -select 'set==\"QCALL-filteredInOther\"' -selectName InQCALL-FilteredInUG -select 'set==\"UG\"' -selectName UG -select 'set==\"QCALL\"' -selectName QCALL -reportType R -reportLocation $evalRoot -l INFO -L 1";
|
||||
$dm->addRule($evalCounts, $vcfCombineOut, $evalCmd);
|
||||
|
||||
# Get SNPs per sample
|
||||
my $snpsPerSample = "$evalRoot.persample.table";
|
||||
my $snpsPerSampleCmd = "$ENV{'STING_DIR'}/perl/reports/1KG/GetSNPsPerSample.pl VCF=$vcfCombineOut OUT=$snpsPerSample";
|
||||
$dm->addRule($snpsPerSample, $vcfCombineOut, $snpsPerSampleCmd);
|
||||
|
||||
# Create topsheet slide
|
||||
my $slideDir = "$args{'OUT_DIR'}/intermediate/$entry{'POP'}/slides";
|
||||
my $topSheetRoot = "$slideDir/$entry{'POP'}.000.topsheet";
|
||||
my $topSheetSlide = "$topSheetRoot.pdf";
|
||||
my $topSheetSlideCmd = "$ENV{'STING_DIR'}/perl/reports/1KG/MakeTopSheetSlide.pl VCF=$vcfCombineOut SEQUENCE_INDEX=$args{'SEQUENCE_INDEX'} ALIGNMENT_INDEX=$args{'ALIGNMENT_INDEX'} EVAL_ROOT=$evalRoot OUT=$topSheetRoot";
|
||||
#$dm->addRule($topSheetSlide, [$vcfCombineOut, $args{'SEQUENCE_INDEX'}, $args{'ALIGNMENT_INDEX'}, $evalCounts], $topSheetSlideCmd);
|
||||
|
||||
# Create Venn diagram slide of variants
|
||||
my $vennRoot = "$slideDir/$entry{'POP'}.010.venn";
|
||||
my $vennPdf = "$vennRoot.pdf";
|
||||
my $vennPdfCmd = "$ENV{'STING_DIR'}/perl/reports/1KG/MakeVennSlide.pl EVAL_ROOT=$evalRoot OUT_ROOT=$vennRoot POP=$entry{'POP'}";
|
||||
$dm->addRule($vennPdf, $evalCounts, $vennPdfCmd);
|
||||
|
||||
# Create plots for other slides
|
||||
my $plotRoot = "$evalRoot.plot";
|
||||
my $plotDummy = "$plotRoot.venn_per_ac.norm.pdf";
|
||||
my $plotCmd = "Rscript $ENV{'STING_DIR'}/perl/reports/1KG/MakeDataProcessingReportPlots.R $evalRoot $plotRoot $entry{'POP'}";
|
||||
$dm->addRule($plotDummy, [$evalCounts, $snpsPerSample], $plotCmd, 'queue' => "");
|
||||
|
||||
# Create other slides
|
||||
my $dprSlideDummy = "$slideDir/$entry{'POP'}.050.eval.plot.titv.pdf";
|
||||
my $dprSlidesCmd = "$ENV{'STING_DIR'}/perl/reports/1KG/MakeDPRSlides.pl PLOT_ROOT=$plotRoot OUT_DIR=$slideDir POP=$entry{'POP'}";
|
||||
$dm->addRule($dprSlideDummy, $plotDummy, $dprSlidesCmd);
|
||||
|
||||
push(@slides, $dprSlideDummy);
|
||||
}
|
||||
|
||||
my $assembledSlides = "$args{'OUT_DIR'}/dpr.pdf";
|
||||
my $assembledSlidesCmd = "$ENV{'STING_DIR'}/perl/reports/1KG/AssembleSlides.pl DIR=$args{'OUT_DIR'} OUT=$assembledSlides";
|
||||
$dm->addRule($assembledSlides, \@slides, $assembledSlidesCmd);
|
||||
|
||||
$dm->execute();
|
||||
|
|
@ -1,69 +0,0 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
use FindBin;
|
||||
use lib "$FindBin::Bin/../../";
|
||||
|
||||
use StingArgs;
|
||||
|
||||
my %args = &getCommandArguments("VCF" => undef, "OUT" => "/dev/stdout");
|
||||
|
||||
my %table;
|
||||
my %sets;
|
||||
|
||||
open(VCF, $args{'VCF'});
|
||||
|
||||
my @header;
|
||||
while (my $line = <VCF>) {
|
||||
chomp($line);
|
||||
if ($line =~ /#/) {
|
||||
if ($line =~ /CHROM/) {
|
||||
$line =~ s/#//g;
|
||||
@header = split(/\s+/, $line);
|
||||
}
|
||||
} else {
|
||||
my @columns = split(/\s+/, $line);
|
||||
|
||||
if ($columns[0] eq '1') {
|
||||
my %entry;
|
||||
for (my $i = 0; $i <= $#columns; $i++) {
|
||||
$entry{$header[$i]} = $columns[$i];
|
||||
}
|
||||
|
||||
my ($set) = $entry{'INFO'} =~ /set=(\w+)/;
|
||||
$sets{$set} = 1;
|
||||
|
||||
for (my $i = 9; $i <= $#header; $i++) {
|
||||
if ($entry{$header[$i]} =~ /0[\\\|\/]1/ ||
|
||||
$entry{$header[$i]} =~ /1[\\\|\/]0/ ||
|
||||
$entry{$header[$i]} =~ /1[\\\|\/]1/) {
|
||||
|
||||
${$table{$header[$i]}}{$set}++;
|
||||
$sets{$set} = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
close(VCF);
|
||||
|
||||
open(OUT, ">$args{'OUT'}");
|
||||
|
||||
print OUT "sample\t" . join("\t", sort { $a cmp $b } keys(%sets)) . "\n";
|
||||
|
||||
foreach my $sample (sort { $a cmp $b } keys(%table)) {
|
||||
print OUT "$sample";
|
||||
|
||||
foreach my $set (sort { $a cmp $b } keys(%sets)) {
|
||||
my $value = 0;
|
||||
if (exists(${$table{$sample}}{$set})) {
|
||||
$value = ${$table{$sample}}{$set};
|
||||
}
|
||||
|
||||
print OUT "\t$value";
|
||||
}
|
||||
|
||||
print OUT "\n";
|
||||
}
|
||||
|
||||
close(OUT);
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
use FindBin;
|
||||
use lib "$FindBin::Bin/../../";
|
||||
|
||||
use StingArgs;
|
||||
use File::Basename;
|
||||
use DistributedMake;
|
||||
|
||||
my %args = &getCommandArguments("PLOT_ROOT" => undef, "OUT_DIR" => undef, "POP" => undef);
|
||||
|
||||
my $dm = new DistributedMake("dryRun" => 0, "numJobs" => "1");
|
||||
|
||||
my $timestamp = gmtime();
|
||||
|
||||
my @items = (
|
||||
{ 'plotprefix' => "$args{'PLOT_ROOT'}.venn_per_ac", 'title' => 'Callset concordance per allele count', 'population' => $args{'POP'}, 'caption' => "Concordance of the UnifiedGenotyper and QCALL callsets, per allele count. Bottom and top sections of each bar represent the calls unique to each caller's callset, while the middle portion represents the intersection.", 'timestamp' => $timestamp },
|
||||
{ 'plotprefix' => "$args{'PLOT_ROOT'}.venn_per_ac.norm", 'title' => 'Normalized callset concordance per allele count', 'population' => $args{'POP'}, 'caption' => "Concordance of the UnifiedGenotyper and QCALL callsets, per allele count, normalized by the number of variants in the callset union for that allele count bin. Bottom and top sections of each bar represent the calls unique to each caller's callset, while the middle portion represents the intersection.", 'timestamp' => $timestamp },
|
||||
{ 'plotprefix' => "$args{'PLOT_ROOT'}.acs", 'title' => 'Allele count spectrum', 'population' => $args{'POP'}, 'caption' => 'Allele count spectrum for all known, novel, and all variants. Individual contributions from separate callers (thin, dotted or dashed lines) as well as the callset intersection (bold, solid lines) are shown.', 'timestamp' => $timestamp },
|
||||
{ 'plotprefix' => "$args{'PLOT_ROOT'}.titv", 'title' => 'Ti/Tv per allele count', 'population' => $args{'POP'}, 'caption' => 'Ratio of transition mutations to transversions, per allele count. Whole-genome expectation is approximately 2.1. Fluctuation at higher allele counts is typically due to fewer available variants for the calculation at those frequencies.', 'timestamp' => $timestamp },
|
||||
{ 'plotprefix' => "$args{'PLOT_ROOT'}.counts_per_sample", 'title' => 'SNP calls per sample', 'population' => $args{'POP'}, 'caption' => 'Number of SNPs per sample. As samples in plot are all from the same or similar populations, with presumably similar heterozygosity, all samples should have roughly consistent performance. Outliers may indicate problematic samples - perhaps in coverage, identity QC (sample mixup), or quality of sequencing.', 'timestamp' => $timestamp },
|
||||
);
|
||||
|
||||
my $idnum = 1;
|
||||
|
||||
foreach my $item (@items) {
|
||||
$idnum++;
|
||||
my $id = "$args{'POP'}." . sprintf("%03d", $idnum*10);
|
||||
|
||||
my %item = %$item;
|
||||
|
||||
my $basename = "$id." . basename($item{'plotprefix'});
|
||||
my $table = "$args{'OUT_DIR'}/$basename.kvp";
|
||||
|
||||
open(TABLE, ">$table");
|
||||
foreach my $key (sort { $a cmp $b } keys(%item)) {
|
||||
print TABLE "$key\t$item{$key}\n";
|
||||
}
|
||||
close(TABLE);
|
||||
|
||||
my $render = "$args{'OUT_DIR'}/$basename.tex";
|
||||
my $renderCmd = "$ENV{'STING_DIR'}/perl/reports/1KG/RenderTemplate.pl KVP=$table TEMPLATE=$ENV{'STING_DIR'}/perl/reports/1KG/plot_and_text_template.tex TEMPLATE_OUT=$render";
|
||||
$dm->addRule($render, "$item{'plotprefix'}.pdf", $renderCmd);
|
||||
|
||||
my $pdf = "$args{'OUT_DIR'}/$basename.pdf";
|
||||
my $pdfdir = dirname($pdf);
|
||||
my $pdfCmd = "pdflatex -output-directory $pdfdir $render";
|
||||
$dm->addRule($pdf, $render, $pdfCmd);
|
||||
}
|
||||
|
||||
$dm->execute();
|
||||
|
|
@ -1,146 +0,0 @@
|
|||
args = commandArgs(TRUE);
|
||||
|
||||
evalRoot = args[1];
|
||||
if (is.na(evalRoot)) { evalRoot = "results/v2/intermediate/CEU.combined.vcf.eval/eval"; }
|
||||
|
||||
plotRoot = args[2];
|
||||
if (is.na(plotRoot)) { plotRoot = "results/v2/intermediate/CEU.combined.vcf.eval/eval.plot"; }
|
||||
|
||||
population = args[3];
|
||||
if (is.na(population)) { population = "unknown population"; }
|
||||
|
||||
fileAlleleCountStats = paste(evalRoot, ".AlleleCountStats.csv", sep="");
|
||||
fileCompOverlap = paste(evalRoot, ".Comp_Overlap.csv", sep="");
|
||||
fileCountVariants = paste(evalRoot, ".Count_Variants.csv", sep="");
|
||||
fileGenotypeConcordance = paste(evalRoot, ".Genotype_Concordance.csv", sep="");
|
||||
fileMetricsByAc = paste(evalRoot, ".MetricsByAc.csv", sep="");
|
||||
fileQuality_Metrics_by_allele_count = paste(evalRoot, ".Quality_Metrics_by_allele_count.csv", sep="");
|
||||
fileQualityScoreHistogram = paste(evalRoot, ".QualityScoreHistogram.csv", sep="");
|
||||
fileSampleStatistics = paste(evalRoot, ".Sample_Statistics.csv", sep="");
|
||||
fileSampleSummaryStatistics = paste(evalRoot, ".Sample_Summary_Statistics.csv", sep="");
|
||||
fileTi_slash_Tv_Variant_Evaluator = paste(evalRoot, ".Ti_slash_Tv_Variant_Evaluator.csv", sep="");
|
||||
fileTiTvStats = paste(evalRoot, ".TiTvStats.csv", sep="");
|
||||
fileVariant_Quality_Score = paste(evalRoot, ".Variant_Quality_Score.csv", sep="");
|
||||
filePerSample = paste(evalRoot, ".persample.table", sep="");
|
||||
|
||||
# Allele count spectrum
|
||||
if (!is.na(plotRoot)) {
|
||||
pdf(paste(plotRoot, ".acs.pdf", sep=""));
|
||||
} else {
|
||||
x11();
|
||||
}
|
||||
|
||||
dataMetricsByAc = read.csv(fileMetricsByAc, header=TRUE, comment.char="#");
|
||||
|
||||
dataMetricsByAc.none.all = dataMetricsByAc[which(dataMetricsByAc$jexl_expression == "none" & dataMetricsByAc$filter_name == "called" & dataMetricsByAc$novelty_name == "all"),];
|
||||
|
||||
dataMetricsByAc.int.all = dataMetricsByAc[which(dataMetricsByAc$jexl_expression == "Intersection" & dataMetricsByAc$filter_name == "called" & dataMetricsByAc$novelty_name == "all"),];
|
||||
dataMetricsByAc.int.known = dataMetricsByAc[which(dataMetricsByAc$jexl_expression == "Intersection" & dataMetricsByAc$filter_name == "called" & dataMetricsByAc$novelty_name == "known"),];
|
||||
dataMetricsByAc.int.novel = dataMetricsByAc[which(dataMetricsByAc$jexl_expression == "Intersection" & dataMetricsByAc$filter_name == "called" & dataMetricsByAc$novelty_name == "novel"),];
|
||||
|
||||
dataMetricsByAc.UG.all = dataMetricsByAc[which(dataMetricsByAc$jexl_expression == "UG" & dataMetricsByAc$filter_name == "called" & dataMetricsByAc$novelty_name == "all"),] + dataMetricsByAc.int.all;
|
||||
dataMetricsByAc.UG.known = dataMetricsByAc[which(dataMetricsByAc$jexl_expression == "UG" & dataMetricsByAc$filter_name == "called" & dataMetricsByAc$novelty_name == "known"),] + dataMetricsByAc.int.known;
|
||||
dataMetricsByAc.UG.novel = dataMetricsByAc[which(dataMetricsByAc$jexl_expression == "UG" & dataMetricsByAc$filter_name == "called" & dataMetricsByAc$novelty_name == "novel"),] + dataMetricsByAc.int.novel;
|
||||
|
||||
dataMetricsByAc.QCALL.all = dataMetricsByAc[which(dataMetricsByAc$jexl_expression == "QCALL" & dataMetricsByAc$filter_name == "called" & dataMetricsByAc$novelty_name == "all"),] + dataMetricsByAc.int.all;
|
||||
dataMetricsByAc.QCALL.known = dataMetricsByAc[which(dataMetricsByAc$jexl_expression == "QCALL" & dataMetricsByAc$filter_name == "called" & dataMetricsByAc$novelty_name == "known"),] + dataMetricsByAc.int.known;
|
||||
dataMetricsByAc.QCALL.novel = dataMetricsByAc[which(dataMetricsByAc$jexl_expression == "QCALL" & dataMetricsByAc$filter_name == "called" & dataMetricsByAc$novelty_name == "novel"),] + dataMetricsByAc.int.novel;
|
||||
|
||||
|
||||
plot(0, 0, type="n", xlab="Allele count", ylab="Number of variants", xlim=c(0, max(dataMetricsByAc.none.all$AC)), ylim=c(min(dataMetricsByAc.none.all$n), max(dataMetricsByAc.none.all$n)), main=paste("Allele count spectrum (", population, ")", sep=""), cex=1.2, cex.axis=1.2, cex.lab=1.2, bty="n");
|
||||
|
||||
points(dataMetricsByAc.int.all$AC, dataMetricsByAc.int.all$n, type="l", lwd=3, col="black");
|
||||
points(dataMetricsByAc.int.known$AC, dataMetricsByAc.int.known$n, type="l", lwd=3, col="blue");
|
||||
points(dataMetricsByAc.int.novel$AC, dataMetricsByAc.int.novel$n, type="l", lwd=3, col="red");
|
||||
|
||||
points(dataMetricsByAc.UG.all$AC, dataMetricsByAc.UG.all$n, type="l", lwd=1, lty=2, col="black");
|
||||
points(dataMetricsByAc.UG.known$AC, dataMetricsByAc.UG.known$n, type="l", lwd=1, lty=2, col="blue");
|
||||
points(dataMetricsByAc.UG.novel$AC, dataMetricsByAc.UG.novel$n, type="l", lwd=1, lty=2, col="red");
|
||||
|
||||
points(dataMetricsByAc.QCALL.all$AC, dataMetricsByAc.QCALL.all$n, type="l", lwd=1, lty=3, col="black");
|
||||
points(dataMetricsByAc.QCALL.known$AC, dataMetricsByAc.QCALL.known$n, type="l", lwd=1, lty=3, col="blue");
|
||||
points(dataMetricsByAc.QCALL.novel$AC, dataMetricsByAc.QCALL.novel$n, type="l", lwd=1, lty=3, col="red");
|
||||
|
||||
legend("topright", c("Intersection: all", "Intersection: known (present in dbSNP 129)", "Intersection: novel (absent in dbSNP 129)", "UnifiedGenotyper: all", "UnifiedGenotyper: known (present in dbSNP 129)", "UnifiedGenotyper: novel (absent in dbSNP 129)", "QCALL: all", "QCALL: known (present in dbSNP 129)", "QCALL: novel (absent in dbSNP 129)"), lwd=c(3, 3, 3, 1, 1, 1, 1, 1, 1), lty=c(1, 1, 1, 2, 2, 2, 3, 3, 3), col=c("black", "blue", "red"), cex=1.2);
|
||||
|
||||
if (!is.na(plotRoot)) { dev.off(); }
|
||||
|
||||
# Ti/Tv per allele count
|
||||
if (!is.na(plotRoot)) {
|
||||
pdf(paste(plotRoot, ".titv.pdf", sep=""));
|
||||
} else {
|
||||
x11();
|
||||
}
|
||||
|
||||
plot(0, 0, type="n", xlab="Allele count", ylab="Transition/transversion ratio", xlim=c(0, max(dataMetricsByAc.none.all$AC)), ylim=c(0, max(dataMetricsByAc$Ti.Tv)), main=paste("Ti/Tv per allele count (", population, ")", sep=""), cex=1.2, cex.lab=1.2, cex.axis=1.2, bty="n");
|
||||
|
||||
points(dataMetricsByAc.int.all$AC, dataMetricsByAc.int.all$Ti.Tv, type="l", lwd=3, lty=1, col="black");
|
||||
points(dataMetricsByAc.int.known$AC, dataMetricsByAc.int.known$Ti.Tv, type="l", lwd=3, lty=1, col="blue");
|
||||
points(dataMetricsByAc.int.novel$AC, dataMetricsByAc.int.novel$Ti.Tv, type="l", lwd=3, lty=1, col="red");
|
||||
|
||||
points(dataMetricsByAc.UG.all$AC, dataMetricsByAc.int.all$Ti.Tv, type="l", lwd=1, lty=2, col="black");
|
||||
points(dataMetricsByAc.UG.known$AC, dataMetricsByAc.int.known$Ti.Tv, type="l", lwd=1, lty=2, col="blue");
|
||||
points(dataMetricsByAc.UG.novel$AC, dataMetricsByAc.int.novel$Ti.Tv, type="l", lwd=1, lty=2, col="red");
|
||||
|
||||
points(dataMetricsByAc.QCALL.all$AC, dataMetricsByAc.int.all$Ti.Tv, type="l", lwd=1, lty=3, col="black");
|
||||
points(dataMetricsByAc.QCALL.known$AC, dataMetricsByAc.int.known$Ti.Tv, type="l", lwd=1, lty=3, col="blue");
|
||||
points(dataMetricsByAc.QCALL.novel$AC, dataMetricsByAc.int.novel$Ti.Tv, type="l", lwd=1, lty=3, col="red");
|
||||
|
||||
legend("topright", c("Intersection: all", "Intersection: known (present in dbSNP 129)", "Intersection: novel (absent in dbSNP 129)", "UnifiedGenotyper: all", "UnifiedGenotyper: known (present in dbSNP 129)", "UnifiedGenotyper: novel (absent in dbSNP 129)", "QCALL: all", "QCALL: known (present in dbSNP 129)", "QCALL: novel (absent in dbSNP 129)"), lwd=c(3, 3, 3, 1, 1, 1, 1, 1, 1), lty=c(1, 1, 1, 2, 2, 2, 3, 3, 3), col=c("black", "blue", "red"), cex=1.2);
|
||||
|
||||
if (!is.na(plotRoot)) { dev.off(); }
|
||||
|
||||
# Barplot (unnormalized)
|
||||
if (!is.na(plotRoot)) {
|
||||
pdf(paste(plotRoot, ".venn_per_ac.pdf", sep=""));
|
||||
} else {
|
||||
x11();
|
||||
}
|
||||
|
||||
norm = dataMetricsByAc.UG.all$n + dataMetricsByAc.int.all$n + dataMetricsByAc.QCALL.all$n;
|
||||
mat = rbind(dataMetricsByAc.QCALL.all$n, dataMetricsByAc.int.all$n, dataMetricsByAc.UG.all$n);
|
||||
matnorm = rbind(dataMetricsByAc.QCALL.all$n/norm, dataMetricsByAc.int.all$n/norm, dataMetricsByAc.UG.all$n/norm);
|
||||
|
||||
qcallcolor = "#FF5555";
|
||||
intcolor = "#5582C6";
|
||||
ugcolor = "#55BBFF";
|
||||
|
||||
barplot(mat, col=c(qcallcolor, intcolor, ugcolor), xlab="Allele count", ylab="counts", main=paste("Callset concordance per allele count (", population, ")", sep=""), names.arg=dataMetricsByAc.UG.all$AC);
|
||||
legend("topright", c("UG-only", "Intersection", "QCALL-only"), fill=c(ugcolor, intcolor, qcallcolor));
|
||||
|
||||
if (!is.na(plotRoot)) { dev.off(); }
|
||||
|
||||
# Barplot (normalized)
|
||||
if (!is.na(plotRoot)) {
|
||||
pdf(paste(plotRoot, ".venn_per_ac.norm.pdf", sep=""));
|
||||
} else {
|
||||
x11();
|
||||
}
|
||||
|
||||
barplot(matnorm, col=c(qcallcolor, intcolor, ugcolor), xlab="Allele count", ylab="Fraction", main=paste("Callset concordance per allele count (", population, ")", sep=""), names.arg=dataMetricsByAc.UG.all$AC, ylim=c(0, 1.3));
|
||||
legend("topright", c("UG-only", "Intersection", "QCALL-only"), fill=c(ugcolor, intcolor, qcallcolor));
|
||||
|
||||
if (!is.na(plotRoot)) { dev.off(); }
|
||||
|
||||
# Per-sample
|
||||
if (!is.na(plotRoot)) {
|
||||
pdf(paste(plotRoot, ".counts_per_sample.pdf", sep=""), width=10);
|
||||
} else {
|
||||
x11(width=10);
|
||||
}
|
||||
|
||||
dataPerSample = read.table(filePerSample, header=TRUE);
|
||||
|
||||
par(mar=c(7, 5, 2, 1));
|
||||
|
||||
plot(0, 0, type="n", xaxt="n", xlim=c(1, length(dataPerSample$sample)), ylim=c(0, max(dataPerSample$Intersection + dataPerSample$UG, dataPerSample$Intersection, dataPerSample$Intersection + dataPerSample$QCALL)), xlab="", ylab="Counts", main=paste("SNPs per sample (", population, ")", sep=""), cex=1.2, cex.lab=1.2, cex.axis=1.2);
|
||||
|
||||
points(dataPerSample$sample, dataPerSample$Intersection + dataPerSample$UG, col=ugcolor);
|
||||
points(dataPerSample$sample, dataPerSample$Intersection, col=intcolor);
|
||||
points(dataPerSample$sample, dataPerSample$Intersection + dataPerSample$QCALL, col=qcallcolor);
|
||||
|
||||
legend("bottomleft", c("UG", "Intersection", "QCALL"), fill=c(ugcolor, intcolor, qcallcolor));
|
||||
|
||||
axis(1, at=c(1:length(dataPerSample$sample)), labels=dataPerSample$sample, las=2, cex.axis=0.7);
|
||||
|
||||
if (!is.na(plotRoot)) { dev.off(); }
|
||||
|
|
@ -1,71 +0,0 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
use FindBin;
|
||||
use lib "$FindBin::Bin/../../";
|
||||
|
||||
use StingArgs;
|
||||
use DistributedMake;
|
||||
use DataTable;
|
||||
use Data::Dumper;
|
||||
|
||||
my %args = &getCommandArguments(
|
||||
"VCF" => undef,
|
||||
"EVAL_ROOT" => undef,
|
||||
"OUT" => undef,
|
||||
|
||||
"POP" => undef,
|
||||
"SRP_IDS" => undef,
|
||||
"SEQUENCE_INDEX" => "/broad/1KG/DCC/ftp/sequence.index",
|
||||
"ALIGNMENT_INDEX" => "/broad/1KG/DCC/ftp/alignment.index"
|
||||
);
|
||||
|
||||
my @sequenceIndex = &readTable($args{'SEQUENCE_INDEX'}, "header" => 1, "delimiter" => "\t");
|
||||
my @alignmentIndex = &readTable($args{'ALIGNMENT_INDEX'}, "header" => 1, "delimiter" => "\t");
|
||||
|
||||
my %results;
|
||||
|
||||
foreach my $sequenceEntry (@sequenceIndex) {
|
||||
my %sequenceEntry = %{$sequenceEntry};
|
||||
|
||||
if ($sequenceEntry{'STUDY_NAME'} =~ /1000/ && $sequenceEntry{'ANALYSIS_GROUP'} =~ /low coverage/ && $sequenceEntry{'FASTQ_FILE'} !~ /_\d.filt.fastq.gz/) {
|
||||
${${$results{'all'}}{'sequencer'}}{$sequenceEntry{'INSTRUMENT_PLATFORM'}}++;
|
||||
${${$results{$sequenceEntry{'STUDY_ID'}}}{'sequencer'}}{$sequenceEntry{'INSTRUMENT_PLATFORM'}}++;
|
||||
|
||||
${$results{'all'}}{'population'} = "all";
|
||||
${$results{$sequenceEntry{'STUDY_ID'}}}{'population'} = $sequenceEntry{'POPULATION'};
|
||||
|
||||
${$results{'all'}}{'used_lanes'} += ($sequenceEntry{'WITHDRAWN'} == 0);
|
||||
${$results{$sequenceEntry{'STUDY_ID'}}}{'used_lanes'} += ($sequenceEntry{'WITHDRAWN'} == 0);
|
||||
|
||||
${$results{'all'}}{'unused_lanes'} += ($sequenceEntry{'WITHDRAWN'} != 0);
|
||||
${$results{$sequenceEntry{'STUDY_ID'}}}{'unused_lanes'} += ($sequenceEntry{'WITHDRAWN'} != 0);
|
||||
|
||||
if ($sequenceEntry{'WITHDRAWN'} == 0) {
|
||||
${${${$results{'all'}}{'lanes_per_samples'}}}{$sequenceEntry{'SAMPLE_NAME'}}++;
|
||||
${${${$results{$sequenceEntry{'STUDY_ID'}}}{'lanes_per_samples'}}}{$sequenceEntry{'SAMPLE_NAME'}}++;
|
||||
} else {
|
||||
${${$results{'all'}}{'withdrawn_comment'}}{$sequenceEntry{'COMMENT'}}++;
|
||||
${${$results{$sequenceEntry{'STUDY_ID'}}}{'withdrawn_comment'}}{$sequenceEntry{'COMMENT'}}++;
|
||||
}
|
||||
|
||||
${${$results{'all'}}{'lane_parities'}}{$sequenceEntry{'LIBRARY_LAYOUT'}}++;
|
||||
${${$results{$sequenceEntry{'STUDY_ID'}}}{'lane_parities'}}{$sequenceEntry{'LIBRARY_LAYOUT'}}++;
|
||||
|
||||
${${$results{'all'}}{'submission_date'}}{$sequenceEntry{'SUBMISSION_DATE'}}++;
|
||||
${${$results{$sequenceEntry{'STUDY_ID'}}}{'submission_date'}}{$sequenceEntry{'SUBMISSION_DATE'}}++;
|
||||
|
||||
${${$results{'all'}}{'center_names'}}{$sequenceEntry{'CENTER_NAME'}}++;
|
||||
${${$results{$sequenceEntry{'STUDY_ID'}}}{'center_names'}}{$sequenceEntry{'CENTER_NAME'}}++;
|
||||
}
|
||||
}
|
||||
|
||||
foreach my $studyid (keys(%results)) {
|
||||
my %study = %{${${$results{$studyid}}{'lanes_per_samples'}}};
|
||||
my @samples = keys(%study);
|
||||
my $numSamples = scalar(@samples);
|
||||
|
||||
${$results{$studyid}}{'num_samples'} = $numSamples;
|
||||
}
|
||||
|
||||
print Dumper(\%results);
|
||||
|
|
@ -1,95 +0,0 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
use FindBin;
|
||||
use lib "$FindBin::Bin/../../";
|
||||
|
||||
use StingArgs;
|
||||
use DistributedMake;
|
||||
use Data::Dumper;
|
||||
use File::Basename;
|
||||
|
||||
sub getCSV {
|
||||
my ($file) = @_;
|
||||
|
||||
my @header;
|
||||
my @csv;
|
||||
|
||||
open(CSV, $file);
|
||||
while (my $csvline = <CSV>) {
|
||||
if ($csvline !~ /#/) {
|
||||
chomp(my @columns = split(/,/, $csvline));
|
||||
|
||||
if (scalar(@columns) > 1) {
|
||||
if (scalar(@header) < 2) {
|
||||
@header = @columns;
|
||||
} else {
|
||||
my %entry;
|
||||
for (my $column = 0; $column <= $#header; $column++) {
|
||||
$entry{$header[$column]} = $columns[$column];
|
||||
}
|
||||
|
||||
push(@csv, \%entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
close(CSV);
|
||||
|
||||
return @csv;
|
||||
}
|
||||
|
||||
my %args = &getCommandArguments("EVAL_ROOT" => undef, "OUT_ROOT" => undef, "POP" => undef);
|
||||
|
||||
my %values;
|
||||
|
||||
my @header;
|
||||
my @counts = &getCSV("$args{'EVAL_ROOT'}.Count_Variants.csv");
|
||||
my @titv = &getCSV("$args{'EVAL_ROOT'}.Ti_slash_Tv_Variant_Evaluator.csv");
|
||||
|
||||
my $varout = "$args{'OUT_ROOT'}.kvp";
|
||||
open(VAROUT, ">$varout");
|
||||
foreach my $entry (@counts) {
|
||||
my %entry = %$entry;
|
||||
|
||||
print VAROUT "$entry{'jexl_expression'}.$entry{'filter_name'}.$entry{'novelty_name'}.nVariantLoci\t$entry{'nVariantLoci'}\n";
|
||||
|
||||
if ($entry{'jexl_expression'} eq 'Intersection') { $values{'Intersection'} = $entry{'nVariantLoci'}; }
|
||||
if ($entry{'jexl_expression'} eq 'UG') { $values{'UG-only'} = $entry{'nVariantLoci'}; }
|
||||
if ($entry{'jexl_expression'} eq 'QCALL') { $values{'QCALL-only'} = $entry{'nVariantLoci'}; }
|
||||
}
|
||||
|
||||
foreach my $entry (@titv) {
|
||||
my %entry = %$entry;
|
||||
|
||||
print VAROUT "$entry{'jexl_expression'}.$entry{'filter_name'}.$entry{'novelty_name'}.ti/tv_ratio\t$entry{'ti/tv ratio'}\n";
|
||||
}
|
||||
|
||||
my $outpng = "$args{'OUT_ROOT'}.png";
|
||||
print VAROUT "venndiagram\t$args{'OUT_ROOT'}\n";
|
||||
print VAROUT "population\t$args{'POP'}\n";
|
||||
print VAROUT "timestamp\t" . gmtime() . "\n";
|
||||
close(VAROUT);
|
||||
|
||||
my $ug = $values{'UG-only'} + $values{'Intersection'};
|
||||
my $qcall = $values{'QCALL-only'} + $values{'Intersection'};
|
||||
|
||||
my $scaledug = ($ug > $qcall) ? 100 : int($ug*100/$qcall);
|
||||
my $scaledqcall = ($qcall > $ug) ? 100 : int($qcall*100/$ug);
|
||||
my $scaledint = ($ug > $qcall) ? int($values{'Intersection'}*100/$ug) : int($values{'Intersection'}*100/$qcall);
|
||||
|
||||
my $dm = new DistributedMake("dryRun" => 0);
|
||||
|
||||
my $vennCmd = "wget -O $outpng \"http://chart.apis.google.com/chart?cht=v&chd=t:$scaledqcall,$scaledug,0,$scaledint&chco=FF3355,00BBFF&chs=600x480&chdl=QCALL|UnifiedGenotyper&chtt=Callset+concordance+($args{'POP'})&chts=000000,20&chma=100,0,0,0\"";
|
||||
$dm->addRule($outpng, $varout, $vennCmd);
|
||||
|
||||
my $render = "$args{'OUT_ROOT'}.tex";
|
||||
my $renderCmd = "$ENV{'STING_DIR'}/perl/reports/1KG/RenderTemplate.pl KVP=$varout TEMPLATE=$ENV{'STING_DIR'}/perl/reports/1KG/venn_template.tex TEMPLATE_OUT=$render";
|
||||
$dm->addRule($render, $outpng, $renderCmd);
|
||||
|
||||
my $pdf = "$args{'OUT_ROOT'}.pdf";
|
||||
my $pdfdir = dirname($args{'OUT_ROOT'});
|
||||
my $pdfCmd = "pdflatex -output-directory $pdfdir $render";
|
||||
$dm->addRule($pdf, $render, $pdfCmd);
|
||||
|
||||
$dm->execute();
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
use FindBin;
|
||||
use lib "$FindBin::Bin/../../";
|
||||
|
||||
use StingArgs;
|
||||
use Data::Dumper;
|
||||
|
||||
my %args = &getCommandArguments("KVP" => undef, "TEMPLATE" => undef, "TEMPLATE_OUT" => undef);
|
||||
|
||||
my %vars;
|
||||
open(KVP, $args{'KVP'});
|
||||
while (my $kvpline = <KVP>) {
|
||||
chomp($kvpline);
|
||||
|
||||
my ($key, $value) = split(/\t+/, $kvpline);
|
||||
$vars{$key} = $value;
|
||||
}
|
||||
close(KVP);
|
||||
|
||||
open(TEMPLATE, $args{'TEMPLATE'});
|
||||
open(TEMPLATE_OUT, ">$args{'TEMPLATE_OUT'}");
|
||||
while (my $line = <TEMPLATE>) {
|
||||
chomp($line);
|
||||
|
||||
while ($line =~ /\$(.+?)\$/) {
|
||||
if (exists($vars{$1})) {
|
||||
my ($key, $value) = ($1, $vars{$1});
|
||||
$line =~ s/\$$key\$/$value/;
|
||||
} else {
|
||||
$line =~ s/\$$1\$/unknown/;
|
||||
}
|
||||
}
|
||||
|
||||
print TEMPLATE_OUT "$line\n";
|
||||
}
|
||||
close(TEMPLATE_OUT);
|
||||
close(TEMPLATE);
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
\documentclass[10pt]{article}
|
||||
|
||||
\usepackage[lmargin=0.3in, rmargin=0.3in, tmargin=0.9in, bmargin=0.9in, paperwidth=10.0in, paperheight=7.5in]{geometry}
|
||||
\usepackage{multicol}
|
||||
\usepackage{graphicx}
|
||||
\usepackage{fancyhdr}
|
||||
|
||||
\pagestyle{fancy}
|
||||
|
||||
\begin{document}
|
||||
\lhead{\bfseries{\Large{$title$ - $population$ samples}}}
|
||||
\rhead{}
|
||||
\chead{}
|
||||
|
||||
\begin{center}
|
||||
\includegraphics[height=125mm, type=pdf, ext=.pdf, read=.pdf]{$plotprefix$}
|
||||
\end{center}
|
||||
|
||||
$caption$
|
||||
|
||||
\lfoot{}
|
||||
\cfoot{}
|
||||
\rfoot{\small{1,000 Genomes Automated Data Processing Report ($timestamp$)}}
|
||||
\end{document}
|
||||
|
|
@ -1,47 +0,0 @@
|
|||
\documentclass[10pt]{article}
|
||||
|
||||
\usepackage[lmargin=0.3in, rmargin=0.3in, tmargin=0.9in, bmargin=0.9in, paperwidth=10.0in, paperheight=7.5in]{geometry}
|
||||
\usepackage{multicol}
|
||||
\usepackage{graphicx}
|
||||
\usepackage{fancyhdr}
|
||||
|
||||
\pagestyle{fancy}
|
||||
|
||||
\begin{document}
|
||||
\lhead{\bfseries{\Large{Sequencing and variant summary - $population$ samples}}}
|
||||
\rhead{}
|
||||
\chead{}
|
||||
|
||||
\begin{tabular}{|r|l|l|}
|
||||
\multicolumn{3}{l}{\bfseries{Sequencing summary}} \\
|
||||
\hline
|
||||
& Lanes & Aligned Lanes \\
|
||||
\hline
|
||||
All & $allLanes$ & $allAlignedLanes$ \\
|
||||
Passed QC & $passedQCLanes$ & $passedQCAlignedLanes$ \\
|
||||
Failed Genotype QC & $failedGenotypeQCLanes$ & $failedGenotypeQCAlignedLanes$ \\
|
||||
Failed one of the DCC processes & $failedDCCProcessLanes$ & $failedDCCProcessAlignedLanes$ \\
|
||||
Not yet available from archive & $failedUnavailableLanes$ & $failedUnavailableAlignedLanes$ \\
|
||||
Suppressed in archive & $failedSuppressedLanes$ & $failedSuppressedAlignedLanes$ \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
|
||||
\begin{tabular}{|r|l|l|}
|
||||
\multicolumn{3}{l}{\bfseries{Variant summary}} \\
|
||||
\hline
|
||||
& UnifiedGenotyper & QCALL \\
|
||||
\hline
|
||||
Called samples & $ugSamples$ & $qcallSamples$ \\
|
||||
SNPs (all) & $ugAllSNPs$ & $qcallAllSNPs$ \\
|
||||
Ti/Tv (all) & $ugAllTiTv$ & $qcallAllTiTv$ \\
|
||||
SNPs (known) & $ugKnownSNPs$ & $qcallKnownSNPs$ \\
|
||||
Ti/Tv (known) & $ugKnownTiTv$ & $qcallKnownTiTv$ \\
|
||||
SNPs (novel) & $ugNovelSNPs$ & $qcallNovelSNPs$ \\
|
||||
Ti/Tv (novel) & $ugNovelTiTv$ & $qcallNovelTiTv$ \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
|
||||
\lfoot{}
|
||||
\cfoot{}
|
||||
\rfoot{\small{1,000 Genomes Automated Data Processing Report ($timestamp$)}}
|
||||
\end{document}
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
\documentclass[10pt]{article}
|
||||
|
||||
\usepackage[margin=1in, paperwidth=10.0in, paperheight=7.5in]{geometry}
|
||||
\usepackage{multicol}
|
||||
\usepackage{fancyhdr}
|
||||
\pagestyle{fancy}
|
||||
|
||||
\begin{document}
|
||||
\lhead{\bfseries{\Large{1,000 Genomes Data Processing Report - $population$ samples}}}
|
||||
\rhead{}
|
||||
\chead{}
|
||||
|
||||
\begin{multicols}{2}
|
||||
\begin{tabular}{|r|l|}
|
||||
\multicolumn{2}{l}{\bfseries{Project Summary ($study_id$)}} \\
|
||||
\hline
|
||||
Used samples & $used_samples$ \\
|
||||
Unused samples & $unused_samples$ \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
|
||||
\begin{tabular}{|r|l|}
|
||||
\multicolumn{2}{l}{\bfseries{Sequencing Summary}} \\
|
||||
\hline
|
||||
Sequencers & $sequencers$ \\
|
||||
Used lanes & $used_lanes$ \\
|
||||
Unused lanes & $unused_lanes$ \\
|
||||
Used lanes/sample & $used_lanes_per_sample$ \\
|
||||
Lane parities & $lane_parities$ \\
|
||||
Read lengths & $read_lengths$ \\
|
||||
Sequencing dates & $sequencing_dates$ \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
|
||||
\begin{tabular}{|r|l|l|}
|
||||
\multicolumn{3}{l}{\bfseries{Bases Summary (excluding unused lanes/samples)}} \\
|
||||
\hline
|
||||
& Per lane & Per sample \\
|
||||
\hline
|
||||
Reads & $reads_per_lane$ & $reads_per_sample$ \\
|
||||
Used bases & $used_bases_per_lane$ & $used_bases_per_sample$ \\
|
||||
Coverage & $coverage_per_lane$ & $coverage_per_sample$ \\
|
||||
\% loci $>$ 1x covered & $loci_gt_1x_per_lane$ & $loci_gt_1x_per_sample$ \\
|
||||
\% loci $>$ 2x covered & $loci_gt_2x_per_lane$ & $loci_gt_2x_per_sample$ \\
|
||||
\% loci $>$ 3x covered & $loci_gt_3x_per_lane$ & $loci_gt_3x_per_sample$ \\
|
||||
\% loci $>$ 4x covered & $loci_gt_4x_per_lane$ & $loci_gt_4x_per_sample$ \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
|
||||
\begin{tabular}{|r|l|l|}
|
||||
\multicolumn{3}{l}{\bfseries{Variant Summary}} \\
|
||||
\hline
|
||||
& Found & Est. FP rate \\
|
||||
\hline
|
||||
All SNPs & $all_snps_found$ & $all_snps_fprate$ \\
|
||||
Known SNPs & $known_snps_found$ & $known_snps_fprate$ \\
|
||||
Novel SNPs & $novel_snps_found$ & $novel_snps_fprate$ \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\end{multicols}
|
||||
|
||||
%\lfoot{kiran@broadinstitute.org}
|
||||
%\cfoot{}
|
||||
%\rfoot{}
|
||||
\end{document}
|
||||
|
|
@ -1,62 +0,0 @@
|
|||
\documentclass[10pt]{article}
|
||||
|
||||
\usepackage[lmargin=0.3in, rmargin=0.3in, tmargin=0.9in, bmargin=0.9in, paperwidth=10.0in, paperheight=7.5in]{geometry}
|
||||
\usepackage{multicol}
|
||||
\usepackage{graphicx}
|
||||
\usepackage{fancyhdr}
|
||||
|
||||
\pagestyle{fancy}
|
||||
|
||||
\begin{document}
|
||||
\lhead{\bfseries{\Large{Callset concordance - $population$ samples}}}
|
||||
\rhead{}
|
||||
\chead{}
|
||||
|
||||
\begin{center}
|
||||
\begin{tabular}{|r|l|l|}
|
||||
\multicolumn{3}{l}{\bfseries{UG and QCALL}} \\
|
||||
\hline
|
||||
Set & Variants & Ti/Tv \\
|
||||
\hline
|
||||
All SNPs & $Intersection.called.all.nVariantLoci$ & $Intersection.called.all.ti/tv_ratio$ \\
|
||||
Known SNPs & $Intersection.called.known.nVariantLoci$ & $Intersection.called.known.ti/tv_ratio$ \\
|
||||
Novel SNPs & $Intersection.called.novel.nVariantLoci$ & $Intersection.called.novel.ti/tv_ratio$ \\
|
||||
Filtered in UG, present in QCALL & $InUG-FilteredInQCALL.called.all.nVariantLoci$ & $InUG-FilteredInQCALL.called.all.ti/tv_ratio$ \\
|
||||
Filtered in QCALL, present in UG & $InQCALL-FilteredInUG.called.all.nVariantLoci$ & $InQCALL-FilteredInUG.called.all.ti/tv_ratio$ \\
|
||||
\hline
|
||||
\end{tabular} \\[0.1in]
|
||||
|
||||
\includegraphics[width=95mm, type=png, ext=.png, read=.png]{$venndiagram$}
|
||||
\end{center}
|
||||
|
||||
\begin{multicols}{2}
|
||||
\hfill
|
||||
\begin{tabular}{|r|l|l|}
|
||||
\multicolumn{3}{l}{\bfseries{UG only}} \\
|
||||
\hline
|
||||
Set & Variants & Ti/Tv \\
|
||||
\hline
|
||||
All SNPs & $UG.called.all.nVariantLoci$ & $UG.called.all.ti/tv_ratio$ \\
|
||||
Known SNPs & $UG.called.known.nVariantLoci$ & $UG.called.known.ti/tv_ratio$ \\
|
||||
Novel SNPs & $UG.called.novel.nVariantLoci$ & $UG.called.novel.ti/tv_ratio$ \\
|
||||
Filtered & $UG.filtered.all.nVariantLoci$ & $UG.filtered.all.ti/tv_ratio$ \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
|
||||
\begin{tabular}{|r|l|l|}
|
||||
\multicolumn{3}{l}{\bfseries{QCALL only}} \\
|
||||
\hline
|
||||
Set & Variants & Ti/Tv \\
|
||||
\hline
|
||||
All SNPs & $QCALL.called.all.nVariantLoci$ & $QCALL.called.all.ti/tv_ratio$ \\
|
||||
Known SNPs & $QCALL.called.known.nVariantLoci$ & $QCALL.called.known.ti/tv_ratio$ \\
|
||||
Novel SNPs & $QCALL.called.novel.nVariantLoci$ & $QCALL.called.novel.ti/tv_ratio$ \\
|
||||
Filtered & $QCALL.filtered.all.nVariantLoci$ & $QCALL.filtered.all.ti/tv_ratio$ \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\end{multicols}
|
||||
|
||||
\lfoot{}
|
||||
\cfoot{}
|
||||
\rfoot{\small{1,000 Genomes Automated Data Processing Report ($timestamp$)}}
|
||||
\end{document}
|
||||
Loading…
Reference in New Issue