diff --git a/R/plot_Annotations_BinnedTruthMetrics.R b/R/plot_Annotations_BinnedTruthMetrics.R index 182edfb2c..6e68ac88d 100644 --- a/R/plot_Annotations_BinnedTruthMetrics.R +++ b/R/plot_Annotations_BinnedTruthMetrics.R @@ -73,20 +73,26 @@ dev.off() # Plot dbsnp and true positive rate as a function of the annotation # +ymin = min(all$dbsnp) +ymax = max(all$dbsnp) outfile = paste(input, ".truthRate.", annotationName, ".pdf", sep="") pdf(outfile, height=7, width=7) par(cex=1.1) yLabel = "DBsnp Rate" if( sum(all$truePositive==0) != length(all$truePositive) ) { -yLabel = "DBsnp/Truth Rate" +t = all[all$truePositive>0,] +yLabel = "DBsnp/True Positive Rate" +ymin = min(min(all$dbsnp),min(t$truePositive)) +ymax = max(max(all$dbsnp),max(t$truePositive)) + } -plot(all$value,all$dbsnp,xlab=annotationName,ylab=yLabel,pch=20,xaxt="n",ps=14); +plot(all$value,all$dbsnp,xlab=annotationName,ylab=yLabel,pch=20,ylim=c(ymin,ymax),xaxt="n",ps=14); axis(1,axTicks(1), format(axTicks(1), scientific=F)) abline(v=m,lty=2) abline(v=m75,lty=2) abline(v=m25,lty=2) if( sum(all$truePositive==0) != length(all$truePositive) ) { -points(all$value,all$truePositive,col="magenta",pch=20); +points(t$value,t$truePositive,col="magenta",pch=20); legend("topleft", c("dbsnp","truth"),col=c("black","magenta"),pch=c(20,20)) } dev.off() @@ -102,13 +108,13 @@ yLabel = "DBsnp Rate" if( sum(all$truePositive==0) != length(all$truePositive) ) { yLabel = "DBsnp/Truth Rate" } -plot(all$value,all$dbsnp,xlab=annotationName,log="x",ylab=yLabel,pch=20,xaxt="n",ps=14); +plot(all$value,all$dbsnp,xlab=annotationName,log="x",ylab=yLabel,ylim=c(ymin,ymax),pch=20,xaxt="n",ps=14); axis(1,axTicks(1), format(axTicks(1), scientific=F)) abline(v=m,lty=2) abline(v=m75,lty=2) abline(v=m25,lty=2) if( sum(all$truePositive==0) != length(all$truePositive) ) { -points(all$value,all$truePositive,col="magenta",pch=20); +points(t$value,t$truePositive,col="magenta",pch=20); legend("topleft", c("dbsnp","truth"),col=c("black","magenta"),pch=c(20,20)) } dev.off() diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java new file mode 100755 index 000000000..2d21dd3ea --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java @@ -0,0 +1,84 @@ +package org.broadinstitute.sting.gatk.walkers.recalibration; + +import net.sf.samtools.SAMRecord; + +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: Jan 29, 2010 + * + * The number of previous N bases (along the direction of the read) that contain G's and C's. + * Only valid for Illumina reads. Otherwise return -1. + */ + +public class GCContentCovariate implements ExperimentalCovariate { + + int numBack = 7; + + // Initialize any member variables using the command-line arguments passed to the walkers + public void initialize( final RecalibrationArgumentCollection RAC ) { + numBack = RAC.HOMOPOLYMER_NBACK; + } + + // Used to pick out the covariate's value from attributes of the read + public final Comparable getValue( final SAMRecord read, final int offset ) { + + // ATTGCCCCGTAAAAAAAGAGAA + // 0000123456654321001122 + + if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SLX" ) ) { + int numGC = 0; + int startPos = 0; + int stopPos = 0; + final byte[] bases = read.getReadBases(); + if( !read.getReadNegativeStrandFlag() ) { // Forward direction + startPos = Math.max(offset - numBack, 0); + stopPos = Math.max(offset - 1, 0); + } else { // Negative direction + startPos = Math.min(offset + 2, bases.length); + stopPos = Math.min(offset + numBack + 1, bases.length); + } + + for( int iii = startPos; iii < stopPos; iii++ ) { + if( bases[iii] == (byte)'G' || bases[iii] == (byte)'C' ) { + numGC++; + } + } + + return numGC; + } else { // This effect is specific to the Illumina platform + return -1; + } + } + + // Used to get the covariate's value from input csv file in TableRecalibrationWalker + public final Comparable getValue( final String str ) { + return Integer.parseInt( str ); + } + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java index 2527ad2a8..afb7feb92 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java @@ -1,12 +1,15 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer; +import org.broadinstitute.sting.gatk.refdata.RodGLF; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; /* * Copyright (c) 2010 The Broad Institute @@ -87,11 +90,27 @@ public class AnalyzeAnnotationsWalker extends RodWalker { if( tracker != null ) { // First find out if this variant is in the truth set + boolean isInTruthSet = false; boolean isTrueVariant = false; for( ReferenceOrderedDatum rod : tracker.getAllRods() ) { if( rod != null && rod.getName().toUpperCase().startsWith("TRUTH") ) { - isTrueVariant = true; - break; // at least one of the truth files has this variant, so break out + isInTruthSet = true; + + if( rod instanceof RodVCF ) { + if( ((RodVCF) rod).isSNP() ) { + isTrueVariant = true; + } + } else if( rod instanceof RodGLF ) { + if( ((RodGLF) rod).isSNP() ) { + isTrueVariant = true; + } + } else if( rod instanceof VariantBackedByGenotype ) { + if( ((VariantBackedByGenotype) rod).getCalledGenotype().isVariant(ref.getBase()) ) { + isTrueVariant = true; + } + } else { + throw new StingException( "Truth ROD is of unknown ROD type: " + rod.getName() ); + } } } @@ -100,7 +119,7 @@ public class AnalyzeAnnotationsWalker extends RodWalker { if( rod != null && rod instanceof RodVCF && !rod.getName().toUpperCase().startsWith("TRUTH") ) { final RodVCF variant = (RodVCF) rod; if( variant.isSNP() ) { - dataManager.addAnnotations( variant, SAMPLE_NAME, isTrueVariant ); + dataManager.addAnnotations( variant, SAMPLE_NAME, isInTruthSet, isTrueVariant ); } } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java index 5db86f689..53e9bbc90 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java @@ -48,7 +48,7 @@ public class AnnotationDataManager { data = new HashMap>(); } - public void addAnnotations( final RodVCF variant, final String sampleName, final boolean isTrueVariant ) { + public void addAnnotations( final RodVCF variant, final String sampleName, final boolean isInTruthSet, final boolean isTrueVariant ) { if( sampleName != null ) { // Only process variants that are found in the sample with this sampleName if( variant.getGenotype(sampleName).isNoCall() ) { // This variant isn't found in this sample so break out @@ -83,9 +83,9 @@ public class AnnotationDataManager { // Decide if the variant is a transition or transversion if( BaseUtils.isTransition( (byte)variant.getReferenceForSNP(), (byte)variant.getAlternativeBaseForSNP()) ) { - datum.incrementTi( isNovelVariant, isTrueVariant ); + datum.incrementTi( isNovelVariant, isInTruthSet, isTrueVariant ); } else { - datum.incrementTv( isNovelVariant, isTrueVariant ); + datum.incrementTv( isNovelVariant, isInTruthSet, isTrueVariant ); } } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDatum.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDatum.java index af8f66953..5fb668ade 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDatum.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDatum.java @@ -45,7 +45,8 @@ public class AnnotationDatum implements Comparator { public static final int NOVEL_SET = 1; public static final int DBSNP_SET = 2; public static final int TRUTH_SET = 3; - private static final int NUM_SETS = 4; + public static final int TRUE_POSITIVE = 4; + private static final int NUM_SETS = 5; public AnnotationDatum() { @@ -69,7 +70,7 @@ public class AnnotationDatum implements Comparator { } } - final public void incrementTi( final boolean isNovelVariant, final boolean isTrueVariant ) { + final public void incrementTi( final boolean isNovelVariant, final boolean isInTruthSet, final boolean isTrueVariant ) { ti[FULL_SET]++; if( isNovelVariant ) { @@ -77,12 +78,15 @@ public class AnnotationDatum implements Comparator { } else { // Is known, in DBsnp ti[DBSNP_SET]++; } - if( isTrueVariant ) { + if( isInTruthSet ) { ti[TRUTH_SET]++; + if( isTrueVariant ) { + ti[TRUE_POSITIVE]++; + } } } - final public void incrementTv( final boolean isNovelVariant, final boolean isTrueVariant ) { + final public void incrementTv( final boolean isNovelVariant, final boolean isInTruthSet, final boolean isTrueVariant ) { tv[FULL_SET]++; if( isNovelVariant ) { @@ -90,8 +94,11 @@ public class AnnotationDatum implements Comparator { } else { // Is known, in DBsnp tv[DBSNP_SET]++; } - if( isTrueVariant ) { + if( isInTruthSet ) { tv[TRUTH_SET]++; + if( isTrueVariant ) { + tv[TRUE_POSITIVE]++; + } } } @@ -129,12 +136,12 @@ public class AnnotationDatum implements Comparator { final public float calcTPrate() { - if( ti[FULL_SET] + tv[FULL_SET] == 0 ) { // Don't divide by zero + if( ti[TRUTH_SET] + tv[TRUTH_SET] == 0 ) { // Don't divide by zero return 0.0f; } - return 100.0f * ((float) ti[TRUTH_SET] + tv[TRUTH_SET]) / - ((float) ti[FULL_SET] + tv[FULL_SET]); + return 100.0f * ((float) ti[TRUE_POSITIVE] + tv[TRUE_POSITIVE]) / + ((float) ti[TRUTH_SET] + tv[TRUTH_SET]); } final public int numVariants( final int INDEX ) {