diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerBelowFrequencyWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerBelowFrequencyWalker.java index 3a5c20580..b4e3ef54b 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerBelowFrequencyWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerBelowFrequencyWalker.java @@ -44,10 +44,10 @@ public class PowerBelowFrequencyWalker extends LocusWalker { @Argument(fullName="minimumMappingQuality", shortName="mmq", doc="Only use reads above this mapping quality in the power calculation", required=false) int minMappingQuality = -1; - @Argument(fullName="ignoreForwardReads",doc="Use the forward reads at a site. Defaults to true.", required = false) + @Argument(fullName="ignoreForwardReads",doc="Ignore the forward reads at a site. Defaults to false.", required = false) boolean ignoreForwardReads = false; - @Argument(fullName="ignoreReverseReads",doc="Use the reverse reads at a site. Defaults to true.", required = false) + @Argument(fullName="ignoreReverseReads",doc="Ignore the reverse reads at a site. Defaults to false.", required = false) boolean ignoreReverseReads = false; public void initialize() { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/BasicPoolVariantAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/BasicPoolVariantAnalysis.java index ce1d4732b..fec86460f 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/BasicPoolVariantAnalysis.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/BasicPoolVariantAnalysis.java @@ -19,7 +19,7 @@ import java.util.ArrayList; public abstract class BasicPoolVariantAnalysis implements VariantAnalysis{ protected int numIndividualsInPool; protected String name; - protected String[] filenames; + protected String filenames; protected PrintStream out, callOut; private VariantEvalWalker master; @@ -42,7 +42,9 @@ public abstract class BasicPoolVariantAnalysis implements VariantAnalysis{ public List done() { return new ArrayList(); } - public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String[] filenames) { + public int getNumberOfAllelesInPool() { return 2*getNumberOfIndividualsInPool(); } + + public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String filenames) { this.master = master; this.out = out; this.callOut = callOut; @@ -53,6 +55,6 @@ public abstract class BasicPoolVariantAnalysis implements VariantAnalysis{ // no need to finalize data in general } - public abstract String update(Variation variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context); + public abstract String update(Variation variant, RefMetaDataTracker tracker, char ref, AlignmentContext context); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledFrequencyAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledFrequencyAnalysis.java new file mode 100644 index 000000000..85711f124 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledFrequencyAnalysis.java @@ -0,0 +1,83 @@ +package org.broadinstitute.sting.playground.gatk.walkers.varianteval; + +import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; + +import java.util.List; +import java.util.ArrayList; +import java.io.PrintStream; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Oct 20, 2009 + * Time: 4:03:23 PM + * To change this template use File | Settings | File Templates. + */ +public class PooledFrequencyAnalysis extends BasicPoolVariantAnalysis implements PoolAnalysis, GenotypeAnalysis { + /* + Maintains an array of basic variant analyses broken down by estimated frequency + */ + private VariantDBCoverage[] coverageAnalysisByFrequency; + private VariantCounter[] variantCounterByFrequency; + private TransitionTranversionAnalysis[] transitionTransversionByFrequency; + + public PooledFrequencyAnalysis(int poolSize, String knownDBSNPName ) { + super("Pooled_Frequency_Analysis",poolSize); + coverageAnalysisByFrequency = new VariantDBCoverage[getNumberOfAllelesInPool()+1]; + variantCounterByFrequency = new VariantCounter[getNumberOfAllelesInPool()+1]; + transitionTransversionByFrequency = new TransitionTranversionAnalysis[getNumberOfAllelesInPool()+1]; + for ( int j = 0; j < getNumberOfAllelesInPool()+1; j ++ ) { + coverageAnalysisByFrequency[j] = new VariantDBCoverage(knownDBSNPName); + variantCounterByFrequency[j] = new VariantCounter(); + transitionTransversionByFrequency[j] = new TransitionTranversionAnalysis(); + } + } + + public void initialize(VariantEvalWalker master, PrintStream out1, PrintStream out2, String name) { + super.initialize(master,out1,out2,name); + + } + + public String update( Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context ) { + int f = calculateFrequencyFromVariation(eval); + String interest1 = coverageAnalysisByFrequency[f].update(eval,tracker,ref,context); + String interest2 = variantCounterByFrequency[f].update(eval,tracker,ref,context); + String interest3 = transitionTransversionByFrequency[f].update(eval,tracker,ref,context); + + if ( interest1 != null ) { + return "coverageAnalysis_Frequency_"+f+"\t"+interest1; + } + + if ( interest2 != null ) { + return "VariantCounter_Frequency_"+f+"\t"+interest2; + } + + if ( interest3 != null ) { + return "transitionTransversion_Frequency_"+f+"\t"+interest3; + } + + return null; + } + + public int calculateFrequencyFromVariation(Variation eval) { + double freq = eval.getNonRefAlleleFrequency(); + return (int) Math.round(getNumberOfAllelesInPool()*freq); + } + + @Override + public List done() { + List s = new ArrayList(); + for ( int j = 0; j < getNumberOfAllelesInPool()+1; j ++ ) { + s.add("Pool_Frequency_Analysis_Frequency:\t"+j); + s.addAll(coverageAnalysisByFrequency[j].done()); + s.addAll(variantCounterByFrequency[j].done()); + s.addAll(transitionTransversionByFrequency[j].done()); + } + + return s; + } + +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java index 53c8ccecc..8af756b2d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java @@ -260,16 +260,16 @@ class PooledConcordanceTable { } else { mismatch = ( evalF != chipS && evalS != chipS ); // test against het nonref } - } else if ( chipS == ref ) { + } else if ( chipS == ref ) { // het nonref (somehow made nonref|ref rather than ref|nonref) mismatch = ( evalF != chipF && evalS != chipF ); // test against het nonref - } else { + } else { // chip is homozygous nonreference if( evalF == ref ) { - mismatch = ( evalS != chipF && evalS != chipF ); // test against hom nonref + mismatch = ( evalS != chipF && evalS != chipS ); // make sure the nonref call of eval matches one of the chip alleles } else if ( evalS == ref ) { - mismatch = ( evalF != chipF && evalF != chipF ); // test against hom nonref + mismatch = ( evalF != chipF && evalF != chipS ); // make sure the nonref call of eval matches one of the chip alleles } else { // both are hom nonref - mismatch = ( evalF != chipF ); + mismatch = ( evalF != chipF || evalS != chipS); // could be multiallelic calls } } return mismatch; @@ -373,15 +373,18 @@ class PooledConcordanceTable { } public int getLargestOutputAlleleFrequencyIndex() { - // TODO -- this code may be bugged -- is the first index at which no hapmap sites appear - // TODO -- also the index above which no hapmap sites are guaranteed to appear with said frequency - int nHapmapSitesAtFreq = 1; + // fixed - returns the last index at which genotype sites appear + int freqIndex = -1; - while ( nHapmapSitesAtFreq > 0 && freqIndex < calculateNumFrequencyIndeces(poolSize) ) { - freqIndex ++; + for ( int j = 0; j < calculateNumFrequencyIndeces(poolSize); j ++ ) { + int nHapmapSitesAtFreq = 0; for ( int i = 0; i < CALL_INDECES; i ++) { nHapmapSitesAtFreq += table[TRUTH_REF][i] + table[TRUTH_VAR][i] + table[TRUTH_UNKNOWN][i]; } + + if ( nHapmapSitesAtFreq > 0 ) { + freqIndex = j; + } } return freqIndex; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index ae96d3f08..146466df7 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -54,7 +54,7 @@ public class VariantEvalWalker extends RodWalker { public boolean supressDateInformation = false; @Argument(fullName = "numPeopleInPool", shortName="PS", doc="If using a variant file from a pooled caller, this field provides the number of individuals in each pool", required=false) - public int numPeopleInPool = 1; + public int numPeopleInPool = -1; @Argument(fullName = "pathToHapmapPoolFile", shortName="HPF", doc="If using a variant file from a pooled caller on pools of hapmap individuals, this field provides a filepath to the pool construction file listing which hapmap individuals are in which pool", required=false) public String pathToHapmapPoolFile = null; @@ -132,6 +132,7 @@ public class VariantEvalWalker extends RodWalker { analyses.add(new PooledGenotypeConcordance(pathToHapmapPoolFile)); analyses.add(new VariantCounter()); analyses.add(new VariantDBCoverage(knownSNPDBName)); + analyses.add(new PooledFrequencyAnalysis(numPeopleInPool,knownSNPDBName)); analyses.add(new GenotypeConcordance(genotypeChipName)); analyses.add(new TransitionTranversionAnalysis()); analyses.add(new NeighborDistanceAnalysis()); @@ -148,7 +149,7 @@ public class VariantEvalWalker extends RodWalker { VariantAnalysis analysis = iter.next(); boolean disableForGenotyping = evalContainsGenotypes && !(analysis instanceof GenotypeAnalysis); boolean disableForPopulation = !evalContainsGenotypes && !(analysis instanceof PopulationAnalysis); - boolean disableForPools = (pathToHapmapPoolFile == null && analysis instanceof PooledGenotypeConcordance); + boolean disableForPools = (pathToHapmapPoolFile == null && analysis instanceof PooledGenotypeConcordance) || (numPeopleInPool < 1 && analysis instanceof PooledGenotypeConcordance); boolean disable = disableForGenotyping | disableForPopulation | disableForPools; String causeName = disableForGenotyping ? "population" : (disableForPopulation ? "genotype" : (disableForPools ? "pool" : null)); if (disable) {