@PowerBelowFrequency

+ Changes to doc

@ BasicPoolVariantAnalysis
    + use char rather than ReferenceContext
    + calculate # alleles

@ PooledFrequencyAnalysis
    + breakdown of call metrics by estimated number of alleles in pool

@ VariantEvalWalker
    + add PooledFrequencyAnalysis to analysis set

@ PooledGenotypeConcordance
    + correctly calculate maximal allele frequency for output




git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1893 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2009-10-21 15:17:11 +00:00
parent 967128035e
commit 77863d4940
5 changed files with 106 additions and 17 deletions

View File

@ -44,10 +44,10 @@ public class PowerBelowFrequencyWalker extends LocusWalker<Integer,Integer> {
@Argument(fullName="minimumMappingQuality", shortName="mmq", doc="Only use reads above this mapping quality in the power calculation", required=false)
int minMappingQuality = -1;
@Argument(fullName="ignoreForwardReads",doc="Use the forward reads at a site. Defaults to true.", required = false)
@Argument(fullName="ignoreForwardReads",doc="Ignore the forward reads at a site. Defaults to false.", required = false)
boolean ignoreForwardReads = false;
@Argument(fullName="ignoreReverseReads",doc="Use the reverse reads at a site. Defaults to true.", required = false)
@Argument(fullName="ignoreReverseReads",doc="Ignore the reverse reads at a site. Defaults to false.", required = false)
boolean ignoreReverseReads = false;
public void initialize() {

View File

@ -19,7 +19,7 @@ import java.util.ArrayList;
public abstract class BasicPoolVariantAnalysis implements VariantAnalysis{
protected int numIndividualsInPool;
protected String name;
protected String[] filenames;
protected String filenames;
protected PrintStream out, callOut;
private VariantEvalWalker master;
@ -42,7 +42,9 @@ public abstract class BasicPoolVariantAnalysis implements VariantAnalysis{
public List<String> done() { return new ArrayList<String>(); }
public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String[] filenames) {
public int getNumberOfAllelesInPool() { return 2*getNumberOfIndividualsInPool(); }
public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String filenames) {
this.master = master;
this.out = out;
this.callOut = callOut;
@ -53,6 +55,6 @@ public abstract class BasicPoolVariantAnalysis implements VariantAnalysis{
// no need to finalize data in general
}
public abstract String update(Variation variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context);
public abstract String update(Variation variant, RefMetaDataTracker tracker, char ref, AlignmentContext context);
}

View File

@ -0,0 +1,83 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import java.util.List;
import java.util.ArrayList;
import java.io.PrintStream;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Oct 20, 2009
* Time: 4:03:23 PM
* To change this template use File | Settings | File Templates.
*/
public class PooledFrequencyAnalysis extends BasicPoolVariantAnalysis implements PoolAnalysis, GenotypeAnalysis {
/*
Maintains an array of basic variant analyses broken down by estimated frequency
*/
private VariantDBCoverage[] coverageAnalysisByFrequency;
private VariantCounter[] variantCounterByFrequency;
private TransitionTranversionAnalysis[] transitionTransversionByFrequency;
public PooledFrequencyAnalysis(int poolSize, String knownDBSNPName ) {
super("Pooled_Frequency_Analysis",poolSize);
coverageAnalysisByFrequency = new VariantDBCoverage[getNumberOfAllelesInPool()+1];
variantCounterByFrequency = new VariantCounter[getNumberOfAllelesInPool()+1];
transitionTransversionByFrequency = new TransitionTranversionAnalysis[getNumberOfAllelesInPool()+1];
for ( int j = 0; j < getNumberOfAllelesInPool()+1; j ++ ) {
coverageAnalysisByFrequency[j] = new VariantDBCoverage(knownDBSNPName);
variantCounterByFrequency[j] = new VariantCounter();
transitionTransversionByFrequency[j] = new TransitionTranversionAnalysis();
}
}
public void initialize(VariantEvalWalker master, PrintStream out1, PrintStream out2, String name) {
super.initialize(master,out1,out2,name);
}
public String update( Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context ) {
int f = calculateFrequencyFromVariation(eval);
String interest1 = coverageAnalysisByFrequency[f].update(eval,tracker,ref,context);
String interest2 = variantCounterByFrequency[f].update(eval,tracker,ref,context);
String interest3 = transitionTransversionByFrequency[f].update(eval,tracker,ref,context);
if ( interest1 != null ) {
return "coverageAnalysis_Frequency_"+f+"\t"+interest1;
}
if ( interest2 != null ) {
return "VariantCounter_Frequency_"+f+"\t"+interest2;
}
if ( interest3 != null ) {
return "transitionTransversion_Frequency_"+f+"\t"+interest3;
}
return null;
}
public int calculateFrequencyFromVariation(Variation eval) {
double freq = eval.getNonRefAlleleFrequency();
return (int) Math.round(getNumberOfAllelesInPool()*freq);
}
@Override
public List<String> done() {
List<String> s = new ArrayList<String>();
for ( int j = 0; j < getNumberOfAllelesInPool()+1; j ++ ) {
s.add("Pool_Frequency_Analysis_Frequency:\t"+j);
s.addAll(coverageAnalysisByFrequency[j].done());
s.addAll(variantCounterByFrequency[j].done());
s.addAll(transitionTransversionByFrequency[j].done());
}
return s;
}
}

View File

@ -260,16 +260,16 @@ class PooledConcordanceTable {
} else {
mismatch = ( evalF != chipS && evalS != chipS ); // test against het nonref
}
} else if ( chipS == ref ) {
} else if ( chipS == ref ) { // het nonref (somehow made nonref|ref rather than ref|nonref)
mismatch = ( evalF != chipF && evalS != chipF ); // test against het nonref
} else {
} else { // chip is homozygous nonreference
if( evalF == ref ) {
mismatch = ( evalS != chipF && evalS != chipF ); // test against hom nonref
mismatch = ( evalS != chipF && evalS != chipS ); // make sure the nonref call of eval matches one of the chip alleles
} else if ( evalS == ref ) {
mismatch = ( evalF != chipF && evalF != chipF ); // test against hom nonref
mismatch = ( evalF != chipF && evalF != chipS ); // make sure the nonref call of eval matches one of the chip alleles
} else {
// both are hom nonref
mismatch = ( evalF != chipF );
mismatch = ( evalF != chipF || evalS != chipS); // could be multiallelic calls
}
}
return mismatch;
@ -373,15 +373,18 @@ class PooledConcordanceTable {
}
public int getLargestOutputAlleleFrequencyIndex() {
// TODO -- this code may be bugged -- is the first index at which no hapmap sites appear
// TODO -- also the index above which no hapmap sites are guaranteed to appear with said frequency
int nHapmapSitesAtFreq = 1;
// fixed - returns the last index at which genotype sites appear
int freqIndex = -1;
while ( nHapmapSitesAtFreq > 0 && freqIndex < calculateNumFrequencyIndeces(poolSize) ) {
freqIndex ++;
for ( int j = 0; j < calculateNumFrequencyIndeces(poolSize); j ++ ) {
int nHapmapSitesAtFreq = 0;
for ( int i = 0; i < CALL_INDECES; i ++) {
nHapmapSitesAtFreq += table[TRUTH_REF][i] + table[TRUTH_VAR][i] + table[TRUTH_UNKNOWN][i];
}
if ( nHapmapSitesAtFreq > 0 ) {
freqIndex = j;
}
}
return freqIndex;

View File

@ -54,7 +54,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
public boolean supressDateInformation = false;
@Argument(fullName = "numPeopleInPool", shortName="PS", doc="If using a variant file from a pooled caller, this field provides the number of individuals in each pool", required=false)
public int numPeopleInPool = 1;
public int numPeopleInPool = -1;
@Argument(fullName = "pathToHapmapPoolFile", shortName="HPF", doc="If using a variant file from a pooled caller on pools of hapmap individuals, this field provides a filepath to the pool construction file listing which hapmap individuals are in which pool", required=false)
public String pathToHapmapPoolFile = null;
@ -132,6 +132,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
analyses.add(new PooledGenotypeConcordance(pathToHapmapPoolFile));
analyses.add(new VariantCounter());
analyses.add(new VariantDBCoverage(knownSNPDBName));
analyses.add(new PooledFrequencyAnalysis(numPeopleInPool,knownSNPDBName));
analyses.add(new GenotypeConcordance(genotypeChipName));
analyses.add(new TransitionTranversionAnalysis());
analyses.add(new NeighborDistanceAnalysis());
@ -148,7 +149,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
VariantAnalysis analysis = iter.next();
boolean disableForGenotyping = evalContainsGenotypes && !(analysis instanceof GenotypeAnalysis);
boolean disableForPopulation = !evalContainsGenotypes && !(analysis instanceof PopulationAnalysis);
boolean disableForPools = (pathToHapmapPoolFile == null && analysis instanceof PooledGenotypeConcordance);
boolean disableForPools = (pathToHapmapPoolFile == null && analysis instanceof PooledGenotypeConcordance) || (numPeopleInPool < 1 && analysis instanceof PooledGenotypeConcordance);
boolean disable = disableForGenotyping | disableForPopulation | disableForPools;
String causeName = disableForGenotyping ? "population" : (disableForPopulation ? "genotype" : (disableForPools ? "pool" : null));
if (disable) {