@PowerBelowFrequency
+ Changes to doc
@ BasicPoolVariantAnalysis
+ use char rather than ReferenceContext
+ calculate # alleles
@ PooledFrequencyAnalysis
+ breakdown of call metrics by estimated number of alleles in pool
@ VariantEvalWalker
+ add PooledFrequencyAnalysis to analysis set
@ PooledGenotypeConcordance
+ correctly calculate maximal allele frequency for output
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1893 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
967128035e
commit
77863d4940
|
|
@ -44,10 +44,10 @@ public class PowerBelowFrequencyWalker extends LocusWalker<Integer,Integer> {
|
|||
@Argument(fullName="minimumMappingQuality", shortName="mmq", doc="Only use reads above this mapping quality in the power calculation", required=false)
|
||||
int minMappingQuality = -1;
|
||||
|
||||
@Argument(fullName="ignoreForwardReads",doc="Use the forward reads at a site. Defaults to true.", required = false)
|
||||
@Argument(fullName="ignoreForwardReads",doc="Ignore the forward reads at a site. Defaults to false.", required = false)
|
||||
boolean ignoreForwardReads = false;
|
||||
|
||||
@Argument(fullName="ignoreReverseReads",doc="Use the reverse reads at a site. Defaults to true.", required = false)
|
||||
@Argument(fullName="ignoreReverseReads",doc="Ignore the reverse reads at a site. Defaults to false.", required = false)
|
||||
boolean ignoreReverseReads = false;
|
||||
|
||||
public void initialize() {
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ import java.util.ArrayList;
|
|||
public abstract class BasicPoolVariantAnalysis implements VariantAnalysis{
|
||||
protected int numIndividualsInPool;
|
||||
protected String name;
|
||||
protected String[] filenames;
|
||||
protected String filenames;
|
||||
protected PrintStream out, callOut;
|
||||
private VariantEvalWalker master;
|
||||
|
||||
|
|
@ -42,7 +42,9 @@ public abstract class BasicPoolVariantAnalysis implements VariantAnalysis{
|
|||
|
||||
public List<String> done() { return new ArrayList<String>(); }
|
||||
|
||||
public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String[] filenames) {
|
||||
public int getNumberOfAllelesInPool() { return 2*getNumberOfIndividualsInPool(); }
|
||||
|
||||
public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String filenames) {
|
||||
this.master = master;
|
||||
this.out = out;
|
||||
this.callOut = callOut;
|
||||
|
|
@ -53,6 +55,6 @@ public abstract class BasicPoolVariantAnalysis implements VariantAnalysis{
|
|||
// no need to finalize data in general
|
||||
}
|
||||
|
||||
public abstract String update(Variation variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context);
|
||||
public abstract String update(Variation variant, RefMetaDataTracker tracker, char ref, AlignmentContext context);
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,83 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Oct 20, 2009
|
||||
* Time: 4:03:23 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class PooledFrequencyAnalysis extends BasicPoolVariantAnalysis implements PoolAnalysis, GenotypeAnalysis {
|
||||
/*
|
||||
Maintains an array of basic variant analyses broken down by estimated frequency
|
||||
*/
|
||||
private VariantDBCoverage[] coverageAnalysisByFrequency;
|
||||
private VariantCounter[] variantCounterByFrequency;
|
||||
private TransitionTranversionAnalysis[] transitionTransversionByFrequency;
|
||||
|
||||
public PooledFrequencyAnalysis(int poolSize, String knownDBSNPName ) {
|
||||
super("Pooled_Frequency_Analysis",poolSize);
|
||||
coverageAnalysisByFrequency = new VariantDBCoverage[getNumberOfAllelesInPool()+1];
|
||||
variantCounterByFrequency = new VariantCounter[getNumberOfAllelesInPool()+1];
|
||||
transitionTransversionByFrequency = new TransitionTranversionAnalysis[getNumberOfAllelesInPool()+1];
|
||||
for ( int j = 0; j < getNumberOfAllelesInPool()+1; j ++ ) {
|
||||
coverageAnalysisByFrequency[j] = new VariantDBCoverage(knownDBSNPName);
|
||||
variantCounterByFrequency[j] = new VariantCounter();
|
||||
transitionTransversionByFrequency[j] = new TransitionTranversionAnalysis();
|
||||
}
|
||||
}
|
||||
|
||||
public void initialize(VariantEvalWalker master, PrintStream out1, PrintStream out2, String name) {
|
||||
super.initialize(master,out1,out2,name);
|
||||
|
||||
}
|
||||
|
||||
public String update( Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context ) {
|
||||
int f = calculateFrequencyFromVariation(eval);
|
||||
String interest1 = coverageAnalysisByFrequency[f].update(eval,tracker,ref,context);
|
||||
String interest2 = variantCounterByFrequency[f].update(eval,tracker,ref,context);
|
||||
String interest3 = transitionTransversionByFrequency[f].update(eval,tracker,ref,context);
|
||||
|
||||
if ( interest1 != null ) {
|
||||
return "coverageAnalysis_Frequency_"+f+"\t"+interest1;
|
||||
}
|
||||
|
||||
if ( interest2 != null ) {
|
||||
return "VariantCounter_Frequency_"+f+"\t"+interest2;
|
||||
}
|
||||
|
||||
if ( interest3 != null ) {
|
||||
return "transitionTransversion_Frequency_"+f+"\t"+interest3;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public int calculateFrequencyFromVariation(Variation eval) {
|
||||
double freq = eval.getNonRefAlleleFrequency();
|
||||
return (int) Math.round(getNumberOfAllelesInPool()*freq);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> done() {
|
||||
List<String> s = new ArrayList<String>();
|
||||
for ( int j = 0; j < getNumberOfAllelesInPool()+1; j ++ ) {
|
||||
s.add("Pool_Frequency_Analysis_Frequency:\t"+j);
|
||||
s.addAll(coverageAnalysisByFrequency[j].done());
|
||||
s.addAll(variantCounterByFrequency[j].done());
|
||||
s.addAll(transitionTransversionByFrequency[j].done());
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -260,16 +260,16 @@ class PooledConcordanceTable {
|
|||
} else {
|
||||
mismatch = ( evalF != chipS && evalS != chipS ); // test against het nonref
|
||||
}
|
||||
} else if ( chipS == ref ) {
|
||||
} else if ( chipS == ref ) { // het nonref (somehow made nonref|ref rather than ref|nonref)
|
||||
mismatch = ( evalF != chipF && evalS != chipF ); // test against het nonref
|
||||
} else {
|
||||
} else { // chip is homozygous nonreference
|
||||
if( evalF == ref ) {
|
||||
mismatch = ( evalS != chipF && evalS != chipF ); // test against hom nonref
|
||||
mismatch = ( evalS != chipF && evalS != chipS ); // make sure the nonref call of eval matches one of the chip alleles
|
||||
} else if ( evalS == ref ) {
|
||||
mismatch = ( evalF != chipF && evalF != chipF ); // test against hom nonref
|
||||
mismatch = ( evalF != chipF && evalF != chipS ); // make sure the nonref call of eval matches one of the chip alleles
|
||||
} else {
|
||||
// both are hom nonref
|
||||
mismatch = ( evalF != chipF );
|
||||
mismatch = ( evalF != chipF || evalS != chipS); // could be multiallelic calls
|
||||
}
|
||||
}
|
||||
return mismatch;
|
||||
|
|
@ -373,15 +373,18 @@ class PooledConcordanceTable {
|
|||
}
|
||||
|
||||
public int getLargestOutputAlleleFrequencyIndex() {
|
||||
// TODO -- this code may be bugged -- is the first index at which no hapmap sites appear
|
||||
// TODO -- also the index above which no hapmap sites are guaranteed to appear with said frequency
|
||||
int nHapmapSitesAtFreq = 1;
|
||||
// fixed - returns the last index at which genotype sites appear
|
||||
|
||||
int freqIndex = -1;
|
||||
while ( nHapmapSitesAtFreq > 0 && freqIndex < calculateNumFrequencyIndeces(poolSize) ) {
|
||||
freqIndex ++;
|
||||
for ( int j = 0; j < calculateNumFrequencyIndeces(poolSize); j ++ ) {
|
||||
int nHapmapSitesAtFreq = 0;
|
||||
for ( int i = 0; i < CALL_INDECES; i ++) {
|
||||
nHapmapSitesAtFreq += table[TRUTH_REF][i] + table[TRUTH_VAR][i] + table[TRUTH_UNKNOWN][i];
|
||||
}
|
||||
|
||||
if ( nHapmapSitesAtFreq > 0 ) {
|
||||
freqIndex = j;
|
||||
}
|
||||
}
|
||||
|
||||
return freqIndex;
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
public boolean supressDateInformation = false;
|
||||
|
||||
@Argument(fullName = "numPeopleInPool", shortName="PS", doc="If using a variant file from a pooled caller, this field provides the number of individuals in each pool", required=false)
|
||||
public int numPeopleInPool = 1;
|
||||
public int numPeopleInPool = -1;
|
||||
|
||||
@Argument(fullName = "pathToHapmapPoolFile", shortName="HPF", doc="If using a variant file from a pooled caller on pools of hapmap individuals, this field provides a filepath to the pool construction file listing which hapmap individuals are in which pool", required=false)
|
||||
public String pathToHapmapPoolFile = null;
|
||||
|
|
@ -132,6 +132,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
analyses.add(new PooledGenotypeConcordance(pathToHapmapPoolFile));
|
||||
analyses.add(new VariantCounter());
|
||||
analyses.add(new VariantDBCoverage(knownSNPDBName));
|
||||
analyses.add(new PooledFrequencyAnalysis(numPeopleInPool,knownSNPDBName));
|
||||
analyses.add(new GenotypeConcordance(genotypeChipName));
|
||||
analyses.add(new TransitionTranversionAnalysis());
|
||||
analyses.add(new NeighborDistanceAnalysis());
|
||||
|
|
@ -148,7 +149,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
VariantAnalysis analysis = iter.next();
|
||||
boolean disableForGenotyping = evalContainsGenotypes && !(analysis instanceof GenotypeAnalysis);
|
||||
boolean disableForPopulation = !evalContainsGenotypes && !(analysis instanceof PopulationAnalysis);
|
||||
boolean disableForPools = (pathToHapmapPoolFile == null && analysis instanceof PooledGenotypeConcordance);
|
||||
boolean disableForPools = (pathToHapmapPoolFile == null && analysis instanceof PooledGenotypeConcordance) || (numPeopleInPool < 1 && analysis instanceof PooledGenotypeConcordance);
|
||||
boolean disable = disableForGenotyping | disableForPopulation | disableForPools;
|
||||
String causeName = disableForGenotyping ? "population" : (disableForPopulation ? "genotype" : (disableForPools ? "pool" : null));
|
||||
if (disable) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue