Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2012-03-26 16:42:03 -04:00
commit e8bb8ade1a
11 changed files with 444 additions and 246 deletions

View File

@ -26,14 +26,12 @@ package org.broadinstitute.sting.gatk.report;
import org.apache.commons.lang.math.NumberUtils;
import java.util.Arrays;
import java.util.Collection;
import java.util.TreeMap;
import java.util.*;
/**
* Holds values for a column in a GATK report table
*/
public class GATKReportColumn extends TreeMap<Object, Object> {
public class GATKReportColumn extends LinkedHashMap<Object, Object> {
final private String columnName;
final private Object defaultValue;
final private String format;

View File

@ -0,0 +1,286 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.util.*;
/**
* Code for determining which indels are segregating among the samples.
*
* This code is just a refactor of the original code from Guillermo in the UG.
*
* @author Mark DePristo
* @since 3/26/12
*/
public class ConsensusAlleleCounter {
final protected static Logger logger = Logger.getLogger(ConsensusAlleleCounter.class);
// Minimum number of indel-supporting reads (summed across contributing samples) a
// consensus allele must reach before it is genotyped.
private final int minIndelCountForGenotyping;
// If true, every allele passing the count threshold is emitted; if false, only the
// single most frequently observed alternate allele is kept.
private final boolean doMultiAllelicCalls;
// A sample contributes its indel reads to the consensus counts only when at least this
// fraction of its reads at the locus carry an indel (guards deep data against summing
// rare per-read errors up to the count threshold).
private final double minFractionInOneSample;
// Parser used to merge candidate VariantContexts into a single allele list.
private final GenomeLocParser locParser;
public ConsensusAlleleCounter(final GenomeLocParser locParser,
final boolean doMultiAllelicCalls,
final int minIndelCountForGenotyping,
final double minFractionInOneSample) {
this.minIndelCountForGenotyping = minIndelCountForGenotyping;
this.doMultiAllelicCalls = doMultiAllelicCalls;
this.minFractionInOneSample = minFractionInOneSample;
this.locParser = locParser;
}
/**
 * Returns a list of Alleles at this locus that may be segregating.
 *
 * Counts consensus indel events across all samples, then converts the counted
 * event strings into merged ref/alt Alleles.
 *
 * @param ref         the reference context at the locus being examined
 * @param contexts    per-sample alignment contexts at this locus
 * @param contextType read orientation used to stratify each sample's pileup
 * @return the alleles of the merged candidate VariantContexts, or an empty list
 *         if no allele reaches the minimum count threshold
 */
public List<Allele> computeConsensusAlleles(ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType) {
final Map<String, Integer> consensusIndelStrings = countConsensusAlleles(ref, contexts, contextType);
// logger.info("Alleles at " + ref.getLocus());
// for ( Map.Entry<String, Integer> elt : consensusIndelStrings.entrySet() ) {
// logger.info(" " + elt.getValue() + " => " + elt.getKey());
// }
return consensusCountsToAlleles(ref, consensusIndelStrings);
}
//
// TODO -- WARNING DOESN'T WORK WITH REDUCED READS
//
/**
 * Builds a map from consensus indel event strings to the number of supporting reads.
 *
 * Keys are "D<length>" for deletions and the inserted bases themselves for insertions.
 * Returns an empty (immutable) map when the total insertion and deletion counts are
 * both below the genotyping threshold, or when no sample passes the per-sample
 * fraction filter.
 */
private Map<String, Integer> countConsensusAlleles(ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType) {
final GenomeLoc loc = ref.getLocus();
HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>();
int insCount = 0, delCount = 0;
// quick check of total number of indels in pileup
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
insCount += indelPileup.getNumberOfInsertions();
delCount += indelPileup.getNumberOfDeletions();
}
if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
return Collections.emptyMap();
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
// todo -- warning, can be duplicating expensive partition here
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
final int nIndelReads = indelPileup.getNumberOfInsertions() + indelPileup.getNumberOfDeletions();
final int nReadsOverall = indelPileup.getNumberOfElements();
// per-sample fraction filter: skip samples whose indel-read fraction is below threshold
// (nReadsOverall > 0 is implied here because nIndelReads > 0 short-circuits first)
if ( nIndelReads == 0 || (nIndelReads / (1.0 * nReadsOverall)) < minFractionInOneSample) {
// if ( nIndelReads > 0 )
// logger.info("Skipping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall);
continue;
// } else {
// logger.info("### Keeping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall);
}
for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) {
// clip adaptor sequence before using alignment boundaries below
final GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
if (read == null)
continue;
// 454 reads are excluded from indel consensus building
if (ReadUtils.is454Read(read)) {
continue;
}
/* if (DEBUG && p.isIndel()) {
System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n",
read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(),
p.getEventLength(),p.getType().toString(), p.getEventBases());
}
*/
String indelString = p.getEventBases();
if (p.isInsertion()) {
boolean foundKey = false;
// copy of hashmap into temp arrayList
ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
for (String s : consensusIndelStrings.keySet()) {
cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
}
if (read.getAlignmentEnd() == loc.getStart()) {
// first corner condition: a read has an insertion at the end, and we're right at the insertion.
// In this case, the read could have any of the inserted bases and we need to build a consensus
// NOTE(review): if indelString relates to several existing keys, each matching entry is
// incremented — presumably intentional consensus voting; confirm before changing.
for (int k=0; k < cList.size(); k++) {
String s = cList.get(k).getFirst();
int cnt = cList.get(k).getSecond();
// case 1: current insertion is prefix of indel in hash map
if (s.startsWith(indelString)) {
cList.set(k,new Pair<String, Integer>(s,cnt+1));
foundKey = true;
}
else if (indelString.startsWith(s)) {
// case 2: indel stored in hash table is prefix of current insertion
// In this case, new bases are new key.
foundKey = true;
cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
}
}
if (!foundKey)
// none of the above: event bases not supported by previous table, so add new key
cList.add(new Pair<String, Integer>(indelString,1));
}
else if (read.getAlignmentStart() == loc.getStart()+1) {
// opposite corner condition: read will start at current locus with an insertion
for (int k=0; k < cList.size(); k++) {
String s = cList.get(k).getFirst();
int cnt = cList.get(k).getSecond();
if (s.endsWith(indelString)) {
// case 1: current insertion (indelString) is suffix of indel in hash map (s)
cList.set(k,new Pair<String, Integer>(s,cnt+1));
foundKey = true;
}
else if (indelString.endsWith(s)) {
// case 2: indel stored in hash table is suffix of current insertion
// In this case, new bases are new key.
foundKey = true;
cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
}
}
if (!foundKey)
// none of the above: event bases not supported by previous table, so add new key
cList.add(new Pair<String, Integer>(indelString,1));
}
else {
// normal case: insertion somewhere in the middle of a read: add count to arrayList
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
cList.add(new Pair<String, Integer>(indelString,cnt+1));
}
// copy back arrayList into hashMap
// (the cnt+1 entry appended above overwrites the stale copy of the same key)
consensusIndelStrings.clear();
for (Pair<String,Integer> pair : cList) {
consensusIndelStrings.put(pair.getFirst(),pair.getSecond());
}
}
else if (p.isDeletion()) {
// deletions are keyed by length only, e.g. "D3"; inserted-base keys are ACGT
// strings so the two key namespaces cannot collide
indelString = String.format("D%d",p.getEventLength());
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
consensusIndelStrings.put(indelString,cnt+1);
}
}
}
return consensusIndelStrings;
}
/**
 * Converts counted consensus event strings into a merged list of Alleles.
 *
 * Each event passing minIndelCountForGenotyping becomes a bi-allelic
 * VariantContext (ref + alt); the contexts are then merged and the merged
 * allele set returned. With doMultiAllelicCalls == false only the single
 * highest-count event survives.
 *
 * @param ref                   reference context providing locus and ref bases
 * @param consensusIndelStrings map from event string ("D<len>" or inserted bases) to read count
 * @return merged alleles, or an empty list if nothing passed the count threshold
 */
private List<Allele> consensusCountsToAlleles(final ReferenceContext ref,
final Map<String, Integer> consensusIndelStrings) {
final GenomeLoc loc = ref.getLocus();
final Collection<VariantContext> vcs = new ArrayList<VariantContext>();
int maxAlleleCnt = 0;
Allele refAllele, altAllele;
for (final Map.Entry<String, Integer> elt : consensusIndelStrings.entrySet()) {
final String s = elt.getKey();
final int curCnt = elt.getValue();
int stop = 0;
// if observed count is above minimum threshold, we will genotype this allele
if (curCnt < minIndelCountForGenotyping)
continue;
if (s.startsWith("D")) {
// get deletion length
final int dLen = Integer.valueOf(s.substring(1));
// get ref bases of accurate deletion
final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
stop = loc.getStart() + dLen;
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
if (Allele.acceptableAlleleBases(refBases)) {
refAllele = Allele.create(refBases, true);
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
}
else continue; // don't go on with this allele if refBases are non-standard
} else {
// insertion case
if (Allele.acceptableAlleleBases(s)) {
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
altAllele = Allele.create(s, false);
stop = loc.getStart();
}
else continue; // go on to next allele if consensus insertion has any non-standard base.
}
final VariantContextBuilder builder = new VariantContextBuilder().source("");
builder.loc(loc.getContig(), loc.getStart(), stop);
builder.alleles(Arrays.asList(refAllele, altAllele));
builder.referenceBaseForIndel(ref.getBase());
builder.noGenotypes();
if (doMultiAllelicCalls)
vcs.add(builder.make());
else {
// single-allele mode: keep only the most frequent event seen so far
if (curCnt > maxAlleleCnt) {
maxAlleleCnt = curCnt;
vcs.clear();
vcs.add(builder.make());
}
}
}
if (vcs.isEmpty())
return Collections.emptyList(); // nothing else to do, no alleles passed minimum count criterion
final VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
return mergedVC.getAlleles();
}
}

View File

@ -52,11 +52,9 @@ import java.util.*;
public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
private final int HAPLOTYPE_SIZE;
private final int minIndelCountForGenotyping;
private final boolean getAlleleListFromVCF;
private boolean DEBUG = false;
private final boolean doMultiAllelicCalls = true;
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
private PairHMMIndelErrorModel pairModel;
@ -72,7 +70,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
// gdebug removeme
// todo -cleanup
private GenomeLoc lastSiteVisited;
private ArrayList<Allele> alleleList;
private List<Allele> alleleList = new ArrayList<Allele>();
static {
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
@ -83,205 +81,19 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
super(UAC, logger);
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
alleleList = new ArrayList<Allele>();
getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
}
private ArrayList<Allele> computeConsensusAlleles(ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser) {
Allele refAllele = null, altAllele = null;
GenomeLoc loc = ref.getLocus();
ArrayList<Allele> aList = new ArrayList<Allele>();
HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>();
int insCount = 0, delCount = 0;
// quick check of total number of indels in pileup
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
insCount += indelPileup.getNumberOfInsertions();
delCount += indelPileup.getNumberOfDeletions();
}
if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
return aList;
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
// todo -- warning, can be duplicating expensive partition here
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) {
//SAMRecord read = p.getRead();
GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
if (read == null)
continue;
if (ReadUtils.is454Read(read)) {
continue;
}
/* if (DEBUG && p.isIndel()) {
System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n",
read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(),
p.getEventLength(),p.getType().toString(), p.getEventBases());
}
*/
String indelString = p.getEventBases();
if (p.isInsertion()) {
boolean foundKey = false;
// copy of hashmap into temp arrayList
ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
for (String s : consensusIndelStrings.keySet()) {
cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
}
if (read.getAlignmentEnd() == loc.getStart()) {
// first corner condition: a read has an insertion at the end, and we're right at the insertion.
// In this case, the read could have any of the inserted bases and we need to build a consensus
for (int k=0; k < cList.size(); k++) {
String s = cList.get(k).getFirst();
int cnt = cList.get(k).getSecond();
// case 1: current insertion is prefix of indel in hash map
if (s.startsWith(indelString)) {
cList.set(k,new Pair<String, Integer>(s,cnt+1));
foundKey = true;
}
else if (indelString.startsWith(s)) {
// case 2: indel stored in hash table is prefix of current insertion
// In this case, new bases are new key.
foundKey = true;
cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
}
}
if (!foundKey)
// none of the above: event bases not supported by previous table, so add new key
cList.add(new Pair<String, Integer>(indelString,1));
}
else if (read.getAlignmentStart() == loc.getStart()+1) {
// opposite corner condition: read will start at current locus with an insertion
for (int k=0; k < cList.size(); k++) {
String s = cList.get(k).getFirst();
int cnt = cList.get(k).getSecond();
if (s.endsWith(indelString)) {
// case 1: current insertion (indelString) is suffix of indel in hash map (s)
cList.set(k,new Pair<String, Integer>(s,cnt+1));
foundKey = true;
}
else if (indelString.endsWith(s)) {
// case 2: indel stored in hash table is prefix of current insertion
// In this case, new bases are new key.
foundKey = true;
cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
}
}
if (!foundKey)
// none of the above: event bases not supported by previous table, so add new key
cList.add(new Pair<String, Integer>(indelString,1));
}
else {
// normal case: insertion somewhere in the middle of a read: add count to arrayList
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
cList.add(new Pair<String, Integer>(indelString,cnt+1));
}
// copy back arrayList into hashMap
consensusIndelStrings.clear();
for (Pair<String,Integer> pair : cList) {
consensusIndelStrings.put(pair.getFirst(),pair.getSecond());
}
}
else if (p.isDeletion()) {
indelString = String.format("D%d",p.getEventLength());
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
consensusIndelStrings.put(indelString,cnt+1);
}
}
}
Collection<VariantContext> vcs = new ArrayList<VariantContext>();
int maxAlleleCnt = 0;
String bestAltAllele = "";
for (String s : consensusIndelStrings.keySet()) {
int curCnt = consensusIndelStrings.get(s), stop = 0;
// if observed count if above minimum threshold, we will genotype this allele
if (curCnt < minIndelCountForGenotyping)
continue;
if (s.startsWith("D")) {
// get deletion length
int dLen = Integer.valueOf(s.substring(1));
// get ref bases of accurate deletion
int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
stop = loc.getStart() + dLen;
byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
if (Allele.acceptableAlleleBases(refBases)) {
refAllele = Allele.create(refBases, true);
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
}
else continue; // don't go on with this allele if refBases are non-standard
} else {
// insertion case
if (Allele.acceptableAlleleBases(s)) {
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
altAllele = Allele.create(s, false);
stop = loc.getStart();
}
else continue; // go on to next allele if consensus insertion has any non-standard base.
}
ArrayList vcAlleles = new ArrayList<Allele>();
vcAlleles.add(refAllele);
vcAlleles.add(altAllele);
final VariantContextBuilder builder = new VariantContextBuilder().source("");
builder.loc(loc.getContig(), loc.getStart(), stop);
builder.alleles(vcAlleles);
builder.referenceBaseForIndel(ref.getBase());
builder.noGenotypes();
if (doMultiAllelicCalls)
vcs.add(builder.make());
else {
if (curCnt > maxAlleleCnt) {
maxAlleleCnt = curCnt;
vcs.clear();
vcs.add(builder.make());
}
}
}
if (vcs.isEmpty())
return aList; // nothing else to do, no alleles passed minimum count criterion
VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
aList = new ArrayList<Allele>(mergedVC.getAlleles());
return aList;
private List<Allele> computeConsensusAlleles(ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenomeLocParser locParser) {
ConsensusAlleleCounter counter = new ConsensusAlleleCounter(locParser, true, UAC.MIN_INDEL_COUNT_FOR_GENOTYPING, UAC.MIN_INDEL_FRACTION_PER_SAMPLE);
return counter.computeConsensusAlleles(ref, contexts, contextType);
}
private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);

View File

@ -118,6 +118,17 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "min_indel_count_for_genotyping", shortName = "minIndelCnt", doc = "Minimum number of consensus indels required to trigger genotyping run", required = false)
public int MIN_INDEL_COUNT_FOR_GENOTYPING = 5;
/**
* Complementary argument to minIndelCnt. Only samples with at least this fraction of indel-containing reads will contribute
* to counting and overcoming the threshold minIndelCnt. This parameter ensures that in deep data you don't end
* up summing lots of super-rare errors to overcome the 5-read default threshold. It should work equally well for
* low-coverage and high-coverage samples, as low-coverage samples with any indel-containing reads should easily
* overcome this threshold.
*/
@Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false)
public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25;
/**
* This argument informs the prior probability of having an indel at a site.
*/
@ -165,6 +176,7 @@ public class UnifiedArgumentCollection {
uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;
uac.MAX_DELETION_FRACTION = MAX_DELETION_FRACTION;
uac.MIN_INDEL_COUNT_FOR_GENOTYPING = MIN_INDEL_COUNT_FOR_GENOTYPING;
uac.MIN_INDEL_FRACTION_PER_SAMPLE = MIN_INDEL_FRACTION_PER_SAMPLE;
uac.INDEL_HETEROZYGOSITY = INDEL_HETEROZYGOSITY;
uac.INDEL_GAP_OPEN_PENALTY = INDEL_GAP_OPEN_PENALTY;
uac.INDEL_GAP_CONTINUATION_PENALTY = INDEL_GAP_CONTINUATION_PENALTY;

View File

@ -482,11 +482,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
public void onTraversalDone(Integer result) {
logger.info("Finalizing variant report");
for ( StateKey stateKey : evaluationContexts.keySet() ) {
NewEvaluationContext nec = evaluationContexts.get(stateKey);
for ( Map.Entry<StateKey, NewEvaluationContext> ecElt : evaluationContexts.entrySet() ) {
final StateKey stateKey = ecElt.getKey();
final NewEvaluationContext nec = ecElt.getValue();
for ( VariantEvaluator ve : nec.getEvaluationClassList().values() ) {
ve.finalizeEvaluation();
final String veName = ve.getSimpleName(); // ve.getClass().getSimpleName();
AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
Map<Field, DataPoint> datamap = scanner.getData();
@ -498,7 +500,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
if (field.get(ve) instanceof TableType) {
TableType t = (TableType) field.get(ve);
final String subTableName = ve.getClass().getSimpleName() + "." + field.getName();
final String subTableName = veName + "." + field.getName();
final DataPoint dataPointAnn = datamap.get(field);
GATKReportTable table;
@ -539,11 +541,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
}
}
} else {
GATKReportTable table = report.getTable(ve.getClass().getSimpleName());
GATKReportTable table = report.getTable(veName);
for ( VariantStratifier vs : stratificationObjects ) {
String columnName = vs.getName();
final String columnName = vs.getName();
table.set(stateKey.toString(), columnName, stateKey.get(vs.getName()));
}

View File

@ -8,6 +8,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
public abstract class VariantEvaluator {
private VariantEvalWalker walker;
private final String simpleName;
protected VariantEvaluator() {
this.simpleName = getClass().getSimpleName();
}
public void initialize(VariantEvalWalker walker) {
this.walker = walker;
@ -90,4 +95,8 @@ public abstract class VariantEvaluator {
protected static String formattedRatio(final int num, final int denom) {
return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom));
}
public String getSimpleName() {
return simpleName;
}
}

View File

@ -27,6 +27,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.lang.annotation.Annotation;
import java.lang.reflect.Field;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
@ -40,6 +41,7 @@ import java.util.Map;
* the object, a Mashalling object can serialize or deserialize a analysis module.
*/
public class AnalysisModuleScanner {
final private static Map<String, Annotation[]> annotationCache = new HashMap<String, Annotation[]>();
// what we extracted from the class
private Map<Field, DataPoint> datums = new LinkedHashMap<Field, DataPoint>(); // the data we've discovered
@ -84,12 +86,22 @@ public class AnalysisModuleScanner {
// get the fields from the class, and extract
for ( Class superCls = cls; superCls != null; superCls=superCls.getSuperclass() ) {
for (Field f : superCls.getDeclaredFields())
for (Annotation annotation : f.getAnnotations()) {
for (Annotation annotation : getAnnotations(f)) {
if (annotation.annotationType().equals(DataPoint.class))
datums.put(f,(DataPoint) annotation);
}
}
}
private Annotation[] getAnnotations(final Field field) {
final String fieldName = field.toString();
Annotation[] annotations = annotationCache.get(fieldName);
if ( annotations == null ) {
annotations = field.getAnnotations();
annotationCache.put(fieldName, annotations);
}
return annotations;
}
/**
*

View File

@ -36,6 +36,16 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
return new TreeMap<String, VariantEvaluator>(evaluationInstances);
}
public StateKey makeStateKey() {
Map<String, String> map = new HashMap<String, String>(size());
for (Map.Entry<VariantStratifier, String> elt : this.entrySet() ) {
map.put(elt.getKey().getName(), elt.getValue());
}
return new StateKey(map);
}
public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) {
for ( final VariantEvaluator evaluation : evaluationInstances.values() ) {
// the other updateN methods don't see a null context

View File

@ -3,25 +3,67 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util;
import java.util.Map;
import java.util.TreeMap;
public class StateKey extends TreeMap<String, String> {
// public int hashCode() {
// int hashCode = 1;
//
// for (final Map.Entry<String,String> pair : this.entrySet()) {
// hashCode *= pair.getKey().hashCode() + pair.getValue().hashCode();
// }
//
// return hashCode;
// }
/**
* A final constant class representing the specific state configuration
* for a VariantEvaluator instance.
*
* TODO optimizations to entirely remove the TreeMap and just store the HashMap for performance and use the tree for the sorted tostring function.
*/
public final class StateKey {
/** High-performance cache of the toString operation for a constant class */
private final String string;
private final TreeMap<String, String> states;
public String toString() {
String value = "";
public StateKey(final Map<String, String> states) {
this.states = new TreeMap<String, String>(states);
this.string = formatString();
}
for ( final String key : this.keySet() ) {
//value += "\tstate " + key + ":" + this.get(key) + "\n";
value += String.format("%s:%s;", key, this.get(key));
public StateKey(final StateKey toOverride, final String keyOverride, final String valueOverride) {
if ( toOverride == null ) {
this.states = new TreeMap<String, String>();
} else {
this.states = new TreeMap<String, String>(toOverride.states);
}
return value;
this.states.put(keyOverride, valueOverride);
this.string = formatString();
}
@Override
public boolean equals(final Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
final StateKey stateKey = (StateKey) o;
if (states != null ? !states.equals(stateKey.states) : stateKey.states != null) return false;
return true;
}
@Override
public int hashCode() {
return states.hashCode();
}
@Override
public String toString() {
return string;
}
private final String formatString() {
StringBuilder b = new StringBuilder();
for ( Map.Entry<String, String> entry : states.entrySet() ) {
b.append(String.format("%s:%s;", entry.getKey(), entry.getValue()));
}
return b.toString();
}
// TODO -- might be slow because of tree map
public String get(final String key) {
return states.get(key);
}
}

View File

@ -214,20 +214,9 @@ public class VariantEvalUtils {
ecs.putAll(initializeEvaluationContexts(stratificationObjects, evaluationObjects, newStratStack, nec));
}
} else {
HashMap<StateKey, NewEvaluationContext> necs = new HashMap<StateKey, NewEvaluationContext>();
StateKey stateKey = new StateKey();
for (VariantStratifier vs : ec.keySet()) {
String state = ec.get(vs);
stateKey.put(vs.getName(), state);
}
final StateKey stateKey = ec.makeStateKey();
ec.addEvaluationClassList(variantEvalWalker, stateKey, evaluationObjects);
necs.put(stateKey, ec);
return necs;
return new HashMap<StateKey, NewEvaluationContext>(Collections.singletonMap(stateKey, ec));
}
return ecs;
@ -428,14 +417,8 @@ public class VariantEvalUtils {
HashMap<VariantStratifier, List<String>> oneSetOfStates = newStateStack.pop();
VariantStratifier vs = oneSetOfStates.keySet().iterator().next();
for (String state : oneSetOfStates.get(vs)) {
StateKey newStateKey = new StateKey();
if (stateKey != null) {
newStateKey.putAll(stateKey);
}
newStateKey.put(vs.getName(), state);
for (final String state : oneSetOfStates.get(vs)) {
final StateKey newStateKey = new StateKey(stateKey, vs.getName(), state);
initializeStateKeys(stateMap, newStateStack, newStateKey, stateKeys);
}
} else {

View File

@ -17,8 +17,8 @@ import java.util.Map;
public class UnifiedGenotyperIntegrationTest extends WalkerTest {
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129;
private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b36dbSNP129;
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
// --------------------------------------------------------------------------------------------------------------
@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
Arrays.asList("0de4aeed6a52f08ed86a7642c812478b"));
Arrays.asList("849ee8b21b4bbb02dfc7867a4f1bc14b"));
executeTest("test Multiple SNP alleles", spec);
}
@ -335,4 +335,37 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
UserException.class);
executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec);
}
// --------------------------------------------------------------------------------------------------------------
//
// testing minIndelFraction
//
// --------------------------------------------------------------------------------------------------------------
final static String assessMinIndelFraction = baseCommandIndelsb37 + " -I " + validationDataLocation
+ "978604.bam -L 1:978,586-978,626 -o %s --sites_only -rf Sample -goodSM 7377 -goodSM 22-0022 -goodSM 134 -goodSM 344029-53 -goodSM 14030";
@Test
public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
Arrays.asList("f08ff07ad49d388198c1887baad05977"));
executeTest("test minIndelFraction 0.0", spec);
}
@Test
public void testMinIndelFraction25() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
Arrays.asList("a0945fd21369aaf68c7f1d96dbb930d1"));
executeTest("test minIndelFraction 0.25", spec);
}
@Test
public void testMinIndelFraction100() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 1", 1,
Arrays.asList("50fe9a4c5633f6395b45d9ec1e00d56a"));
executeTest("test minIndelFraction 1.0", spec);
}
}