Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
commit e8bb8ade1a
@@ -26,14 +26,12 @@ package org.broadinstitute.sting.gatk.report;
 
 import org.apache.commons.lang.math.NumberUtils;
 
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.TreeMap;
+import java.util.*;
 
 /**
  * Holds values for a column in a GATK report table
  */
-public class GATKReportColumn extends TreeMap<Object, Object> {
+public class GATKReportColumn extends LinkedHashMap<Object, Object> {
     final private String columnName;
     final private Object defaultValue;
     final private String format;
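
The hunk above swaps the GATKReportColumn base class from TreeMap to LinkedHashMap: a TreeMap re-sorts rows by key, while a LinkedHashMap hands them back in insertion order, so report rows stay in the order they were written. A minimal standalone sketch of the difference (illustrative only, not GATK code; the class name is made up):

    import java.util.LinkedHashMap;
    import java.util.Map;
    import java.util.TreeMap;

    public class ColumnOrderDemo {
        public static void main(String[] args) {
            Map<Object, Object> sorted = new TreeMap<Object, Object>();          // old base class
            Map<Object, Object> insertion = new LinkedHashMap<Object, Object>(); // new base class
            for (String key : new String[]{"z", "a", "m"}) {
                sorted.put(key, key);
                insertion.put(key, key);
            }
            System.out.println(sorted.keySet());    // [a, m, z] -- re-sorted by key
            System.out.println(insertion.keySet()); // [z, a, m] -- insertion order preserved
        }
    }
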
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.genotyper;
+
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.clipping.ReadClipper;
+import org.broadinstitute.sting.utils.collections.Pair;
+import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
+import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.sam.ReadUtils;
+import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
+import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
+
+import java.util.*;
+
+/**
+ * Code for determining which indels are segregating among the samples.
+ *
+ * This code is just a refactor of the original code from Guillermo in the UG.
+ *
+ * @author Mark DePristo
+ * @since 3/26/12
+ */
+public class ConsensusAlleleCounter {
+    final protected static Logger logger = Logger.getLogger(ConsensusAlleleCounter.class);
+    private final int minIndelCountForGenotyping;
+    private final boolean doMultiAllelicCalls;
+    private final double minFractionInOneSample;
+    private final GenomeLocParser locParser;
+
+    public ConsensusAlleleCounter(final GenomeLocParser locParser,
+                                  final boolean doMultiAllelicCalls,
+                                  final int minIndelCountForGenotyping,
+                                  final double minFractionInOneSample) {
+        this.minIndelCountForGenotyping = minIndelCountForGenotyping;
+        this.doMultiAllelicCalls = doMultiAllelicCalls;
+        this.minFractionInOneSample = minFractionInOneSample;
+        this.locParser = locParser;
+    }
+
+    /**
+     * Returns a list of Alleles at this locus that may be segregating
+     *
+     * @param ref
+     * @param contexts
+     * @param contextType
+     * @return
+     */
+    public List<Allele> computeConsensusAlleles(ReferenceContext ref,
+                                                Map<String, AlignmentContext> contexts,
+                                                AlignmentContextUtils.ReadOrientation contextType) {
+        final Map<String, Integer> consensusIndelStrings = countConsensusAlleles(ref, contexts, contextType);
+//        logger.info("Alleles at " + ref.getLocus());
+//        for ( Map.Entry<String, Integer> elt : consensusIndelStrings.entrySet() ) {
+//            logger.info("  " + elt.getValue() + " => " + elt.getKey());
+//        }
+        return consensusCountsToAlleles(ref, consensusIndelStrings);
+    }
+
+    //
+    // TODO -- WARNING DOESN'T WORK WITH REDUCED READS
+    //
+    private Map<String, Integer> countConsensusAlleles(ReferenceContext ref,
+                                                       Map<String, AlignmentContext> contexts,
+                                                       AlignmentContextUtils.ReadOrientation contextType) {
+        final GenomeLoc loc = ref.getLocus();
+        HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>();
+
+        int insCount = 0, delCount = 0;
+        // quick check of total number of indels in pileup
+        for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
+            AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
+
+            final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
+            insCount += indelPileup.getNumberOfInsertions();
+            delCount += indelPileup.getNumberOfDeletions();
+        }
+
+        if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
+            return Collections.emptyMap();
+
+        for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
+            // todo -- warning, can be duplicating expensive partition here
+            AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
+
+            final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
+
+            final int nIndelReads = indelPileup.getNumberOfInsertions() + indelPileup.getNumberOfDeletions();
+            final int nReadsOverall = indelPileup.getNumberOfElements();
+            if ( nIndelReads == 0 || (nIndelReads / (1.0 * nReadsOverall)) < minFractionInOneSample) {
+//                if ( nIndelReads > 0 )
+//                    logger.info("Skipping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall);
+                continue;
+//            } else {
+//                logger.info("### Keeping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall);
+            }
+
+            for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) {
+                final GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
+                if (read == null)
+                    continue;
+                if (ReadUtils.is454Read(read)) {
+                    continue;
+                }
+
+                /* if (DEBUG && p.isIndel()) {
+                    System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n",
+                            read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(),
+                            p.getEventLength(),p.getType().toString(), p.getEventBases());
+                }
+                */
+                String indelString = p.getEventBases();
+                if (p.isInsertion()) {
+                    boolean foundKey = false;
+                    // copy of hashmap into temp arrayList
+                    ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
+                    for (String s : consensusIndelStrings.keySet()) {
+                        cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
+                    }
+
+                    if (read.getAlignmentEnd() == loc.getStart()) {
+                        // first corner condition: a read has an insertion at the end, and we're right at the insertion.
+                        // In this case, the read could have any of the inserted bases and we need to build a consensus
+
+                        for (int k=0; k < cList.size(); k++) {
+                            String s = cList.get(k).getFirst();
+                            int cnt = cList.get(k).getSecond();
+                            // case 1: current insertion is prefix of indel in hash map
+                            if (s.startsWith(indelString)) {
+                                cList.set(k,new Pair<String, Integer>(s,cnt+1));
+                                foundKey = true;
+                            }
+                            else if (indelString.startsWith(s)) {
+                                // case 2: indel stored in hash table is prefix of current insertion
+                                // In this case, new bases are new key.
+                                foundKey = true;
+                                cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
+                            }
+                        }
+                        if (!foundKey)
+                            // none of the above: event bases not supported by previous table, so add new key
+                            cList.add(new Pair<String, Integer>(indelString,1));
+
+                    }
+                    else if (read.getAlignmentStart() == loc.getStart()+1) {
+                        // opposite corner condition: read will start at current locus with an insertion
+                        for (int k=0; k < cList.size(); k++) {
+                            String s = cList.get(k).getFirst();
+                            int cnt = cList.get(k).getSecond();
+                            if (s.endsWith(indelString)) {
+                                // case 1: current insertion (indelString) is suffix of indel in hash map (s)
+                                cList.set(k,new Pair<String, Integer>(s,cnt+1));
+                                foundKey = true;
+                            }
+                            else if (indelString.endsWith(s)) {
+                                // case 2: indel stored in hash table is suffix of current insertion
+                                // In this case, new bases are new key.
+                                foundKey = true;
+                                cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
+                            }
+                        }
+                        if (!foundKey)
+                            // none of the above: event bases not supported by previous table, so add new key
+                            cList.add(new Pair<String, Integer>(indelString,1));
+
+                    }
+                    else {
+                        // normal case: insertion somewhere in the middle of a read: add count to arrayList
+                        int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
+                        cList.add(new Pair<String, Integer>(indelString,cnt+1));
+                    }
+
+                    // copy back arrayList into hashMap
+                    consensusIndelStrings.clear();
+                    for (Pair<String,Integer> pair : cList) {
+                        consensusIndelStrings.put(pair.getFirst(),pair.getSecond());
+                    }
+
+                }
+                else if (p.isDeletion()) {
+                    indelString = String.format("D%d",p.getEventLength());
+                    int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
+                    consensusIndelStrings.put(indelString,cnt+1);
+                }
+            }
+        }
+
+        return consensusIndelStrings;
+    }
+
+    private List<Allele> consensusCountsToAlleles(final ReferenceContext ref,
+                                                  final Map<String, Integer> consensusIndelStrings) {
+        final GenomeLoc loc = ref.getLocus();
+        final Collection<VariantContext> vcs = new ArrayList<VariantContext>();
+        int maxAlleleCnt = 0;
+        Allele refAllele, altAllele;
+
+        for (final Map.Entry<String, Integer> elt : consensusIndelStrings.entrySet()) {
+            final String s = elt.getKey();
+            final int curCnt = elt.getValue();
+            int stop = 0;
+
+            // if observed count is above minimum threshold, we will genotype this allele
+            if (curCnt < minIndelCountForGenotyping)
+                continue;
+
+            if (s.startsWith("D")) {
+                // get deletion length
+                final int dLen = Integer.valueOf(s.substring(1));
+                // get ref bases of accurate deletion
+                final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
+                stop = loc.getStart() + dLen;
+                final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
+
+                if (Allele.acceptableAlleleBases(refBases)) {
+                    refAllele = Allele.create(refBases, true);
+                    altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
+                }
+                else continue; // don't go on with this allele if refBases are non-standard
+            } else {
+                // insertion case
+                if (Allele.acceptableAlleleBases(s)) {
+                    refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
+                    altAllele = Allele.create(s, false);
+                    stop = loc.getStart();
+                }
+                else continue; // go on to next allele if consensus insertion has any non-standard base.
+            }
+
+            final VariantContextBuilder builder = new VariantContextBuilder().source("");
+            builder.loc(loc.getContig(), loc.getStart(), stop);
+            builder.alleles(Arrays.asList(refAllele, altAllele));
+            builder.referenceBaseForIndel(ref.getBase());
+            builder.noGenotypes();
+            if (doMultiAllelicCalls)
+                vcs.add(builder.make());
+            else {
+                if (curCnt > maxAlleleCnt) {
+                    maxAlleleCnt = curCnt;
+                    vcs.clear();
+                    vcs.add(builder.make());
+                }
+            }
+        }
+
+        if (vcs.isEmpty())
+            return Collections.emptyList(); // nothing else to do, no alleles passed minimum count criterion
+
+        final VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
+        return mergedVC.getAlleles();
+    }
+}
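
The insertion-counting loop in the new class has to reconcile partial observations: a read whose alignment ends (or starts) exactly at the event site may carry only a prefix (or suffix) of the true insertion, so its event bases are credited to a compatible, previously seen key rather than opening a new one. A simplified standalone sketch of the prefix rule (illustrative only, not GATK code; unlike the loop above it stops at the first compatible key):

    import java.util.HashMap;
    import java.util.Map;

    public class ConsensusMergeDemo {
        // Merge one observed insertion string into the running consensus counts.
        static void addInsertion(Map<String, Integer> counts, String eventBases) {
            for (Map.Entry<String, Integer> entry : new HashMap<String, Integer>(counts).entrySet()) {
                String seen = entry.getKey();
                if (seen.startsWith(eventBases)) {          // case 1: new event is a prefix of a known insertion
                    counts.put(seen, entry.getValue() + 1);
                    return;
                } else if (eventBases.startsWith(seen)) {   // case 2: known insertion is a prefix; longer bases become the key
                    counts.remove(seen);
                    counts.put(eventBases, entry.getValue() + 1);
                    return;
                }
            }
            counts.put(eventBases, 1);                      // neither: start a new consensus key
        }

        public static void main(String[] args) {
            Map<String, Integer> counts = new HashMap<String, Integer>();
            addInsertion(counts, "AC");
            addInsertion(counts, "ACGT"); // extends AC, so the key becomes ACGT with count 2
            addInsertion(counts, "A");    // prefix of ACGT, count 3
            System.out.println(counts);   // {ACGT=3}
        }
    }
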
@@ -52,11 +52,9 @@ import java.util.*;
 
 public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
     private final int HAPLOTYPE_SIZE;
 
-    private final int minIndelCountForGenotyping;
     private final boolean getAlleleListFromVCF;
 
     private boolean DEBUG = false;
-    private final boolean doMultiAllelicCalls = true;
     private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
     private PairHMMIndelErrorModel pairModel;
@@ -72,7 +70,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
     // gdebug removeme
     // todo -cleanup
     private GenomeLoc lastSiteVisited;
-    private ArrayList<Allele> alleleList;
+    private List<Allele> alleleList = new ArrayList<Allele>();
 
     static {
         indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
@@ -83,205 +81,19 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
         super(UAC, logger);
         pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
                 UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
-        alleleList = new ArrayList<Allele>();
         getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
-        minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
         HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
         DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
 
         haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
         ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
     }
 
-    private ArrayList<Allele> computeConsensusAlleles(ReferenceContext ref,
-                                                      Map<String, AlignmentContext> contexts,
-                                                      AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser) {
-        Allele refAllele = null, altAllele = null;
-        GenomeLoc loc = ref.getLocus();
-        ArrayList<Allele> aList = new ArrayList<Allele>();
-
-        HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>();
-
-        int insCount = 0, delCount = 0;
-        // quick check of total number of indels in pileup
-        for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
-            AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
-
-            final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
-            insCount += indelPileup.getNumberOfInsertions();
-            delCount += indelPileup.getNumberOfDeletions();
-        }
-
-        if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
-            return aList;
-
-        for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
-            // todo -- warning, can be duplicating expensive partition here
-            AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
-
-            final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
-
-            for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) {
-                //SAMRecord read = p.getRead();
-                GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
-                if (read == null)
-                    continue;
-                if (ReadUtils.is454Read(read)) {
-                    continue;
-                }
-
-                /* if (DEBUG && p.isIndel()) {
-                    System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n",
-                            read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(),
-                            p.getEventLength(),p.getType().toString(), p.getEventBases());
-                }
-                */
-
-                String indelString = p.getEventBases();
-                if (p.isInsertion()) {
-                    boolean foundKey = false;
-                    // copy of hashmap into temp arrayList
-                    ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
-                    for (String s : consensusIndelStrings.keySet()) {
-                        cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
-                    }
-
-                    if (read.getAlignmentEnd() == loc.getStart()) {
-                        // first corner condition: a read has an insertion at the end, and we're right at the insertion.
-                        // In this case, the read could have any of the inserted bases and we need to build a consensus
-
-                        for (int k=0; k < cList.size(); k++) {
-                            String s = cList.get(k).getFirst();
-                            int cnt = cList.get(k).getSecond();
-                            // case 1: current insertion is prefix of indel in hash map
-                            if (s.startsWith(indelString)) {
-                                cList.set(k,new Pair<String, Integer>(s,cnt+1));
-                                foundKey = true;
-                            }
-                            else if (indelString.startsWith(s)) {
-                                // case 2: indel stored in hash table is prefix of current insertion
-                                // In this case, new bases are new key.
-                                foundKey = true;
-                                cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
-                            }
-                        }
-                        if (!foundKey)
-                            // none of the above: event bases not supported by previous table, so add new key
-                            cList.add(new Pair<String, Integer>(indelString,1));
-
-                    }
-                    else if (read.getAlignmentStart() == loc.getStart()+1) {
-                        // opposite corner condition: read will start at current locus with an insertion
-                        for (int k=0; k < cList.size(); k++) {
-                            String s = cList.get(k).getFirst();
-                            int cnt = cList.get(k).getSecond();
-                            if (s.endsWith(indelString)) {
-                                // case 1: current insertion (indelString) is suffix of indel in hash map (s)
-                                cList.set(k,new Pair<String, Integer>(s,cnt+1));
-                                foundKey = true;
-                            }
-                            else if (indelString.endsWith(s)) {
-                                // case 2: indel stored in hash table is prefix of current insertion
-                                // In this case, new bases are new key.
-                                foundKey = true;
-                                cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
-                            }
-                        }
-                        if (!foundKey)
-                            // none of the above: event bases not supported by previous table, so add new key
-                            cList.add(new Pair<String, Integer>(indelString,1));
-
-                    }
-                    else {
-                        // normal case: insertion somewhere in the middle of a read: add count to arrayList
-                        int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
-                        cList.add(new Pair<String, Integer>(indelString,cnt+1));
-                    }
-
-                    // copy back arrayList into hashMap
-                    consensusIndelStrings.clear();
-                    for (Pair<String,Integer> pair : cList) {
-                        consensusIndelStrings.put(pair.getFirst(),pair.getSecond());
-                    }
-
-                }
-                else if (p.isDeletion()) {
-                    indelString = String.format("D%d",p.getEventLength());
-                    int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
-                    consensusIndelStrings.put(indelString,cnt+1);
-                }
-            }
-        }
-
-        Collection<VariantContext> vcs = new ArrayList<VariantContext>();
-        int maxAlleleCnt = 0;
-        String bestAltAllele = "";
-
-        for (String s : consensusIndelStrings.keySet()) {
-            int curCnt = consensusIndelStrings.get(s), stop = 0;
-            // if observed count if above minimum threshold, we will genotype this allele
-            if (curCnt < minIndelCountForGenotyping)
-                continue;
-
-            if (s.startsWith("D")) {
-                // get deletion length
-                int dLen = Integer.valueOf(s.substring(1));
-                // get ref bases of accurate deletion
-                int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
-                stop = loc.getStart() + dLen;
-                byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
-
-                if (Allele.acceptableAlleleBases(refBases)) {
-                    refAllele = Allele.create(refBases, true);
-                    altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
-                }
-                else continue; // don't go on with this allele if refBases are non-standard
-            } else {
-                // insertion case
-                if (Allele.acceptableAlleleBases(s)) {
-                    refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
-                    altAllele = Allele.create(s, false);
-                    stop = loc.getStart();
-                }
-                else continue; // go on to next allele if consensus insertion has any non-standard base.
-            }
-
-            ArrayList vcAlleles = new ArrayList<Allele>();
-            vcAlleles.add(refAllele);
-            vcAlleles.add(altAllele);
-
-            final VariantContextBuilder builder = new VariantContextBuilder().source("");
-            builder.loc(loc.getContig(), loc.getStart(), stop);
-            builder.alleles(vcAlleles);
-            builder.referenceBaseForIndel(ref.getBase());
-            builder.noGenotypes();
-            if (doMultiAllelicCalls)
-                vcs.add(builder.make());
-            else {
-                if (curCnt > maxAlleleCnt) {
-                    maxAlleleCnt = curCnt;
-                    vcs.clear();
-                    vcs.add(builder.make());
-                }
-            }
-        }
-
-        if (vcs.isEmpty())
-            return aList; // nothing else to do, no alleles passed minimum count criterion
-
-        VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
-
-        aList = new ArrayList<Allele>(mergedVC.getAlleles());
-
-        return aList;
-    }
+    private List<Allele> computeConsensusAlleles(ReferenceContext ref,
+                                                 Map<String, AlignmentContext> contexts,
+                                                 AlignmentContextUtils.ReadOrientation contextType,
+                                                 GenomeLocParser locParser) {
+        ConsensusAlleleCounter counter = new ConsensusAlleleCounter(locParser, true, UAC.MIN_INDEL_COUNT_FOR_GENOTYPING, UAC.MIN_INDEL_FRACTION_PER_SAMPLE);
+        return counter.computeConsensusAlleles(ref, contexts, contextType);
+    }
 
     private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);
@@ -118,6 +118,17 @@ public class UnifiedArgumentCollection {
     @Argument(fullName = "min_indel_count_for_genotyping", shortName = "minIndelCnt", doc = "Minimum number of consensus indels required to trigger genotyping run", required = false)
     public int MIN_INDEL_COUNT_FOR_GENOTYPING = 5;
+
+    /**
+     * Complementary argument to minIndelCnt. Only samples with at least this fraction of indel-containing reads will
+     * contribute to counting toward and overcoming the minIndelCnt threshold. This parameter ensures that in deep data
+     * you don't end up summing lots of super-rare errors to overcome the 5-read default threshold. It should work
+     * equally well for low-coverage and high-coverage samples, as low-coverage samples with any indel-containing reads
+     * should easily overcome this threshold.
+     */
+    @Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false)
+    public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25;
 
     /**
     * This argument informs the prior probability of having an indel at a site.
     */
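
A worked example of the interaction between minIndelCnt and minIndelFrac described in the javadoc above, using made-up read counts and the default thresholds (illustrative only, not GATK code):

    public class MinIndelFractionDemo {
        public static void main(String[] args) {
            double minIndelFrac = 0.25; // MIN_INDEL_FRACTION_PER_SAMPLE default
            int minIndelCnt = 5;        // MIN_INDEL_COUNT_FOR_GENOTYPING default
            int[][] samples = { {2, 8}, {1, 100}, {3, 10} }; // {indelReads, totalReads} per sample

            int contributing = 0;
            for (int[] s : samples) {
                double fraction = s[0] / (double) s[1];
                // mirrors the gate in ConsensusAlleleCounter: skip samples with no
                // indel reads or a per-sample indel fraction below the threshold
                if (s[0] > 0 && fraction >= minIndelFrac)
                    contributing += s[0]; // the deep sample with 1/100 error reads is excluded
            }
            System.out.println("counted indel reads = " + contributing
                    + ", trigger genotyping? " + (contributing >= minIndelCnt));
            // counted indel reads = 5, trigger genotyping? true
        }
    }
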
@@ -165,6 +176,7 @@ public class UnifiedArgumentCollection {
         uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;
         uac.MAX_DELETION_FRACTION = MAX_DELETION_FRACTION;
         uac.MIN_INDEL_COUNT_FOR_GENOTYPING = MIN_INDEL_COUNT_FOR_GENOTYPING;
+        uac.MIN_INDEL_FRACTION_PER_SAMPLE = MIN_INDEL_FRACTION_PER_SAMPLE;
         uac.INDEL_HETEROZYGOSITY = INDEL_HETEROZYGOSITY;
         uac.INDEL_GAP_OPEN_PENALTY = INDEL_GAP_OPEN_PENALTY;
         uac.INDEL_GAP_CONTINUATION_PENALTY = INDEL_GAP_CONTINUATION_PENALTY;
@@ -482,11 +482,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
     public void onTraversalDone(Integer result) {
         logger.info("Finalizing variant report");
 
-        for ( StateKey stateKey : evaluationContexts.keySet() ) {
-            NewEvaluationContext nec = evaluationContexts.get(stateKey);
+        for ( Map.Entry<StateKey, NewEvaluationContext> ecElt : evaluationContexts.entrySet() ) {
+            final StateKey stateKey = ecElt.getKey();
+            final NewEvaluationContext nec = ecElt.getValue();
 
             for ( VariantEvaluator ve : nec.getEvaluationClassList().values() ) {
                 ve.finalizeEvaluation();
+                final String veName = ve.getSimpleName(); // ve.getClass().getSimpleName();
 
                 AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
                 Map<Field, DataPoint> datamap = scanner.getData();
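
The loop rewrite above is the standard entrySet() idiom: iterating keySet() and calling get() per key pays for a second map lookup on every iteration, while entrySet() yields the key and value together. A minimal illustration (not GATK code):

    import java.util.Map;
    import java.util.TreeMap;

    public class EntrySetDemo {
        public static void main(String[] args) {
            Map<String, Integer> counts = new TreeMap<String, Integer>();
            counts.put("a", 1);
            counts.put("b", 2);

            for (String key : counts.keySet()) {                      // old pattern: extra lookup per key
                System.out.println(key + "=" + counts.get(key));
            }
            for (Map.Entry<String, Integer> e : counts.entrySet()) {  // new pattern: one pass, no re-lookup
                System.out.println(e.getKey() + "=" + e.getValue());
            }
        }
    }
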
@@ -498,7 +500,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
                     if (field.get(ve) instanceof TableType) {
                         TableType t = (TableType) field.get(ve);
 
-                        final String subTableName = ve.getClass().getSimpleName() + "." + field.getName();
+                        final String subTableName = veName + "." + field.getName();
                         final DataPoint dataPointAnn = datamap.get(field);
 
                         GATKReportTable table;
@@ -539,11 +541,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
                         }
                     }
                 } else {
-                    GATKReportTable table = report.getTable(ve.getClass().getSimpleName());
+                    GATKReportTable table = report.getTable(veName);
 
                     for ( VariantStratifier vs : stratificationObjects ) {
-                        String columnName = vs.getName();
-
+                        final String columnName = vs.getName();
                         table.set(stateKey.toString(), columnName, stateKey.get(vs.getName()));
                     }
 
@@ -8,6 +8,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
 public abstract class VariantEvaluator {
     private VariantEvalWalker walker;
+    private final String simpleName;
+
+    protected VariantEvaluator() {
+        this.simpleName = getClass().getSimpleName();
+    }
 
     public void initialize(VariantEvalWalker walker) {
         this.walker = walker;
@@ -90,4 +95,8 @@ public abstract class VariantEvaluator {
     protected static String formattedRatio(final int num, final int denom) {
         return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom));
     }
+
+    public String getSimpleName() {
+        return simpleName;
+    }
 }
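
These two hunks cache getClass().getSimpleName() once at construction, so the hot loops in onTraversalDone read a plain field instead of re-deriving the name through reflection on every call. A minimal illustration of the pattern (not GATK code; the class names are made up):

    public class SimpleNameDemo {
        static abstract class NamedModule {
            private final String simpleName;

            protected NamedModule() {
                this.simpleName = getClass().getSimpleName(); // computed exactly once
            }

            public String getSimpleName() {
                return simpleName; // plain field read on every later call
            }
        }

        static class CountVariants extends NamedModule {}

        public static void main(String[] args) {
            System.out.println(new CountVariants().getSimpleName()); // CountVariants
        }
    }
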
@@ -27,6 +27,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
 import java.lang.annotation.Annotation;
 import java.lang.reflect.Field;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.Map;
 
@@ -40,6 +41,7 @@ import java.util.Map;
  * the object, a Marshalling object can serialize or deserialize an analysis module.
  */
 public class AnalysisModuleScanner {
+    final private static Map<String, Annotation[]> annotationCache = new HashMap<String, Annotation[]>();
 
     // what we extracted from the class
     private Map<Field, DataPoint> datums = new LinkedHashMap<Field, DataPoint>(); // the data we've discovered
@@ -84,12 +86,22 @@ public class AnalysisModuleScanner {
         // get the fields from the class, and extract
         for ( Class superCls = cls; superCls != null; superCls=superCls.getSuperclass() ) {
             for (Field f : superCls.getDeclaredFields())
-                for (Annotation annotation : f.getAnnotations()) {
+                for (Annotation annotation : getAnnotations(f)) {
                     if (annotation.annotationType().equals(DataPoint.class))
                         datums.put(f,(DataPoint) annotation);
                 }
         }
     }
 
+    private Annotation[] getAnnotations(final Field field) {
+        final String fieldName = field.toString();
+        Annotation[] annotations = annotationCache.get(fieldName);
+        if ( annotations == null ) {
+            annotations = field.getAnnotations();
+            annotationCache.put(fieldName, annotations);
+        }
+        return annotations;
+    }
+
     /**
      *
@@ -36,6 +36,16 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
         return new TreeMap<String, VariantEvaluator>(evaluationInstances);
     }
 
+    public StateKey makeStateKey() {
+        Map<String, String> map = new HashMap<String, String>(size());
+
+        for (Map.Entry<VariantStratifier, String> elt : this.entrySet() ) {
+            map.put(elt.getKey().getName(), elt.getValue());
+        }
+
+        return new StateKey(map);
+    }
+
     public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) {
         for ( final VariantEvaluator evaluation : evaluationInstances.values() ) {
             // the other updateN methods don't see a null context
@@ -3,25 +3,67 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util;
 import java.util.Map;
 import java.util.TreeMap;
 
-public class StateKey extends TreeMap<String, String> {
-//    public int hashCode() {
-//        int hashCode = 1;
-//
-//        for (final Map.Entry<String,String> pair : this.entrySet()) {
-//            hashCode *= pair.getKey().hashCode() + pair.getValue().hashCode();
-//        }
-//
-//        return hashCode;
-//    }
-
-    public String toString() {
-        String value = "";
-
-        for ( final String key : this.keySet() ) {
-            //value += "\tstate " + key + ":" + this.get(key) + "\n";
-            value += String.format("%s:%s;", key, this.get(key));
-        }
-
-        return value;
-    }
-}
+/**
+ * A final constant class representing the specific state configuration
+ * for a VariantEvaluator instance.
+ *
+ * TODO optimizations to entirely remove the TreeMap and just store the HashMap for performance and use the tree for the sorted toString function.
+ */
+public final class StateKey {
+    /** High-performance cache of the toString operation for a constant class */
+    private final String string;
+    private final TreeMap<String, String> states;
+
+    public StateKey(final Map<String, String> states) {
+        this.states = new TreeMap<String, String>(states);
+        this.string = formatString();
+    }
+
+    public StateKey(final StateKey toOverride, final String keyOverride, final String valueOverride) {
+        if ( toOverride == null ) {
+            this.states = new TreeMap<String, String>();
+        } else {
+            this.states = new TreeMap<String, String>(toOverride.states);
+        }
+
+        this.states.put(keyOverride, valueOverride);
+        this.string = formatString();
+    }
+
+    @Override
+    public boolean equals(final Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        final StateKey stateKey = (StateKey) o;
+
+        if (states != null ? !states.equals(stateKey.states) : stateKey.states != null) return false;
+
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        return states.hashCode();
+    }
+
+    @Override
+    public String toString() {
+        return string;
+    }
+
+    private final String formatString() {
+        StringBuilder b = new StringBuilder();
+
+        for ( Map.Entry<String, String> entry : states.entrySet() ) {
+            b.append(String.format("%s:%s;", entry.getKey(), entry.getValue()));
+        }
+
+        return b.toString();
+    }
+
+    // TODO -- might be slow because of tree map
+    public String get(final String key) {
+        return states.get(key);
+    }
+}
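
A usage sketch for the rewritten StateKey above (assumes the class from this hunk is on the classpath; the stratifier names are made up). Keys are now immutable, toString() is precomputed, and the copy-with-override constructor derives a new key without mutating the original, which is what initializeStateKeys relies on in the next file:

    import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey;

    import java.util.HashMap;
    import java.util.Map;

    public class StateKeyDemo {
        public static void main(String[] args) {
            Map<String, String> states = new HashMap<String, String>();
            states.put("Novelty", "known");
            states.put("Filter", "called");

            StateKey base = new StateKey(states);
            StateKey derived = new StateKey(base, "Novelty", "novel"); // base is untouched

            System.out.println(base);    // Filter:called;Novelty:known;  (TreeMap keeps keys sorted)
            System.out.println(derived); // Filter:called;Novelty:novel;
        }
    }
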
@@ -214,20 +214,9 @@ public class VariantEvalUtils {
                 ecs.putAll(initializeEvaluationContexts(stratificationObjects, evaluationObjects, newStratStack, nec));
             }
         } else {
-            HashMap<StateKey, NewEvaluationContext> necs = new HashMap<StateKey, NewEvaluationContext>();
-
-            StateKey stateKey = new StateKey();
-            for (VariantStratifier vs : ec.keySet()) {
-                String state = ec.get(vs);
-
-                stateKey.put(vs.getName(), state);
-            }
-
+            final StateKey stateKey = ec.makeStateKey();
             ec.addEvaluationClassList(variantEvalWalker, stateKey, evaluationObjects);
-
-            necs.put(stateKey, ec);
-
-            return necs;
+            return new HashMap<StateKey, NewEvaluationContext>(Collections.singletonMap(stateKey, ec));
         }
 
         return ecs;
@@ -428,14 +417,8 @@ public class VariantEvalUtils {
             HashMap<VariantStratifier, List<String>> oneSetOfStates = newStateStack.pop();
             VariantStratifier vs = oneSetOfStates.keySet().iterator().next();
 
-            for (String state : oneSetOfStates.get(vs)) {
-                StateKey newStateKey = new StateKey();
-                if (stateKey != null) {
-                    newStateKey.putAll(stateKey);
-                }
-
-                newStateKey.put(vs.getName(), state);
-
+            for (final String state : oneSetOfStates.get(vs)) {
+                final StateKey newStateKey = new StateKey(stateKey, vs.getName(), state);
                 initializeStateKeys(stateMap, newStateStack, newStateKey, stateKeys);
             }
         } else {
@@ -17,8 +17,8 @@ import java.util.Map;
 
 public class UnifiedGenotyperIntegrationTest extends WalkerTest {
 
-    private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129;
-    private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b36dbSNP129;
+    private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
+    private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
     private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
 
     // --------------------------------------------------------------------------------------------------------------
@@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testMultipleSNPAlleles() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
-                Arrays.asList("0de4aeed6a52f08ed86a7642c812478b"));
+                Arrays.asList("849ee8b21b4bbb02dfc7867a4f1bc14b"));
         executeTest("test Multiple SNP alleles", spec);
     }
 
@@ -335,4 +335,37 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                 UserException.class);
         executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec);
     }
+
+    // --------------------------------------------------------------------------------------------------------------
+    //
+    // testing minIndelFraction
+    //
+    // --------------------------------------------------------------------------------------------------------------
+
+    final static String assessMinIndelFraction = baseCommandIndelsb37 + " -I " + validationDataLocation
+            + "978604.bam -L 1:978,586-978,626 -o %s --sites_only -rf Sample -goodSM 7377 -goodSM 22-0022 -goodSM 134 -goodSM 344029-53 -goodSM 14030";
+
+    @Test
+    public void testMinIndelFraction0() {
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                assessMinIndelFraction + " -minIndelFrac 0.0", 1,
+                Arrays.asList("f08ff07ad49d388198c1887baad05977"));
+        executeTest("test minIndelFraction 0.0", spec);
+    }
+
+    @Test
+    public void testMinIndelFraction25() {
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                assessMinIndelFraction + " -minIndelFrac 0.25", 1,
+                Arrays.asList("a0945fd21369aaf68c7f1d96dbb930d1"));
+        executeTest("test minIndelFraction 0.25", spec);
+    }
+
+    @Test
+    public void testMinIndelFraction100() {
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                assessMinIndelFraction + " -minIndelFrac 1", 1,
+                Arrays.asList("50fe9a4c5633f6395b45d9ec1e00d56a"));
+        executeTest("test minIndelFraction 1.0", spec);
+    }
 }