Fix merge conflicts
This commit is contained in:
commit
bfb73e1c5b
|
|
@ -0,0 +1,21 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocComparator;
|
||||||
|
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A stash of regions that must be kept uncompressed in all samples
|
||||||
|
*
|
||||||
|
* In general, these are regions that were kept uncompressed by a tumor sample and we want to force
|
||||||
|
* all other samples (normals and/or tumors) to also keep these regions uncompressed
|
||||||
|
*
|
||||||
|
* User: carneiro
|
||||||
|
* Date: 10/15/12
|
||||||
|
* Time: 4:08 PM
|
||||||
|
*/
|
||||||
|
public class CompressionStash extends TreeSet<SimpleGenomeLoc> {
|
||||||
|
public CompressionStash() {
|
||||||
|
super(new GenomeLocComparator());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -55,11 +55,12 @@ public class MultiSampleCompressor implements Compressor {
|
||||||
final int minBaseQual,
|
final int minBaseQual,
|
||||||
final ReduceReads.DownsampleStrategy downsampleStrategy,
|
final ReduceReads.DownsampleStrategy downsampleStrategy,
|
||||||
final int nContigs,
|
final int nContigs,
|
||||||
final boolean allowPolyploidReduction) {
|
final boolean allowPolyploidReduction,
|
||||||
|
final CompressionStash compressionStash) {
|
||||||
for ( String name : SampleUtils.getSAMFileSamples(header) ) {
|
for ( String name : SampleUtils.getSAMFileSamples(header) ) {
|
||||||
compressorsPerSample.put(name,
|
compressorsPerSample.put(name,
|
||||||
new SingleSampleCompressor(contextSize, downsampleCoverage,
|
new SingleSampleCompressor(contextSize, downsampleCoverage,
|
||||||
minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, allowPolyploidReduction));
|
minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, allowPolyploidReduction, compressionStash));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -222,6 +222,8 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
||||||
HashMap<String, Long> readNameHash; // This hash will keep the name of the original read the new compressed name (a number).
|
HashMap<String, Long> readNameHash; // This hash will keep the name of the original read the new compressed name (a number).
|
||||||
Long nextReadNumber = 1L; // The next number to use for the compressed read name.
|
Long nextReadNumber = 1L; // The next number to use for the compressed read name.
|
||||||
|
|
||||||
|
CompressionStash compressionStash = new CompressionStash();
|
||||||
|
|
||||||
SortedSet<GenomeLoc> intervalList;
|
SortedSet<GenomeLoc> intervalList;
|
||||||
|
|
||||||
private static final String PROGRAM_RECORD_NAME = "GATK ReduceReads"; // The name that will go in the @PG tag
|
private static final String PROGRAM_RECORD_NAME = "GATK ReduceReads"; // The name that will go in the @PG tag
|
||||||
|
|
@ -328,7 +330,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public ReduceReadsStash reduceInit() {
|
public ReduceReadsStash reduceInit() {
|
||||||
return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, USE_POLYPLOID_REDUCTION));
|
return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, USE_POLYPLOID_REDUCTION, compressionStash));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ public class SingleSampleCompressor implements Compressor {
|
||||||
final private ReduceReads.DownsampleStrategy downsampleStrategy;
|
final private ReduceReads.DownsampleStrategy downsampleStrategy;
|
||||||
final private int nContigs;
|
final private int nContigs;
|
||||||
final private boolean allowPolyploidReduction;
|
final private boolean allowPolyploidReduction;
|
||||||
|
final CompressionStash compressionStash;
|
||||||
|
|
||||||
private SlidingWindow slidingWindow;
|
private SlidingWindow slidingWindow;
|
||||||
private int slidingWindowCounter;
|
private int slidingWindowCounter;
|
||||||
|
|
@ -33,7 +34,8 @@ public class SingleSampleCompressor implements Compressor {
|
||||||
final int minBaseQual,
|
final int minBaseQual,
|
||||||
final ReduceReads.DownsampleStrategy downsampleStrategy,
|
final ReduceReads.DownsampleStrategy downsampleStrategy,
|
||||||
final int nContigs,
|
final int nContigs,
|
||||||
final boolean allowPolyploidReduction) {
|
final boolean allowPolyploidReduction,
|
||||||
|
final CompressionStash compressionStash) {
|
||||||
this.contextSize = contextSize;
|
this.contextSize = contextSize;
|
||||||
this.downsampleCoverage = downsampleCoverage;
|
this.downsampleCoverage = downsampleCoverage;
|
||||||
this.minMappingQuality = minMappingQuality;
|
this.minMappingQuality = minMappingQuality;
|
||||||
|
|
@ -44,6 +46,7 @@ public class SingleSampleCompressor implements Compressor {
|
||||||
this.downsampleStrategy = downsampleStrategy;
|
this.downsampleStrategy = downsampleStrategy;
|
||||||
this.nContigs = nContigs;
|
this.nContigs = nContigs;
|
||||||
this.allowPolyploidReduction = allowPolyploidReduction;
|
this.allowPolyploidReduction = allowPolyploidReduction;
|
||||||
|
this.compressionStash = compressionStash;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -65,7 +68,7 @@ public class SingleSampleCompressor implements Compressor {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( slidingWindow == null) { // this is the first read
|
if ( slidingWindow == null) { // this is the first read
|
||||||
slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities(), nContigs, allowPolyploidReduction);
|
slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities(), nContigs, allowPolyploidReduction, compressionStash);
|
||||||
slidingWindowCounter++;
|
slidingWindowCounter++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,6 @@ import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
|
import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||||
|
|
@ -56,6 +55,7 @@ public class SlidingWindow {
|
||||||
private final int nContigs;
|
private final int nContigs;
|
||||||
|
|
||||||
private boolean allowPolyploidReductionInGeneral;
|
private boolean allowPolyploidReductionInGeneral;
|
||||||
|
private CompressionStash compressionStash;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The types of synthetic reads to use in the finalizeAndAdd method
|
* The types of synthetic reads to use in the finalizeAndAdd method
|
||||||
|
|
@ -87,7 +87,7 @@ public class SlidingWindow {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs, boolean allowPolyploidReduction) {
|
public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs, boolean allowPolyploidReduction, CompressionStash compressionStash) {
|
||||||
this.contextSize = contextSize;
|
this.contextSize = contextSize;
|
||||||
this.downsampleCoverage = downsampleCoverage;
|
this.downsampleCoverage = downsampleCoverage;
|
||||||
|
|
||||||
|
|
@ -118,6 +118,7 @@ public class SlidingWindow {
|
||||||
this.nContigs = nContigs;
|
this.nContigs = nContigs;
|
||||||
|
|
||||||
this.allowPolyploidReductionInGeneral = allowPolyploidReduction;
|
this.allowPolyploidReductionInGeneral = allowPolyploidReduction;
|
||||||
|
this.compressionStash = compressionStash;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -145,7 +146,7 @@ public class SlidingWindow {
|
||||||
* @param variantSite boolean array with true marking variant regions
|
* @param variantSite boolean array with true marking variant regions
|
||||||
* @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region.
|
* @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region.
|
||||||
*/
|
*/
|
||||||
private Pair<Integer, Integer> getNextVariantRegion(int from, int to, boolean[] variantSite) {
|
private SimpleGenomeLoc getNextVariantRegion(int from, int to, boolean[] variantSite) {
|
||||||
boolean foundStart = false;
|
boolean foundStart = false;
|
||||||
int variantRegionStartIndex = 0;
|
int variantRegionStartIndex = 0;
|
||||||
for (int i=from; i<to; i++) {
|
for (int i=from; i<to; i++) {
|
||||||
|
|
@ -154,10 +155,10 @@ public class SlidingWindow {
|
||||||
foundStart = true;
|
foundStart = true;
|
||||||
}
|
}
|
||||||
else if(!variantSite[i] && foundStart) {
|
else if(!variantSite[i] && foundStart) {
|
||||||
return(new Pair<Integer, Integer>(variantRegionStartIndex, i-1));
|
return(new SimpleGenomeLoc(contig, contigIndex, variantRegionStartIndex, i-1, true));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return (foundStart) ? new Pair<Integer, Integer>(variantRegionStartIndex, -1) : null;
|
return (foundStart) ? new SimpleGenomeLoc(contig, contigIndex, variantRegionStartIndex, to-1, false) : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -168,23 +169,22 @@ public class SlidingWindow {
|
||||||
* @param variantSite boolean array with true marking variant regions
|
* @param variantSite boolean array with true marking variant regions
|
||||||
* @return a list with start/stops of variant regions following getNextVariantRegion description
|
* @return a list with start/stops of variant regions following getNextVariantRegion description
|
||||||
*/
|
*/
|
||||||
private List<Pair<Integer, Integer>> getAllVariantRegions(int from, int to, boolean[] variantSite) {
|
private CompressionStash getVariantRegionsFromThisSample(int from, int to, boolean[] variantSite) {
|
||||||
List<Pair<Integer,Integer>> regions = new LinkedList<Pair<Integer, Integer>>();
|
CompressionStash regions = new CompressionStash();
|
||||||
int index = from;
|
int index = from;
|
||||||
while(index < to) {
|
while(index < to) {
|
||||||
Pair<Integer,Integer> result = getNextVariantRegion(index, to, variantSite);
|
SimpleGenomeLoc result = getNextVariantRegion(index, to, variantSite);
|
||||||
if (result == null)
|
if (result == null)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
regions.add(result);
|
regions.add(result);
|
||||||
if (result.getSecond() < 0)
|
if (result.getStop() < 0)
|
||||||
break;
|
break;
|
||||||
index = result.getSecond() + 1;
|
index = result.getStop() + 1;
|
||||||
}
|
}
|
||||||
return regions;
|
return regions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determines if the window can be slid given the new incoming read.
|
* Determines if the window can be slid given the new incoming read.
|
||||||
*
|
*
|
||||||
|
|
@ -203,7 +203,7 @@ public class SlidingWindow {
|
||||||
boolean[] variantSite = markSites(getStartLocation(windowHeader) + readStartHeaderIndex);
|
boolean[] variantSite = markSites(getStartLocation(windowHeader) + readStartHeaderIndex);
|
||||||
int breakpoint = Math.max(readStartHeaderIndex - contextSize - 1, 0); // this is the limit of what we can close/send to consensus (non-inclusive)
|
int breakpoint = Math.max(readStartHeaderIndex - contextSize - 1, 0); // this is the limit of what we can close/send to consensus (non-inclusive)
|
||||||
|
|
||||||
List<Pair<Integer,Integer>> regions = getAllVariantRegions(0, breakpoint, variantSite);
|
CompressionStash regions = getVariantRegionsFromThisSample(0, breakpoint, variantSite);
|
||||||
finalizedReads = closeVariantRegions(regions, false);
|
finalizedReads = closeVariantRegions(regions, false);
|
||||||
|
|
||||||
List<GATKSAMRecord> readsToRemove = new LinkedList<GATKSAMRecord>();
|
List<GATKSAMRecord> readsToRemove = new LinkedList<GATKSAMRecord>();
|
||||||
|
|
@ -567,26 +567,31 @@ public class SlidingWindow {
|
||||||
result.addAll(addToSyntheticReads(windowHeader, 0, stop, false));
|
result.addAll(addToSyntheticReads(windowHeader, 0, stop, false));
|
||||||
result.addAll(finalizeAndAdd(ConsensusType.BOTH));
|
result.addAll(finalizeAndAdd(ConsensusType.BOTH));
|
||||||
|
|
||||||
return result; // finalized reads will be downsampled if necessary
|
return result; // finalized reads will be downsampled if necessary
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private List<GATKSAMRecord> closeVariantRegions(List<Pair<Integer, Integer>> regions, boolean forceClose) {
|
private List<GATKSAMRecord> closeVariantRegions(CompressionStash regions, boolean forceClose) {
|
||||||
List<GATKSAMRecord> allReads = new LinkedList<GATKSAMRecord>();
|
List<GATKSAMRecord> allReads = new LinkedList<GATKSAMRecord>();
|
||||||
if (!regions.isEmpty()) {
|
if (!regions.isEmpty()) {
|
||||||
int lastStop = -1;
|
int lastStop = -1;
|
||||||
for (Pair<Integer, Integer> region : regions) {
|
for (SimpleGenomeLoc region : regions) {
|
||||||
int start = region.getFirst();
|
int start = region.getStart();
|
||||||
int stop = region.getSecond();
|
int stop = region.getStop();
|
||||||
if (stop < 0 && forceClose)
|
|
||||||
stop = windowHeader.size() - 1;
|
if (!region.isFinished()) {
|
||||||
if (stop >= 0) {
|
if(forceClose) // region is unfinished but we're forcing the close of this window
|
||||||
allReads.addAll(closeVariantRegion(start, stop, regions.size() > 1));
|
stop = windowHeader.size() - 1;
|
||||||
lastStop = stop;
|
else
|
||||||
|
continue; // region is unfinished and we're not forcing the close of this window
|
||||||
}
|
}
|
||||||
|
|
||||||
|
allReads.addAll(closeVariantRegion(start, stop, regions.size() > 1));
|
||||||
|
lastStop = stop;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < lastStop; i++) // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion)
|
|
||||||
windowHeader.remove(); // todo -- can't believe java doesn't allow me to just do windowHeader = windowHeader.get(stop). Should be more efficient here!
|
for (int i = 0; i < lastStop; i++) // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion)
|
||||||
|
windowHeader.remove(); // todo -- can't believe java doesn't allow me to just do windowHeader = windowHeader.get(stop). Should be more efficient here!
|
||||||
}
|
}
|
||||||
return allReads;
|
return allReads;
|
||||||
}
|
}
|
||||||
|
|
@ -626,7 +631,7 @@ public class SlidingWindow {
|
||||||
|
|
||||||
if (!windowHeader.isEmpty()) {
|
if (!windowHeader.isEmpty()) {
|
||||||
boolean[] variantSite = markSites(getStopLocation(windowHeader) + 1);
|
boolean[] variantSite = markSites(getStopLocation(windowHeader) + 1);
|
||||||
List<Pair<Integer,Integer>> regions = getAllVariantRegions(0, windowHeader.size(), variantSite);
|
CompressionStash regions = getVariantRegionsFromThisSample(0, windowHeader.size(), variantSite);
|
||||||
finalizedReads = closeVariantRegions(regions, true);
|
finalizedReads = closeVariantRegions(regions, true);
|
||||||
|
|
||||||
if (!windowHeader.isEmpty()) {
|
if (!windowHeader.isEmpty()) {
|
||||||
|
|
|
||||||
|
|
@ -5,23 +5,22 @@ import org.apache.log4j.Logger;
|
||||||
import org.apache.log4j.TTCCLayout;
|
import org.apache.log4j.TTCCLayout;
|
||||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||||
|
|
||||||
import java.io.FileOutputStream;
|
import java.io.*;
|
||||||
import java.io.PrintStream;
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created with IntelliJ IDEA.
|
* A simple GATK utility (i.e, runs from command-line) for assessing the performance of
|
||||||
* User: depristo
|
* the exact model
|
||||||
* Date: 10/2/12
|
|
||||||
* Time: 10:25 AM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
*/
|
||||||
public class AFCalcPerformanceTest {
|
public class AFCalcPerformanceTest {
|
||||||
final static Logger logger = Logger.getLogger(AFCalcPerformanceTest.class);
|
final static Logger logger = Logger.getLogger(AFCalcPerformanceTest.class);
|
||||||
|
|
@ -190,7 +189,8 @@ public class AFCalcPerformanceTest {
|
||||||
|
|
||||||
public enum Operation {
|
public enum Operation {
|
||||||
ANALYZE,
|
ANALYZE,
|
||||||
SINGLE
|
SINGLE,
|
||||||
|
EXACT_LOG
|
||||||
}
|
}
|
||||||
public static void main(final String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
final TTCCLayout layout = new TTCCLayout();
|
final TTCCLayout layout = new TTCCLayout();
|
||||||
|
|
@ -204,10 +204,49 @@ public class AFCalcPerformanceTest {
|
||||||
switch ( op ) {
|
switch ( op ) {
|
||||||
case ANALYZE: analyze(args); break;
|
case ANALYZE: analyze(args); break;
|
||||||
case SINGLE: profileBig(args); break;
|
case SINGLE: profileBig(args); break;
|
||||||
|
case EXACT_LOG: exactLog(args); break;
|
||||||
default: throw new IllegalAccessException("unknown operation " + op);
|
default: throw new IllegalAccessException("unknown operation " + op);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void exactLog(final String[] args) throws Exception {
|
||||||
|
final File ref = new File(args[1]);
|
||||||
|
final File exactLogFile = new File(args[2]);
|
||||||
|
final List<Integer> startsToUse = new LinkedList<Integer>();
|
||||||
|
|
||||||
|
for ( int i = 3; i < args.length; i++ )
|
||||||
|
startsToUse.add(Integer.valueOf(args[i]));
|
||||||
|
|
||||||
|
final CachingIndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(ref);
|
||||||
|
final GenomeLocParser parser = new GenomeLocParser(seq);
|
||||||
|
final BufferedReader reader = new BufferedReader(new FileReader(exactLogFile));
|
||||||
|
final List<ExactCallLogger.ExactCall> loggedCalls = ExactCallLogger.readExactLog(reader, startsToUse, parser);
|
||||||
|
|
||||||
|
for ( final ExactCallLogger.ExactCall call : loggedCalls ) {
|
||||||
|
final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1,
|
||||||
|
AFCalcFactory.Calculation.EXACT_INDEPENDENT,
|
||||||
|
AFCalcTestBuilder.PriorType.human);
|
||||||
|
logger.info(call);
|
||||||
|
final SimpleTimer timer = new SimpleTimer().start();
|
||||||
|
final AFCalcResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, testBuilder.makePriors());
|
||||||
|
final long newNanoTime = timer.getElapsedTimeNano();
|
||||||
|
if ( call.originalCall.anyPolymorphic(-1) || result.anyPolymorphic(-1) ) {
|
||||||
|
logger.info("**** ONE IS POLY");
|
||||||
|
}
|
||||||
|
logger.info("\t\t getLog10PosteriorOfAFGT0: " + call.originalCall.getLog10PosteriorOfAFGT0() + " vs " + result.getLog10PosteriorOfAFGT0());
|
||||||
|
final double speedup = call.runtime / (1.0 * newNanoTime);
|
||||||
|
logger.info("\t\t runtime: " + call.runtime + " vs " + newNanoTime + " speedup " + String.format("%.2f", speedup) + "x");
|
||||||
|
for ( final Allele a : call.originalCall.getAllelesUsedInGenotyping() ) {
|
||||||
|
if ( a.isNonReference() ) {
|
||||||
|
final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : "";
|
||||||
|
logger.info("\t\t MLE " + a + ": " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE);
|
||||||
|
final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
|
||||||
|
logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static void profileBig(final String[] args) throws Exception {
|
private static void profileBig(final String[] args) throws Exception {
|
||||||
final int nSamples = Integer.valueOf(args[1]);
|
final int nSamples = Integer.valueOf(args[1]);
|
||||||
final int ac = Integer.valueOf(args[2]);
|
final int ac = Integer.valueOf(args[2]);
|
||||||
|
|
@ -234,7 +273,6 @@ public class AFCalcPerformanceTest {
|
||||||
final List<ModelParams> modelParams = Arrays.asList(
|
final List<ModelParams> modelParams = Arrays.asList(
|
||||||
new ModelParams(AFCalcFactory.Calculation.EXACT_REFERENCE, 10000, 10),
|
new ModelParams(AFCalcFactory.Calculation.EXACT_REFERENCE, 10000, 10),
|
||||||
// new ModelParams(AFCalcTestBuilder.ModelType.GeneralExact, 100, 10),
|
// new ModelParams(AFCalcTestBuilder.ModelType.GeneralExact, 100, 10),
|
||||||
new ModelParams(AFCalcFactory.Calculation.EXACT_CONSTRAINED, 10000, 100),
|
|
||||||
new ModelParams(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 10000, 1000));
|
new ModelParams(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 10000, 1000));
|
||||||
|
|
||||||
final boolean ONLY_HUMAN_PRIORS = false;
|
final boolean ONLY_HUMAN_PRIORS = false;
|
||||||
|
|
|
||||||
|
|
@ -45,12 +45,16 @@ public class AFCalcTestBuilder {
|
||||||
human
|
human
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getNumAltAlleles() {
|
||||||
|
return numAltAlleles;
|
||||||
|
}
|
||||||
|
|
||||||
public int getnSamples() {
|
public int getnSamples() {
|
||||||
return nSamples;
|
return nSamples;
|
||||||
}
|
}
|
||||||
|
|
||||||
public AFCalc makeModel() {
|
public AFCalc makeModel() {
|
||||||
return AFCalcFactory.createAFCalc(modelType, nSamples, 4, 4, 2);
|
return AFCalcFactory.createAFCalc(modelType, nSamples, getNumAltAlleles(), getNumAltAlleles(), 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
public double[] makePriors() {
|
public double[] makePriors() {
|
||||||
|
|
|
||||||
|
|
@ -348,12 +348,14 @@ public class LikelihoodCalculationEngine {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// add all filtered reads to the NO_CALL list because they weren't given any likelihoods
|
// add all filtered reads to the NO_CALL list because they weren't given any likelihoods
|
||||||
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
|
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
|
||||||
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
|
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
|
||||||
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
|
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
|
||||||
for( final Allele a : call.getFirst().getAlleles() )
|
for( final Allele a : call.getFirst().getAlleles() ) {
|
||||||
likelihoodMap.add(read, a, 0.0);
|
likelihoodMap.add(read, a, 0.0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,36 +21,36 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
executeTest(testName, spec);
|
executeTest(testName, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = true)
|
||||||
public void testDefaultCompression() {
|
public void testDefaultCompression() {
|
||||||
RRTest("testDefaultCompression ", L, "323dd4deabd7767efa0f2c6e7fa4189f");
|
RRTest("testDefaultCompression ", L, "1f95f3193bd9f120a73c34a0087abaf6");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = true)
|
||||||
public void testMultipleIntervals() {
|
public void testMultipleIntervals() {
|
||||||
String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
|
String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
|
||||||
RRTest("testMultipleIntervals ", intervals, "c437fb160547ff271f8eba30e5f3ff76");
|
RRTest("testMultipleIntervals ", intervals, "79213d6ac68d56d4d72dcf511223e424");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = true)
|
||||||
public void testHighCompression() {
|
public void testHighCompression() {
|
||||||
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "3a607bc3ebaf84e9dc44e005c5f8a047");
|
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "dab2aa8e3655139974bbe12a568363d9");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = true)
|
||||||
public void testLowCompression() {
|
public void testLowCompression() {
|
||||||
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "7c9b4a70c2c90b0a995800aa42852e63");
|
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "7c9b4a70c2c90b0a995800aa42852e63");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = true)
|
||||||
public void testIndelCompression() {
|
public void testIndelCompression() {
|
||||||
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "f7b9fa44c10bc4b2247813d2b8dc1973");
|
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "1255245ed4ebeacda90f0dbb4e4da081");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = true)
|
||||||
public void testFilteredDeletionCompression() {
|
public void testFilteredDeletionCompression() {
|
||||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
|
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
|
||||||
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("891bd6dcda66611f343e8ff25f34aaeb")));
|
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("122e4e60c4412a31d0aeb3cce879e841")));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -61,20 +61,20 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
*
|
*
|
||||||
* This bam is simplified to replicate the exact bug with the three provided intervals.
|
* This bam is simplified to replicate the exact bug with the three provided intervals.
|
||||||
*/
|
*/
|
||||||
@Test(enabled = false)
|
@Test(enabled = true)
|
||||||
public void testAddingReadAfterTailingTheStash() {
|
public void testAddingReadAfterTailingTheStash() {
|
||||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
|
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
|
||||||
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("886b43e1f26ff18425814dc7563931c6")));
|
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("4b590269cbe3574dbdd5bdc2bc6f5f1c")));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Divide by zero bug reported by GdA and users in the forum. Happens when the downsampler goes over a region where all reads get
|
* Divide by zero bug reported by GdA and users in the forum. Happens when the downsampler goes over a region where all reads get
|
||||||
* filtered out.
|
* filtered out.
|
||||||
*/
|
*/
|
||||||
@Test(enabled = false)
|
@Test(enabled = true)
|
||||||
public void testDivideByZero() {
|
public void testDivideByZero() {
|
||||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
|
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
|
||||||
executeTest("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("93ffdc209d4cc0fc4f0169ca9be55cc2")));
|
executeTest("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("d8d066304f7c187f182bfb50f39baa0c")));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,87 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class AFCalcPerformanceUnitTest extends BaseTest {
|
||||||
|
@DataProvider(name = "ScalingTests")
|
||||||
|
public Object[][] makepolyTestProviderLotsOfAlleles() {
|
||||||
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
|
// list of all high-quality models in the system
|
||||||
|
final List<AFCalcFactory.Calculation> biAllelicModels = Arrays.asList(
|
||||||
|
AFCalcFactory.Calculation.EXACT_INDEPENDENT,
|
||||||
|
AFCalcFactory.Calculation.EXACT_REFERENCE);
|
||||||
|
|
||||||
|
final List<AFCalcFactory.Calculation> multiAllelicModels = Arrays.asList(
|
||||||
|
AFCalcFactory.Calculation.EXACT_INDEPENDENT);
|
||||||
|
|
||||||
|
// for ( final int nonTypePLs : Arrays.asList(100) ) {
|
||||||
|
// for ( final int nSamples : Arrays.asList(10000) ) {
|
||||||
|
// final List<Integer> alleleCounts = Arrays.asList(50);
|
||||||
|
// for ( final int nAltAlleles : Arrays.asList(1) ) {
|
||||||
|
for ( final int nonTypePLs : Arrays.asList(100) ) {
|
||||||
|
for ( final int nSamples : Arrays.asList(100, 1000) ) {
|
||||||
|
final List<Integer> alleleCounts = Arrays.asList(0, 1, 2, 3, 4, 5, 10, 50, 500);
|
||||||
|
for ( final int nAltAlleles : Arrays.asList(1, 2, 3) ) {
|
||||||
|
final List<AFCalcFactory.Calculation> models = nAltAlleles > 1 ? multiAllelicModels : biAllelicModels;
|
||||||
|
for ( final AFCalcFactory.Calculation model : models ) {
|
||||||
|
for ( final List<Integer> ACs : Utils.makePermutations(alleleCounts, nAltAlleles, true) ) {
|
||||||
|
if ( MathUtils.sum(ACs) < nSamples * 2 ) {
|
||||||
|
final AFCalcTestBuilder testBuilder
|
||||||
|
= new AFCalcTestBuilder(nSamples, nAltAlleles, model, AFCalcTestBuilder.PriorType.human);
|
||||||
|
tests.add(new Object[]{testBuilder, ACs, nonTypePLs});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
private Pair<Integer, Integer> estNumberOfEvaluations(final AFCalcTestBuilder testBuilder, final VariantContext vc, final int nonTypePL) {
|
||||||
|
final int evalOverhead = 2; // 2
|
||||||
|
final int maxEvalsPerSamplePerAC = 3;
|
||||||
|
|
||||||
|
int minEvals = 0, maxEvals = 0;
|
||||||
|
|
||||||
|
for ( final Allele alt : vc.getAlternateAlleles() ) {
|
||||||
|
final int AC = vc.getCalledChrCount(alt);
|
||||||
|
minEvals += AC + evalOverhead; // everyone is hom-var
|
||||||
|
maxEvals += AC * maxEvalsPerSamplePerAC + 10;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Pair<Integer, Integer>(minEvals, maxEvals);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "ScalingTests")
|
||||||
|
private void testScaling(final AFCalcTestBuilder testBuilder, final List<Integer> ACs, final int nonTypePL) {
|
||||||
|
final AFCalc calc = testBuilder.makeModel();
|
||||||
|
final double[] priors = testBuilder.makePriors();
|
||||||
|
final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL);
|
||||||
|
final AFCalcResult result = calc.getLog10PNonRef(vc, priors);
|
||||||
|
final Pair<Integer, Integer> expectedNEvaluation = estNumberOfEvaluations(testBuilder, vc, nonTypePL);
|
||||||
|
final int minEvals = expectedNEvaluation.getFirst();
|
||||||
|
final int maxEvals = expectedNEvaluation.getSecond();
|
||||||
|
|
||||||
|
logger.warn(" min " + minEvals + " obs " + result.getnEvaluations() + " max " + maxEvals + " for test " + testBuilder + " sum(ACs)=" + (int)MathUtils.sum(ACs));
|
||||||
|
|
||||||
|
Assert.assertTrue(result.getnEvaluations() >= minEvals,
|
||||||
|
"Actual number of evaluations " + result.getnEvaluations() + " < min number of evals " + minEvals);
|
||||||
|
Assert.assertTrue(result.getnEvaluations() <= maxEvals,
|
||||||
|
"Actual number of evaluations " + result.getnEvaluations() + " > max number of evals " + minEvals);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,77 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class AFCalcResultUnitTest extends BaseTest {
|
||||||
|
private static class MyTest {
|
||||||
|
final double[] Ls, expectedPosteriors;
|
||||||
|
|
||||||
|
private MyTest(double[] ls, double[] expectedPosteriors) {
|
||||||
|
Ls = ls;
|
||||||
|
this.expectedPosteriors = expectedPosteriors;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "Ls [" + Utils.join(",", Ls) + "] expectedPosteriors [" + Utils.join(",", expectedPosteriors) + "]";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "TestComputePosteriors")
|
||||||
|
public Object[][] makeTestCombineGLs() {
|
||||||
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
|
tests.add(new Object[]{new MyTest(log10Even, log10Even)});
|
||||||
|
|
||||||
|
for ( double L0 = -1e9; L0 < 0.0; L0 /= 10.0 ) {
|
||||||
|
for ( double L1 = -1e2; L1 < 0.0; L1 /= 100.0 ) {
|
||||||
|
final double[] input = new double[]{L0, L1};
|
||||||
|
final double[] expected = MathUtils.normalizeFromLog10(input, true);
|
||||||
|
tests.add(new Object[]{new MyTest(input, expected)});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( double bigBadL = -1e50; bigBadL < -1e200; bigBadL *= 10 ) {
|
||||||
|
// test that a huge bad likelihood remains, even with a massive better result
|
||||||
|
for ( final double betterL : Arrays.asList(-1000.0, -100.0, -10.0, -1.0, -0.1, -0.01, -0.001, 0.0)) {
|
||||||
|
tests.add(new Object[]{new MyTest(new double[]{bigBadL, betterL}, new double[]{bigBadL, 0.0})});
|
||||||
|
tests.add(new Object[]{new MyTest(new double[]{betterL, bigBadL}, new double[]{0.0, bigBadL})});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// test that a modest bad likelihood with an ~0.0 value doesn't get lost
|
||||||
|
for ( final double badL : Arrays.asList(-10000.0, -1000.0, -100.0, -10.0)) {
|
||||||
|
tests.add(new Object[]{new MyTest(new double[]{badL, -1e-9}, new double[]{badL, 0.0})});
|
||||||
|
tests.add(new Object[]{new MyTest(new double[]{-1e-9, badL}, new double[]{0.0, badL})});
|
||||||
|
}
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
final static double[] log10Even = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true);
|
||||||
|
final static Allele C = Allele.create("C");
|
||||||
|
final static List<Allele> alleles = Arrays.asList(Allele.create("A", true), C);
|
||||||
|
|
||||||
|
@Test(enabled = true, dataProvider = "TestComputePosteriors")
|
||||||
|
private void testComputingPosteriors(final MyTest data) {
|
||||||
|
final AFCalcResult result = new AFCalcResult(new int[]{0}, 1, alleles, data.Ls, log10Even, Collections.singletonMap(C, -1.0));
|
||||||
|
|
||||||
|
Assert.assertEquals(result.getLog10PosteriorOfAFEq0(), data.expectedPosteriors[0], 1e-3, "AF = 0 not expected");
|
||||||
|
Assert.assertEquals(result.getLog10PosteriorOfAFGT0(), data.expectedPosteriors[1], 1e-3, "AF > 0 not expected");
|
||||||
|
|
||||||
|
final double[] actualPosteriors = new double[]{result.getLog10PosteriorOfAFEq0(), result.getLog10PosteriorOfAFGT0()};
|
||||||
|
Assert.assertEquals(MathUtils.sumLog10(actualPosteriors), 1.0, 1e-3, "Posteriors don't sum to 1 with 1e-3 precision");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -372,16 +372,14 @@ public class AFCalcUnitTest extends BaseTest {
|
||||||
final VariantContext vc3 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C, G)).make();
|
final VariantContext vc3 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C, G)).make();
|
||||||
final AFCalcTestBuilder.PriorType priorType = AFCalcTestBuilder.PriorType.flat;
|
final AFCalcTestBuilder.PriorType priorType = AFCalcTestBuilder.PriorType.flat;
|
||||||
|
|
||||||
final List<AFCalcFactory.Calculation> constrainedModel = Arrays.asList(AFCalcFactory.Calculation.EXACT_CONSTRAINED);
|
|
||||||
|
|
||||||
final double TOLERANCE = 0.5;
|
final double TOLERANCE = 0.5;
|
||||||
|
|
||||||
final List<PNonRefData> initialPNonRefData = Arrays.asList(
|
final List<PNonRefData> initialPNonRefData = Arrays.asList(
|
||||||
// bi-allelic sites
|
// bi-allelic sites
|
||||||
new PNonRefData(vc2, makePL(AA, 0, 10, 10), 0.1666667, TOLERANCE, true),
|
new PNonRefData(vc2, makePL(AA, 0, 10, 10), 0.1666667, TOLERANCE, true),
|
||||||
new PNonRefData(vc2, makePL(AA, 0, 1, 10), 0.4721084, TOLERANCE, false, constrainedModel),
|
new PNonRefData(vc2, makePL(AA, 0, 1, 10), 0.4721084, TOLERANCE, false),
|
||||||
new PNonRefData(vc2, makePL(AA, 0, 1, 1), 0.6136992, TOLERANCE, false, constrainedModel),
|
new PNonRefData(vc2, makePL(AA, 0, 1, 1), 0.6136992, TOLERANCE, false),
|
||||||
new PNonRefData(vc2, makePL(AA, 0, 5, 5), 0.3874259, TOLERANCE, false, constrainedModel),
|
new PNonRefData(vc2, makePL(AA, 0, 5, 5), 0.3874259, TOLERANCE, false),
|
||||||
new PNonRefData(vc2, makePL(AC, 10, 0, 10), 0.9166667, TOLERANCE, true),
|
new PNonRefData(vc2, makePL(AC, 10, 0, 10), 0.9166667, TOLERANCE, true),
|
||||||
new PNonRefData(vc2, makePL(CC, 10, 10, 0), 0.9166667, TOLERANCE, true),
|
new PNonRefData(vc2, makePL(CC, 10, 10, 0), 0.9166667, TOLERANCE, true),
|
||||||
|
|
||||||
|
|
@ -448,7 +446,7 @@ public class AFCalcUnitTest extends BaseTest {
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true & ! DEBUG_ONLY, dataProvider = "Models")
|
@Test(enabled = true && !DEBUG_ONLY, dataProvider = "Models")
|
||||||
public void testBiallelicPriors(final AFCalc model) {
|
public void testBiallelicPriors(final AFCalc model) {
|
||||||
|
|
||||||
for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) {
|
for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) {
|
||||||
|
|
@ -456,26 +454,29 @@ public class AFCalcUnitTest extends BaseTest {
|
||||||
|
|
||||||
for ( int log10NonRefPrior = 1; log10NonRefPrior < 10*REF_PL; log10NonRefPrior += 1 ) {
|
for ( int log10NonRefPrior = 1; log10NonRefPrior < 10*REF_PL; log10NonRefPrior += 1 ) {
|
||||||
final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior);
|
final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior);
|
||||||
final double[] priors = MathUtils.normalizeFromLog10(MathUtils.toLog10(new double[]{refPrior, (1-refPrior) / 2, (1-refPrior) / 2}), true);
|
final double nonRefPrior = (1-refPrior) / 2;
|
||||||
GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior);
|
final double[] priors = MathUtils.normalizeFromLog10(MathUtils.toLog10(new double[]{refPrior, nonRefPrior, nonRefPrior}), true);
|
||||||
final AFCalcResult resultTracker = cfg.execute();
|
if ( ! Double.isInfinite(priors[1]) ) {
|
||||||
final int actualAC = resultTracker.getAlleleCountsOfMLE()[0];
|
GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior);
|
||||||
|
final AFCalcResult resultTracker = cfg.execute();
|
||||||
|
final int actualAC = resultTracker.getAlleleCountsOfMLE()[0];
|
||||||
|
|
||||||
final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0];
|
final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0];
|
||||||
final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1] - Math.log10(0.5);
|
final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1] - Math.log10(0.5);
|
||||||
final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior));
|
final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior));
|
||||||
final double log10NonRefPost = Math.log10(nonRefPost);
|
final double log10NonRefPost = Math.log10(nonRefPost);
|
||||||
|
|
||||||
if ( ! Double.isInfinite(log10NonRefPost) )
|
if ( ! Double.isInfinite(log10NonRefPost) )
|
||||||
Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), log10NonRefPost, 1e-2);
|
Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), log10NonRefPost, 1e-2);
|
||||||
|
|
||||||
if ( nonRefPost >= 0.9 )
|
if ( nonRefPost >= 0.9 )
|
||||||
Assert.assertTrue(resultTracker.isPolymorphic(C, -1));
|
Assert.assertTrue(resultTracker.isPolymorphic(C, -1));
|
||||||
|
|
||||||
final int expectedMLEAC = 1; // the MLE is independent of the prior
|
final int expectedMLEAC = 1; // the MLE is independent of the prior
|
||||||
Assert.assertEquals(actualAC, expectedMLEAC,
|
Assert.assertEquals(actualAC, expectedMLEAC,
|
||||||
"actual AC with priors " + log10NonRefPrior + " not expected "
|
"actual AC with priors " + log10NonRefPrior + " not expected "
|
||||||
+ expectedMLEAC + " priors " + Utils.join(",", priors));
|
+ expectedMLEAC + " priors " + Utils.join(",", priors));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,124 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
|
||||||
import org.testng.Assert;
|
|
||||||
import org.testng.annotations.DataProvider;
|
|
||||||
import org.testng.annotations.Test;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
|
|
||||||
public class ConstrainedAFCalculationModelUnitTest extends BaseTest {
|
|
||||||
static Allele A = Allele.create("A", true);
|
|
||||||
static Allele C = Allele.create("C");
|
|
||||||
static Allele G = Allele.create("G");
|
|
||||||
|
|
||||||
protected static Genotype makePL(final List<Allele> expectedGT, int ... pls) {
|
|
||||||
return AFCalcUnitTest.makePL(expectedGT, pls);
|
|
||||||
}
|
|
||||||
|
|
||||||
@DataProvider(name = "MaxACsToVisit")
|
|
||||||
public Object[][] makeMaxACsToVisit() {
|
|
||||||
List<Object[]> tests = new ArrayList<Object[]>();
|
|
||||||
|
|
||||||
final int nSamples = 10;
|
|
||||||
|
|
||||||
for (int nNonInformative = 0; nNonInformative < nSamples - 1; nNonInformative++ ) {
|
|
||||||
final int nChrom = (nSamples - nNonInformative) * 2;
|
|
||||||
for ( int i = 0; i < nChrom; i++ ) {
|
|
||||||
// bi-allelic
|
|
||||||
tests.add(new Object[]{nSamples, Arrays.asList(i), nNonInformative, AFCalcFactory.Calculation.EXACT_CONSTRAINED});
|
|
||||||
|
|
||||||
// tri-allelic
|
|
||||||
for ( int j = 0; j < (nChrom - i); j++)
|
|
||||||
tests.add(new Object[]{nSamples, Arrays.asList(i, j), nNonInformative, AFCalcFactory.Calculation.EXACT_CONSTRAINED});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return tests.toArray(new Object[][]{});
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test(enabled = true, dataProvider = "MaxACsToVisit")
|
|
||||||
public void testMaxACsToVisit(final int nSamples, final List<Integer> requestedACs, final int nNonInformative, final AFCalcFactory.Calculation modelType) {
|
|
||||||
final int nAlts = requestedACs.size();
|
|
||||||
final AFCalcTestBuilder testBuilder
|
|
||||||
= new AFCalcTestBuilder(nSamples, nAlts, modelType,
|
|
||||||
AFCalcTestBuilder.PriorType.human);
|
|
||||||
|
|
||||||
final VariantContext vc = testBuilder.makeACTest(requestedACs, nNonInformative, 100);
|
|
||||||
final int[] maxACsToVisit = ((ConstrainedDiploidExactAFCalc)testBuilder.makeModel()).computeMaxACs(vc);
|
|
||||||
|
|
||||||
testExpectedACs(vc, maxACsToVisit);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void testExpectedACs(final VariantContext vc, final int[] maxACsToVisit) {
|
|
||||||
// this is necessary because cannot ensure that the tester gives us back the
|
|
||||||
// requested ACs due to rounding errors
|
|
||||||
final List<Integer> ACs = new ArrayList<Integer>();
|
|
||||||
for ( final Allele a : vc.getAlternateAlleles() )
|
|
||||||
ACs.add(vc.getCalledChrCount(a));
|
|
||||||
|
|
||||||
for ( int i = 0; i < maxACsToVisit.length; i++ ) {
|
|
||||||
Assert.assertEquals(maxACsToVisit[i], (int)ACs.get(i), "Maximum AC computed wasn't equal to the max possible in the construction for alt allele " + i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@DataProvider(name = "MaxACsGenotypes")
|
|
||||||
public Object[][] makeMaxACsForGenotype() {
|
|
||||||
List<Object[]> tests = new ArrayList<Object[]>();
|
|
||||||
|
|
||||||
final List<Allele> AA = Arrays.asList(A, A);
|
|
||||||
final List<Allele> AC = Arrays.asList(A, C);
|
|
||||||
final List<Allele> CC = Arrays.asList(C, C);
|
|
||||||
final List<Allele> AG = Arrays.asList(A, G);
|
|
||||||
final List<Allele> GG = Arrays.asList(G, G);
|
|
||||||
final List<Allele> CG = Arrays.asList(C, G);
|
|
||||||
|
|
||||||
final VariantContext vc2 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C)).make();
|
|
||||||
final VariantContext vc3 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C, G)).make();
|
|
||||||
|
|
||||||
tests.add(new Object[]{vc2, makePL(AA, 0, 10, 10)});
|
|
||||||
tests.add(new Object[]{vc2, makePL(AC, 10, 0, 10)});
|
|
||||||
tests.add(new Object[]{vc2, makePL(CC, 10, 10, 0)});
|
|
||||||
|
|
||||||
// make sure non-informative => 0
|
|
||||||
tests.add(new Object[]{vc2, makePL(AA, 0, 0, 0)});
|
|
||||||
tests.add(new Object[]{vc3, makePL(AA, 0, 0, 0, 0, 0, 0)});
|
|
||||||
|
|
||||||
// multi-allelics
|
|
||||||
tests.add(new Object[]{vc3, makePL(AG, 10, 10, 10, 0, 10, 10)});
|
|
||||||
tests.add(new Object[]{vc3, makePL(CG, 10, 10, 10, 10, 0, 10)});
|
|
||||||
tests.add(new Object[]{vc3, makePL(GG, 10, 10, 10, 10, 10, 0)});
|
|
||||||
|
|
||||||
// deal with non-informatives third alleles
|
|
||||||
tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 0, 0, 10)});
|
|
||||||
tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 10, 0, 10)});
|
|
||||||
tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 10, 0, 0)});
|
|
||||||
tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 0, 0, 0)});
|
|
||||||
tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 0, 0, 10)});
|
|
||||||
tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 10, 0, 10)});
|
|
||||||
tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 10, 0, 0)});
|
|
||||||
tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 0, 0, 0)});
|
|
||||||
|
|
||||||
return tests.toArray(new Object[][]{});
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test(enabled = true, dataProvider = "MaxACsGenotypes")
|
|
||||||
private void testMakeACByGenotype(final VariantContext vcRoot, final Genotype g) {
|
|
||||||
final VariantContext vc = new VariantContextBuilder(vcRoot).genotypes(g).make();
|
|
||||||
|
|
||||||
final AFCalcTestBuilder testBuilder
|
|
||||||
= new AFCalcTestBuilder(1, vc.getNAlleles()-1, AFCalcFactory.Calculation.EXACT_CONSTRAINED,
|
|
||||||
AFCalcTestBuilder.PriorType.human);
|
|
||||||
|
|
||||||
final int[] maxACsToVisit = ((ConstrainedDiploidExactAFCalc)testBuilder.makeModel()).computeMaxACs(vc);
|
|
||||||
|
|
||||||
testExpectedACs(vc, maxACsToVisit);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -55,48 +55,14 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
@DataProvider(name = "TestCombineGLsWithDrops")
|
|
||||||
public Object[][] makeTestCombineGLsWithDrops() {
|
|
||||||
List<Object[]> tests = new ArrayList<Object[]>();
|
|
||||||
|
|
||||||
final Set<Integer> noDrops = Collections.emptySet();
|
|
||||||
final Set<Integer> drop1 = Collections.singleton(1);
|
|
||||||
final Set<Integer> drop2 = Collections.singleton(2);
|
|
||||||
|
|
||||||
// AA AB BB AC BC CC
|
|
||||||
// drop1 (B): AA AC CC
|
|
||||||
// drop2 (C): AA AB BB
|
|
||||||
tests.add(new Object[]{1, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 2, 5), noDrops});
|
|
||||||
tests.add(new Object[]{2, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 4, 9), noDrops});
|
|
||||||
tests.add(new Object[]{1, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 1, 2), drop2});
|
|
||||||
tests.add(new Object[]{2, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 3, 5), drop1});
|
|
||||||
|
|
||||||
tests.add(new Object[]{1, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(0, 2, 6), noDrops});
|
|
||||||
tests.add(new Object[]{2, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(1, 0, 2), noDrops});
|
|
||||||
tests.add(new Object[]{1, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(2, 1, 0), drop2});
|
|
||||||
tests.add(new Object[]{2, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(5, 2, 0), drop1});
|
|
||||||
|
|
||||||
tests.add(new Object[]{1, 2, makePL(10,10,10,10,10, 0), makePL( 0, 8,11), noDrops});
|
|
||||||
tests.add(new Object[]{2, 2, makePL(10,10,10,10,10, 0), makePL( 5, 7, 0), noDrops});
|
|
||||||
tests.add(new Object[]{1, 2, makePL(10,10,10,10,10, 0), makePL( 0, 0, 0), drop2});
|
|
||||||
tests.add(new Object[]{2, 2, makePL(10,10,10,10,10, 0), makePL(10,10, 0), drop1});
|
|
||||||
|
|
||||||
return tests.toArray(new Object[][]{});
|
|
||||||
}
|
|
||||||
|
|
||||||
private Genotype makePL(final int ... PLs) {
|
private Genotype makePL(final int ... PLs) {
|
||||||
return AFCalcUnitTest.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), PLs);
|
return AFCalcUnitTest.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), PLs);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true, dataProvider = "TestCombineGLs")
|
@Test(enabled = true, dataProvider = "TestCombineGLs")
|
||||||
private void testCombineGLs(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) {
|
private void testCombineGLs(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) {
|
||||||
testCombineGLsWithDrops(altIndex, nAlts, testg, expected, Collections.<Integer>emptySet());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test(enabled = true, dataProvider = "TestCombineGLsWithDrops")
|
|
||||||
private void testCombineGLsWithDrops(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected, Set<Integer> allelesToDrop) {
|
|
||||||
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
|
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
|
||||||
final Genotype combined = calc.combineGLs(testg, altIndex, allelesToDrop, nAlts);
|
final Genotype combined = calc.combineGLs(testg, altIndex, nAlts);
|
||||||
|
|
||||||
Assert.assertEquals(combined.getPL(), expected.getPL(),
|
Assert.assertEquals(combined.getPL(), expected.getPL(),
|
||||||
"Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL()));
|
"Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL()));
|
||||||
|
|
@ -120,22 +86,21 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
|
||||||
final Genotype gACG = makePL( 0, 1, 2, 3, 4, 5);
|
final Genotype gACG = makePL( 0, 1, 2, 3, 4, 5);
|
||||||
final Genotype gAGC = makePL( 0, 4, 5, 1, 3, 2);
|
final Genotype gAGC = makePL( 0, 4, 5, 1, 3, 2);
|
||||||
final Genotype gACcombined = makePL(0, 2, 5);
|
final Genotype gACcombined = makePL(0, 2, 5);
|
||||||
|
final Genotype gACcombined2 = makePL(0, 1, 4);
|
||||||
final Genotype gAGcombined = makePL(0, 4, 9);
|
final Genotype gAGcombined = makePL(0, 4, 9);
|
||||||
final Genotype gACdropped = makePL(0, 1, 2);
|
|
||||||
final Genotype gAGdropped = makePL(0, 3, 5);
|
|
||||||
|
|
||||||
// biallelic
|
// biallelic
|
||||||
tests.add(new Object[]{vcAC.genotypes(gACcombined).make(), Arrays.asList(vcAC.genotypes(gACcombined).make())});
|
tests.add(new Object[]{vcAC.genotypes(gACcombined).make(), Arrays.asList(vcAC.genotypes(gACcombined).make())});
|
||||||
|
|
||||||
// tri-allelic
|
// tri-allelic
|
||||||
tests.add(new Object[]{vcACG.genotypes(gACG).make(), Arrays.asList(vcAC.genotypes(gACcombined).make(), vcAG.genotypes(gAGdropped).make())});
|
tests.add(new Object[]{vcACG.genotypes(gACG).make(), Arrays.asList(vcAC.genotypes(gACcombined).make(), vcAG.genotypes(gAGcombined).make())});
|
||||||
tests.add(new Object[]{vcAGC.genotypes(gAGC).make(), Arrays.asList(vcAG.genotypes(gAGcombined).make(), vcAC.genotypes(gACdropped).make())});
|
tests.add(new Object[]{vcAGC.genotypes(gAGC).make(), Arrays.asList(vcAG.genotypes(gAGcombined).make(), vcAC.genotypes(gACcombined2).make())});
|
||||||
|
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test(enabled = false, dataProvider = "TestMakeAlleleConditionalContexts")
|
@Test(enabled = true, dataProvider = "TestMakeAlleleConditionalContexts")
|
||||||
private void testMakeAlleleConditionalContexts(final VariantContext vc, final List<VariantContext> expectedVCs) {
|
private void testMakeAlleleConditionalContexts(final VariantContext vc, final List<VariantContext> expectedVCs) {
|
||||||
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
|
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
|
||||||
final List<VariantContext> biAllelicVCs = calc.makeAlleleConditionalContexts(vc);
|
final List<VariantContext> biAllelicVCs = calc.makeAlleleConditionalContexts(vc);
|
||||||
|
|
@ -148,7 +113,8 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(actual.getAlleles(), expected.getAlleles());
|
Assert.assertEquals(actual.getAlleles(), expected.getAlleles());
|
||||||
|
|
||||||
for ( int j = 0; j < actual.getNSamples(); j++ )
|
for ( int j = 0; j < actual.getNSamples(); j++ )
|
||||||
Assert.assertEquals(actual.getGenotype(j).getPL(), expected.getGenotype(j).getPL());
|
Assert.assertEquals(actual.getGenotype(j).getPL(), expected.getGenotype(j).getPL(),
|
||||||
|
"expected PLs " + Utils.join(",", expected.getGenotype(j).getPL()) + " not equal to actual " + Utils.join(",", actual.getGenotype(j).getPL()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,30 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GenomeLocs are very useful objects to keep track of genomic locations and perform set operations
|
||||||
|
* with them.
|
||||||
|
*
|
||||||
|
* However, GenomeLocs are bound to strict validation through the GenomeLocParser and cannot
|
||||||
|
* be created easily for small tasks that do not require the rigors of the GenomeLocParser validation
|
||||||
|
*
|
||||||
|
* SimpleGenomeLoc is a simple utility to create GenomeLocs without going through the parser. Should
|
||||||
|
* only be used outside of the engine.
|
||||||
|
*
|
||||||
|
* User: carneiro
|
||||||
|
* Date: 10/16/12
|
||||||
|
* Time: 2:07 PM
|
||||||
|
*/
|
||||||
|
public class SimpleGenomeLoc extends GenomeLoc {
|
||||||
|
private boolean finished;
|
||||||
|
|
||||||
|
public SimpleGenomeLoc(String contigName, int contigIndex, int start, int stop, boolean finished) {
|
||||||
|
super(contigName, contigIndex, start, stop);
|
||||||
|
this.finished = finished;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isFinished() {
|
||||||
|
return finished;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -29,24 +29,16 @@ import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.PrintStream;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generic interface for calculating the probability of alleles segregating given priors and genotype likelihoods
|
* Generic interface for calculating the probability of alleles segregating given priors and genotype likelihoods
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public abstract class AFCalc implements Cloneable {
|
public abstract class AFCalc implements Cloneable {
|
||||||
private final static Logger defaultLogger = Logger.getLogger(AFCalc.class);
|
private final static Logger defaultLogger = Logger.getLogger(AFCalc.class);
|
||||||
|
|
@ -58,8 +50,8 @@ public abstract class AFCalc implements Cloneable {
|
||||||
protected Logger logger = defaultLogger;
|
protected Logger logger = defaultLogger;
|
||||||
|
|
||||||
private SimpleTimer callTimer = new SimpleTimer();
|
private SimpleTimer callTimer = new SimpleTimer();
|
||||||
private PrintStream callReport = null;
|
|
||||||
private final AFCalcResultTracker resultTracker;
|
private final AFCalcResultTracker resultTracker;
|
||||||
|
private ExactCallLogger exactCallLogger = null;
|
||||||
|
|
||||||
protected AFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
protected AFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
||||||
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
|
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
|
||||||
|
|
@ -74,7 +66,7 @@ public abstract class AFCalc implements Cloneable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void enableProcessLog(final File exactCallsLog) {
|
public void enableProcessLog(final File exactCallsLog) {
|
||||||
initializeOutputFile(exactCallsLog);
|
exactCallLogger = new ExactCallLogger(exactCallsLog);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setLogger(Logger logger) {
|
public void setLogger(Logger logger) {
|
||||||
|
|
@ -102,8 +94,8 @@ public abstract class AFCalc implements Cloneable {
|
||||||
final AFCalcResult result = computeLog10PNonRef(vcWorking, log10AlleleFrequencyPriors);
|
final AFCalcResult result = computeLog10PNonRef(vcWorking, log10AlleleFrequencyPriors);
|
||||||
final long nanoTime = callTimer.getElapsedTimeNano();
|
final long nanoTime = callTimer.getElapsedTimeNano();
|
||||||
|
|
||||||
if ( callReport != null )
|
if ( exactCallLogger != null )
|
||||||
printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, resultTracker.getLog10PosteriorOfAFzero());
|
exactCallLogger.printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, result);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
@ -170,55 +162,8 @@ public abstract class AFCalc implements Cloneable {
|
||||||
return Math.max(maxAlternateAllelesToGenotype, maxAlternateAllelesForIndels);
|
return Math.max(maxAlternateAllelesToGenotype, maxAlternateAllelesForIndels);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Print information about the call to the calls log
|
|
||||||
//
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
private void initializeOutputFile(final File outputFile) {
|
|
||||||
try {
|
|
||||||
if (outputFile != null) {
|
|
||||||
callReport = new PrintStream( new FileOutputStream(outputFile) );
|
|
||||||
callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value")));
|
|
||||||
}
|
|
||||||
} catch ( FileNotFoundException e ) {
|
|
||||||
throw new UserException.CouldNotCreateOutputFile(outputFile, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void printCallInfo(final VariantContext vc,
|
|
||||||
final double[] log10AlleleFrequencyPriors,
|
|
||||||
final long runtimeNano,
|
|
||||||
final double log10PosteriorOfAFzero) {
|
|
||||||
printCallElement(vc, "type", "ignore", vc.getType());
|
|
||||||
|
|
||||||
int allelei = 0;
|
|
||||||
for ( final Allele a : vc.getAlleles() )
|
|
||||||
printCallElement(vc, "allele", allelei++, a.getDisplayString());
|
|
||||||
|
|
||||||
for ( final Genotype g : vc.getGenotypes() )
|
|
||||||
printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString());
|
|
||||||
|
|
||||||
for ( int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++ )
|
|
||||||
printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]);
|
|
||||||
|
|
||||||
printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
|
|
||||||
printCallElement(vc, "log10PosteriorOfAFzero", "ignore", log10PosteriorOfAFzero);
|
|
||||||
|
|
||||||
callReport.flush();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void printCallElement(final VariantContext vc,
|
|
||||||
final Object variable,
|
|
||||||
final Object key,
|
|
||||||
final Object value) {
|
|
||||||
final String loc = String.format("%s:%d", vc.getChr(), vc.getStart());
|
|
||||||
callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value)));
|
|
||||||
}
|
|
||||||
|
|
||||||
public AFCalcResultTracker getResultTracker() {
|
public AFCalcResultTracker getResultTracker() {
|
||||||
return resultTracker;
|
return resultTracker;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -30,10 +30,6 @@ public class AFCalcFactory {
|
||||||
/** reference implementation of multi-allelic EXACT model */
|
/** reference implementation of multi-allelic EXACT model */
|
||||||
EXACT_REFERENCE(ReferenceDiploidExactAFCalc.class, 2, -1),
|
EXACT_REFERENCE(ReferenceDiploidExactAFCalc.class, 2, -1),
|
||||||
|
|
||||||
/** expt. implementation */
|
|
||||||
@Deprecated
|
|
||||||
EXACT_CONSTRAINED(ConstrainedDiploidExactAFCalc.class, 2, -1),
|
|
||||||
|
|
||||||
/** expt. implementation -- for testing only */
|
/** expt. implementation -- for testing only */
|
||||||
EXACT_INDEPENDENT(IndependentAllelesDiploidExactAFCalc.class, 2, -1),
|
EXACT_INDEPENDENT(IndependentAllelesDiploidExactAFCalc.class, 2, -1),
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -31,10 +31,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.*;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Describes the results of the AFCalc
|
* Describes the results of the AFCalc
|
||||||
|
|
@ -217,6 +214,14 @@ public class AFCalcResult {
|
||||||
return log10PriorsOfAC[AF1p];
|
return log10PriorsOfAC[AF1p];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
final List<String> byAllele = new LinkedList<String>();
|
||||||
|
for ( final Allele a : getAllelesUsedInGenotyping() )
|
||||||
|
if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFGt0ForAllele(a)));
|
||||||
|
return String.format("AFCalc%n\t\tlog10PosteriorOfAFGT0=%.2f%n\t\t%s", getLog10LikelihoodOfAFGT0(), Utils.join("\n\t\t", byAllele));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Are we sufficiently confidence in being non-ref that the site is considered polymorphic?
|
* Are we sufficiently confidence in being non-ref that the site is considered polymorphic?
|
||||||
*
|
*
|
||||||
|
|
@ -233,6 +238,19 @@ public class AFCalcResult {
|
||||||
return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
|
return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Are any of the alleles polymorphic w.r.t. #isPolymorphic?
|
||||||
|
*
|
||||||
|
* @param log10minPNonRef the confidence threshold, in log10 space
|
||||||
|
* @return true if any are poly, false otherwise
|
||||||
|
*/
|
||||||
|
public boolean anyPolymorphic(final double log10minPNonRef) {
|
||||||
|
for ( final Allele a : getAllelesUsedInGenotyping() )
|
||||||
|
if ( a.isNonReference() && isPolymorphic(a, log10minPNonRef) )
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the log10 probability that allele is segregating
|
* Returns the log10 probability that allele is segregating
|
||||||
*
|
*
|
||||||
|
|
@ -266,15 +284,7 @@ public class AFCalcResult {
|
||||||
final double[] log10UnnormalizedPosteriors = new double[log10LikelihoodsOfAC.length];
|
final double[] log10UnnormalizedPosteriors = new double[log10LikelihoodsOfAC.length];
|
||||||
for ( int i = 0; i < log10LikelihoodsOfAC.length; i++ )
|
for ( int i = 0; i < log10LikelihoodsOfAC.length; i++ )
|
||||||
log10UnnormalizedPosteriors[i] = log10LikelihoodsOfAC[i] + log10PriorsOfAC[i];
|
log10UnnormalizedPosteriors[i] = log10LikelihoodsOfAC[i] + log10PriorsOfAC[i];
|
||||||
|
return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false);
|
||||||
// necessary because the posteriors may be so skewed that the log-space normalized value isn't
|
|
||||||
// good, so we have to try both log-space normalization as well as the real-space normalization if the
|
|
||||||
// result isn't good
|
|
||||||
final double[] logNormalized = MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, true);
|
|
||||||
if ( goodLog10ProbVector(logNormalized, logNormalized.length, true) )
|
|
||||||
return logNormalized;
|
|
||||||
else
|
|
||||||
return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -1,107 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
|
||||||
import com.google.java.contract.Requires;
|
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
|
|
||||||
@Deprecated
|
|
||||||
public class ConstrainedDiploidExactAFCalc extends DiploidExactAFCalc {
|
|
||||||
protected ConstrainedDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
|
|
||||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker) {
|
|
||||||
final int[] maxACsToConsider = computeMaxACs(vc);
|
|
||||||
resultTracker.setAClimits(maxACsToConsider);
|
|
||||||
return new StateTracker(maxACsToConsider);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Computes the maximum ACs we need to consider for each alt allele
|
|
||||||
*
|
|
||||||
* Walks over the genotypes in VC, and computes for each alt allele the maximum
|
|
||||||
* AC we need to consider in that alt allele dimension. Does the calculation
|
|
||||||
* based on the PLs in each genotype g, choosing to update the max AC for the
|
|
||||||
* alt alleles corresponding to that PL. Only takes the first lowest PL,
|
|
||||||
* if there are multiple genotype configurations with the same PL value. It
|
|
||||||
* takes values in the order of the alt alleles.
|
|
||||||
*
|
|
||||||
* @param vc the variant context we will compute max alt alleles for
|
|
||||||
* @return a vector of max alt alleles, indexed by alt allele, so result[0] is the AC of the
|
|
||||||
* first alt allele.
|
|
||||||
*/
|
|
||||||
@Ensures("result != null")
|
|
||||||
protected final int[] computeMaxACs(final VariantContext vc) {
|
|
||||||
final int[] maxACs = new int[vc.getNAlleles()-1];
|
|
||||||
|
|
||||||
for ( final Genotype g : vc.getGenotypes() )
|
|
||||||
updateMaxACs(g, maxACs);
|
|
||||||
|
|
||||||
return maxACs;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Update the maximum achievable allele counts in maxAC according to the PLs in g
|
|
||||||
*
|
|
||||||
* Selects the maximum genotype configuration from the PLs in g, and updates
|
|
||||||
* the maxAC for this configure. For example, if the lowest PL is for 0/1, updates
|
|
||||||
* the maxAC for the alt allele 1 by 1. If it's 1/1, update is 2. Works for
|
|
||||||
* many number of alt alleles (determined by length of maxACs).
|
|
||||||
*
|
|
||||||
* If the max PL occurs at 0/0, updates nothing
|
|
||||||
* Note that this function greedily takes the first min PL, so that if 0/1 and 1/1 have
|
|
||||||
* the same PL value, then updates the first one.
|
|
||||||
*
|
|
||||||
* Also, only will update 1 alt allele, so if 0/1 and 0/2 both have the same PL,
|
|
||||||
* then only first one (1) will be updated
|
|
||||||
*
|
|
||||||
* @param g the genotype to update
|
|
||||||
* @param maxACs the max allele count vector for alt alleles (starting at 0 => first alt allele)
|
|
||||||
*/
|
|
||||||
@Requires({
|
|
||||||
"g != null",
|
|
||||||
"maxACs != null",
|
|
||||||
"goodMaxACs(maxACs)"})
|
|
||||||
private void updateMaxACs(final Genotype g, final int[] maxACs) {
|
|
||||||
final int[] PLs = g.getLikelihoods().getAsPLs();
|
|
||||||
|
|
||||||
int minPLi = 0;
|
|
||||||
int minPL = PLs[0];
|
|
||||||
|
|
||||||
for ( int i = 0; i < PLs.length; i++ ) {
|
|
||||||
if ( PLs[i] < minPL ) {
|
|
||||||
minPL = PLs[i];
|
|
||||||
minPLi = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(minPLi);
|
|
||||||
updateMaxACs(maxACs, pair.alleleIndex1);
|
|
||||||
updateMaxACs(maxACs, pair.alleleIndex2);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Simple helper. Update max alt alleles maxACs according to the allele index (where 0 == ref)
|
|
||||||
*
|
|
||||||
* If alleleI == 0 => doesn't update anything
|
|
||||||
* else maxACs[alleleI - 1]++
|
|
||||||
*
|
|
||||||
* @param maxACs array of max alt allele ACs
|
|
||||||
* @param alleleI the index (relative to 0) to update a count of 1 in max alt alleles.
|
|
||||||
*/
|
|
||||||
@Requires({
|
|
||||||
"alleleI >= 0",
|
|
||||||
"(alleleI - 1) < maxACs.length",
|
|
||||||
"goodMaxACs(maxACs)"})
|
|
||||||
private void updateMaxACs(final int[] maxACs, final int alleleI) {
|
|
||||||
if ( alleleI > 0 )
|
|
||||||
maxACs[alleleI-1]++;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean goodMaxACs(final int[] maxACs) {
|
|
||||||
return MathUtils.sum(maxACs) >= 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -36,7 +36,9 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
||||||
if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy);
|
if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker);
|
protected StateTracker makeMaxLikelihood(VariantContext vc, AFCalcResultTracker resultTracker) {
|
||||||
|
return new StateTracker();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected AFCalcResult computeLog10PNonRef(final VariantContext vc,
|
protected AFCalcResult computeLog10PNonRef(final VariantContext vc,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,179 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allows us to write out and read in information about exact calls (site, alleles, PLs, etc) in tabular format
|
||||||
|
*
|
||||||
|
* Once opened, calls can be writen to disk with printCallInfo
|
||||||
|
*/
|
||||||
|
public class ExactCallLogger implements Cloneable {
|
||||||
|
private PrintStream callReport = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new ExactCallLogger writing it's output to outputFile
|
||||||
|
*
|
||||||
|
* @param outputFile
|
||||||
|
*/
|
||||||
|
public ExactCallLogger(final File outputFile) {
|
||||||
|
try {
|
||||||
|
callReport = new PrintStream(new BufferedOutputStream(new FileOutputStream(outputFile), 10000000));
|
||||||
|
callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value")));
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new UserException.CouldNotCreateOutputFile(outputFile, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Summarizes information about an exact call that happened
|
||||||
|
*/
|
||||||
|
public static class ExactCall {
|
||||||
|
final VariantContext vc;
|
||||||
|
final long runtime;
|
||||||
|
final AFCalcResult originalCall;
|
||||||
|
|
||||||
|
public ExactCall(VariantContext vc, final long runtime, final AFCalcResult originalCall) {
|
||||||
|
this.vc = vc;
|
||||||
|
this.runtime = runtime;
|
||||||
|
this.originalCall = originalCall;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s",
|
||||||
|
vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(),
|
||||||
|
originalCall.getLog10PosteriorOfAFGT0(),
|
||||||
|
new AutoFormattingTime(runtime / 1e9).toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final void printCallInfo(final VariantContext vc,
|
||||||
|
final double[] log10AlleleFrequencyPriors,
|
||||||
|
final long runtimeNano,
|
||||||
|
final AFCalcResult result) {
|
||||||
|
printCallElement(vc, "type", "ignore", vc.getType());
|
||||||
|
|
||||||
|
int allelei = 0;
|
||||||
|
for (final Allele a : vc.getAlleles())
|
||||||
|
printCallElement(vc, "allele", allelei++, a.getDisplayString());
|
||||||
|
|
||||||
|
for (final Genotype g : vc.getGenotypes())
|
||||||
|
printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString());
|
||||||
|
|
||||||
|
for (int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++)
|
||||||
|
printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]);
|
||||||
|
|
||||||
|
printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
|
||||||
|
printCallElement(vc, "log10PosteriorOfAFEq0", "ignore", result.getLog10PosteriorOfAFEq0());
|
||||||
|
printCallElement(vc, "log10PosteriorOfAFGt0", "ignore", result.getLog10PosteriorOfAFGT0());
|
||||||
|
|
||||||
|
for ( final Allele allele : result.getAllelesUsedInGenotyping() ) {
|
||||||
|
if ( allele.isNonReference() ) {
|
||||||
|
printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele));
|
||||||
|
printCallElement(vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
callReport.flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires({"vc != null", "variable != null", "key != null", "value != null", "callReport != null"})
|
||||||
|
private void printCallElement(final VariantContext vc,
|
||||||
|
final Object variable,
|
||||||
|
final Object key,
|
||||||
|
final Object value) {
|
||||||
|
final String loc = String.format("%s:%d", vc.getChr(), vc.getStart());
|
||||||
|
callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read in a list of ExactCall objects from reader, keeping only those
|
||||||
|
* with starts in startsToKeep or all sites (if this is empty)
|
||||||
|
*
|
||||||
|
* @param reader a just-opened reader sitting at the start of the file
|
||||||
|
* @param startsToKeep a list of start position of the calls to keep, or empty if all calls should be kept
|
||||||
|
* @param parser a genome loc parser to create genome locs
|
||||||
|
* @return a list of ExactCall objects in reader
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static List<ExactCall> readExactLog(final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser) throws IOException {
|
||||||
|
if ( reader == null ) throw new IllegalArgumentException("reader cannot be null");
|
||||||
|
if ( startsToKeep == null ) throw new IllegalArgumentException("startsToKeep cannot be null");
|
||||||
|
if ( parser == null ) throw new IllegalArgumentException("GenomeLocParser cannot be null");
|
||||||
|
|
||||||
|
List<ExactCall> calls = new LinkedList<ExactCall>();
|
||||||
|
|
||||||
|
// skip the header line
|
||||||
|
reader.readLine();
|
||||||
|
|
||||||
|
// skip the first "type" line
|
||||||
|
reader.readLine();
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||||
|
final List<Allele> alleles = new ArrayList<Allele>();
|
||||||
|
final List<Genotype> genotypes = new ArrayList<Genotype>();
|
||||||
|
final double[] posteriors = new double[2];
|
||||||
|
final double[] priors = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true);
|
||||||
|
final List<Integer> mle = new ArrayList<Integer>();
|
||||||
|
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
|
||||||
|
long runtimeNano = -1;
|
||||||
|
|
||||||
|
GenomeLoc currentLoc = null;
|
||||||
|
while (true) {
|
||||||
|
final String line = reader.readLine();
|
||||||
|
if (line == null)
|
||||||
|
return calls;
|
||||||
|
|
||||||
|
final String[] parts = line.split("\t");
|
||||||
|
final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]);
|
||||||
|
final String variable = parts[1];
|
||||||
|
final String key = parts[2];
|
||||||
|
final String value = parts[3];
|
||||||
|
|
||||||
|
if (currentLoc == null)
|
||||||
|
currentLoc = lineLoc;
|
||||||
|
|
||||||
|
if (variable.equals("type")) {
|
||||||
|
if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) {
|
||||||
|
builder.alleles(alleles);
|
||||||
|
final int stop = currentLoc.getStart() + alleles.get(0).length() - 1;
|
||||||
|
builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
|
||||||
|
builder.genotypes(genotypes);
|
||||||
|
final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{}));
|
||||||
|
final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
|
||||||
|
calls.add(new ExactCall(builder.make(), runtimeNano, result));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
} else if (variable.equals("allele")) {
|
||||||
|
final boolean isRef = key.equals("0");
|
||||||
|
alleles.add(Allele.create(value, isRef));
|
||||||
|
} else if (variable.equals("PL")) {
|
||||||
|
final GenotypeBuilder gb = new GenotypeBuilder(key);
|
||||||
|
gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
|
||||||
|
genotypes.add(gb.make());
|
||||||
|
} else if (variable.equals("log10PosteriorOfAFEq0")) {
|
||||||
|
posteriors[0] = Double.valueOf(value);
|
||||||
|
} else if (variable.equals("log10PosteriorOfAFGt0")) {
|
||||||
|
posteriors[1] = Double.valueOf(value);
|
||||||
|
} else if (variable.equals("MLE")) {
|
||||||
|
mle.add(Integer.valueOf(value));
|
||||||
|
} else if (variable.equals("pNonRefByAllele")) {
|
||||||
|
final Allele a = Allele.create(key);
|
||||||
|
log10pNonRefByAllele.put(a, Double.valueOf(value));
|
||||||
|
} else if (variable.equals("runtime.nano")) {
|
||||||
|
runtimeNano = Long.valueOf(value);
|
||||||
|
} else {
|
||||||
|
// nothing to do
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -32,31 +32,97 @@ import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
|
/**
|
||||||
|
* Computes the conditional bi-allelic exact results
|
||||||
|
*
|
||||||
|
* Suppose vc contains 2 alt allele: A* with C and T. This function first computes:
|
||||||
|
*
|
||||||
|
* (1) P(D | AF_c > 0 && AF_t == *) [i.e., T can be anything]
|
||||||
|
*
|
||||||
|
* it then computes the conditional probability on AF_c == 0:
|
||||||
|
*
|
||||||
|
* (2) P(D | AF_t > 0 && AF_c == 0)
|
||||||
|
*
|
||||||
|
* Thinking about this visually, we have the following likelihood matrix where each cell is
|
||||||
|
* the P(D | AF_c == i && AF_t == j):
|
||||||
|
*
|
||||||
|
* 0 AF_c > 0
|
||||||
|
* -----------------
|
||||||
|
* 0 | |
|
||||||
|
* |--|-------------
|
||||||
|
* a | |
|
||||||
|
* f | |
|
||||||
|
* _ | |
|
||||||
|
* t | |
|
||||||
|
* > | |
|
||||||
|
* 0 | |
|
||||||
|
*
|
||||||
|
* What we really want to know how
|
||||||
|
*
|
||||||
|
* (3) P(D | AF_c == 0 & AF_t == 0)
|
||||||
|
*
|
||||||
|
* compares with
|
||||||
|
*
|
||||||
|
* (4) P(D | AF_c > 0 || AF_t > 0)
|
||||||
|
*
|
||||||
|
* This is effectively asking for the value in the upper left vs. the sum of all cells.
|
||||||
|
*
|
||||||
|
* This class implements the conditional likelihoods summation for any number of alt
|
||||||
|
* alleles, where each alt allele has its EXACT probability of segregating calculated by
|
||||||
|
* reducing each alt B into the case XB and computing P(D | AF_b > 0 ) as follows:
|
||||||
|
*
|
||||||
|
* Suppose we have for a A/B/C site the following GLs:
|
||||||
|
*
|
||||||
|
* AA AB BB AC BC CC
|
||||||
|
*
|
||||||
|
* and we want to get the bi-allelic GLs for X/B, where X is everything not B
|
||||||
|
*
|
||||||
|
* XX = AA + AC + CC (since X = A or C)
|
||||||
|
* XB = AB + BC
|
||||||
|
* BB = BB
|
||||||
|
*
|
||||||
|
* After each allele has its probability calculated we compute the joint posterior
|
||||||
|
* as P(D | AF_* == 0) = prod_i P (D | AF_i == 0), after applying the theta^i
|
||||||
|
* prior for the ith least likely allele.
|
||||||
|
*/
|
||||||
|
public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||||
|
/**
|
||||||
|
* The min. confidence of an allele to be included in the joint posterior.
|
||||||
|
*/
|
||||||
|
private final static double MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR = Math.log10(1e-20);
|
||||||
|
|
||||||
|
private final static int[] BIALLELIC_NON_INFORMATIVE_PLS = new int[]{0,0,0};
|
||||||
private final static List<Allele> BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
private final static List<Allele> BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sorts AFCalcResults by their posteriors of AF > 0, so the
|
||||||
|
*/
|
||||||
private final static class CompareAFCalcResultsByPNonRef implements Comparator<AFCalcResult> {
|
private final static class CompareAFCalcResultsByPNonRef implements Comparator<AFCalcResult> {
|
||||||
@Override
|
@Override
|
||||||
public int compare(AFCalcResult o1, AFCalcResult o2) {
|
public int compare(AFCalcResult o1, AFCalcResult o2) {
|
||||||
return Double.compare(o1.getLog10LikelihoodOfAFGT0(), o2.getLog10LikelihoodOfAFGT0());
|
return Double.compare(o1.getLog10PosteriorOfAFGT0(), o2.getLog10PosteriorOfAFGT0());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static CompareAFCalcResultsByPNonRef compareAFCalcResultsByPNonRef = new CompareAFCalcResultsByPNonRef();
|
private final static CompareAFCalcResultsByPNonRef compareAFCalcResultsByPNonRef = new CompareAFCalcResultsByPNonRef();
|
||||||
|
|
||||||
final ReferenceDiploidExactAFCalc refModel;
|
/**
|
||||||
|
* The AFCalc model we are using to do the bi-allelic computation
|
||||||
|
*/
|
||||||
|
final AFCalc biAlleleExactModel;
|
||||||
|
|
||||||
protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
|
protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
|
||||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
||||||
refModel = new ReferenceDiploidExactAFCalc(nSamples, 1, 1, ploidy);
|
biAlleleExactModel = new ReferenceDiploidExactAFCalc(nSamples, 1, 1, ploidy);
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected StateTracker makeMaxLikelihood(VariantContext vc, AFCalcResultTracker resultTracker) {
|
|
||||||
return refModel.makeMaxLikelihood(vc, resultTracker);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Trivial subclass that helps with debugging by keeping track of the supporting information for this joint call
|
||||||
|
*/
|
||||||
private static class MyAFCalcResult extends AFCalcResult {
|
private static class MyAFCalcResult extends AFCalcResult {
|
||||||
|
/**
|
||||||
|
* List of the supporting bi-allelic AFCalcResults that went into making this multi-allelic joint call
|
||||||
|
*/
|
||||||
final List<AFCalcResult> supporting;
|
final List<AFCalcResult> supporting;
|
||||||
|
|
||||||
private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pNonRefByAllele, List<AFCalcResult> supporting) {
|
private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pNonRefByAllele, List<AFCalcResult> supporting) {
|
||||||
|
|
@ -68,121 +134,89 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||||
@Override
|
@Override
|
||||||
public AFCalcResult computeLog10PNonRef(final VariantContext vc,
|
public AFCalcResult computeLog10PNonRef(final VariantContext vc,
|
||||||
final double[] log10AlleleFrequencyPriors) {
|
final double[] log10AlleleFrequencyPriors) {
|
||||||
final double log10LikelihoodOfRef = computelog10LikelihoodOfRef(vc);
|
final List<AFCalcResult> independentResultTrackers = computeAlleleIndependentExact(vc, log10AlleleFrequencyPriors);
|
||||||
final List<AFCalcResult> independentResultTrackers = computeAlleleConditionalExact(vc, log10AlleleFrequencyPriors);
|
|
||||||
final List<AFCalcResult> withMultiAllelicPriors = applyMultiAllelicPriors(independentResultTrackers);
|
final List<AFCalcResult> withMultiAllelicPriors = applyMultiAllelicPriors(independentResultTrackers);
|
||||||
return combineIndependentPNonRefs(vc, log10LikelihoodOfRef, withMultiAllelicPriors);
|
return combineIndependentPNonRefs(vc, withMultiAllelicPriors);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected final double computelog10LikelihoodOfRef(final VariantContext vc) {
|
|
||||||
// this value just the likelihood of AF == 0 in the special constrained multi-allelic calculation
|
|
||||||
final List<double[]> allGLs = getGLs(vc.getGenotypes(), false);
|
|
||||||
double log10LikelihoodOfHomRef = 0.0;
|
|
||||||
|
|
||||||
// TODO -- can be easily optimized (currently looks at all GLs via getGLs)
|
|
||||||
for ( int i = 0; i < allGLs.size(); i++ ) {
|
|
||||||
final double[] GLs = allGLs.get(i);
|
|
||||||
log10LikelihoodOfHomRef += GLs[0];
|
|
||||||
//log10LikelihoodOfHomRef += MathUtils.normalizeFromLog10(GLs, true)[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
return log10LikelihoodOfHomRef;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the conditional bi-allelic exact results
|
* Compute the conditional exact AFCalcResult for each allele in vc independently, returning
|
||||||
|
* the result of each, in order of the alt alleles in VC
|
||||||
*
|
*
|
||||||
* Suppose vc contains 2 alt allele: A* with C and T. This function first computes:
|
* @param vc the VariantContext we want to analyze
|
||||||
*
|
* @param log10AlleleFrequencyPriors the priors
|
||||||
* (1) P(D | AF_c > 0 && AF_t == *) [i.e., T can be anything]
|
* @return a list of the AFCalcResults for each bi-allelic sub context of vc
|
||||||
*
|
|
||||||
* it then computes the conditional probability on AF_c == 0:
|
|
||||||
*
|
|
||||||
* (2) P(D | AF_t > 0 && AF_c == 0)
|
|
||||||
*
|
|
||||||
* Thinking about this visually, we have the following likelihood matrix where each cell is
|
|
||||||
* the P(D | AF_c == i && AF_t == j):
|
|
||||||
*
|
|
||||||
* 0 AF_c > 0
|
|
||||||
* -----------------
|
|
||||||
* 0 | |
|
|
||||||
* |--|-------------
|
|
||||||
* a | |
|
|
||||||
* f | |
|
|
||||||
* _ | |
|
|
||||||
* t | |
|
|
||||||
* > | |
|
|
||||||
* 0 | |
|
|
||||||
*
|
|
||||||
* What we really want to know how
|
|
||||||
*
|
|
||||||
* (3) P(D | AF_c == 0 & AF_t == 0)
|
|
||||||
*
|
|
||||||
* compares with
|
|
||||||
*
|
|
||||||
* (4) P(D | AF_c > 0 || AF_t > 0)
|
|
||||||
*
|
|
||||||
* This is effectively asking for the value in the upper left vs. the sum of all cells.
|
|
||||||
*
|
|
||||||
* The quantity (1) is the same of all cells except those with AF_c == 0, while (2) is the
|
|
||||||
* band at the top where AF_t > 0 and AF_c == 0
|
|
||||||
*
|
|
||||||
* So (4) is actually (1) + (2).
|
|
||||||
*
|
|
||||||
* (3) is the direct inverse of the (1) and (2), as we are simultaneously calculating
|
|
||||||
*
|
|
||||||
* (1*) P(D | AF_c == 0 && AF_t == *) [i.e., T can be anything]
|
|
||||||
* (2*) P(D | AF_t == 0 && AF_c == 0) [TODO -- note this value looks like the thing we are supposed to use]
|
|
||||||
*
|
|
||||||
* This function implements the conditional likelihoods summation for any number of alt
|
|
||||||
* alleles (not just the tri-allelic case), where each subsequent variant context is
|
|
||||||
* further constrained such that each already considered allele x has AF_x == 0 in the
|
|
||||||
* compute.
|
|
||||||
*
|
|
||||||
* @param vc
|
|
||||||
* @param log10AlleleFrequencyPriors
|
|
||||||
* @return
|
|
||||||
*/
|
*/
|
||||||
protected List<AFCalcResult> computeAlleleConditionalExact(final VariantContext vc,
|
@Requires({"vc != null", "log10AlleleFrequencyPriors != null"})
|
||||||
final double[] log10AlleleFrequencyPriors) {
|
@Ensures("goodIndependentResult(vc, result)")
|
||||||
|
protected final List<AFCalcResult> computeAlleleIndependentExact(final VariantContext vc,
|
||||||
|
final double[] log10AlleleFrequencyPriors) {
|
||||||
final List<AFCalcResult> results = new LinkedList<AFCalcResult>();
|
final List<AFCalcResult> results = new LinkedList<AFCalcResult>();
|
||||||
|
|
||||||
for ( final VariantContext subvc : makeAlleleConditionalContexts(vc) ) {
|
for ( final VariantContext subvc : makeAlleleConditionalContexts(vc) ) {
|
||||||
final AFCalcResult resultTracker = refModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors);
|
final AFCalcResult resultTracker = biAlleleExactModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors);
|
||||||
results.add(resultTracker);
|
results.add(resultTracker);
|
||||||
}
|
}
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<VariantContext> makeAlleleConditionalContexts(final VariantContext vc) {
|
/**
|
||||||
|
* Helper function to ensure that the computeAlleleIndependentExact is returning reasonable results
|
||||||
|
*/
|
||||||
|
private static boolean goodIndependentResult(final VariantContext vc, final List<AFCalcResult> results) {
|
||||||
|
if ( results.size() != vc.getNAlleles() - 1) return false;
|
||||||
|
for ( int i = 0; i < results.size(); i++ ) {
|
||||||
|
if ( results.get(i).getAllelesUsedInGenotyping().size() != 2 )
|
||||||
|
return false;
|
||||||
|
if ( ! results.get(i).getAllelesUsedInGenotyping().contains(vc.getAlternateAllele(i)) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the bi-allelic variant context for each alt allele in vc with bi-allelic likelihoods, in order
|
||||||
|
*
|
||||||
|
* @param vc the variant context to split. Must have n.alt.alleles > 1
|
||||||
|
* @return a bi-allelic variant context for each alt allele in vc
|
||||||
|
*/
|
||||||
|
@Requires({"vc != null", "vc.getNAlleles() > 1"})
|
||||||
|
@Ensures("result.size() == vc.getNAlleles() - 1")
|
||||||
|
protected final List<VariantContext> makeAlleleConditionalContexts(final VariantContext vc) {
|
||||||
final int nAltAlleles = vc.getNAlleles() - 1;
|
final int nAltAlleles = vc.getNAlleles() - 1;
|
||||||
final List<VariantContext> vcs = new LinkedList<VariantContext>();
|
final List<VariantContext> vcs = new LinkedList<VariantContext>();
|
||||||
|
|
||||||
final List<Allele> afZeroAlleles = new LinkedList<Allele>();
|
|
||||||
for ( int altI = 0; altI < nAltAlleles; altI++ ) {
|
for ( int altI = 0; altI < nAltAlleles; altI++ ) {
|
||||||
final Allele altAllele = vc.getAlternateAllele(altI);
|
vcs.add(biallelicCombinedGLs(vc, altI + 1));
|
||||||
final List<Allele> biallelic = Arrays.asList(vc.getReference(), altAllele);
|
|
||||||
vcs.add(biallelicCombinedGLs(vc, biallelic, afZeroAlleles, altI + 1));
|
|
||||||
//afZeroAlleles.add(altAllele);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return vcs;
|
return vcs;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected VariantContext biallelicCombinedGLs(final VariantContext rootVC, final List<Allele> biallelic, final List<Allele> afZeroAlleles, final int allele2) {
|
/**
|
||||||
|
* Create a single bi-allelic variant context from rootVC with alt allele with index altAlleleIndex
|
||||||
|
*
|
||||||
|
* @param rootVC the root (potentially multi-allelic) variant context
|
||||||
|
* @param altAlleleIndex index of the alt allele, from 0 == first alt allele
|
||||||
|
* @return a bi-allelic variant context based on rootVC
|
||||||
|
*/
|
||||||
|
@Requires({"rootVC.getNAlleles() > 1", "altAlleleIndex < rootVC.getNAlleles()"})
|
||||||
|
@Ensures({"result.isBiallelic()"})
|
||||||
|
protected final VariantContext biallelicCombinedGLs(final VariantContext rootVC, final int altAlleleIndex) {
|
||||||
if ( rootVC.isBiallelic() ) {
|
if ( rootVC.isBiallelic() ) {
|
||||||
if ( ! afZeroAlleles.isEmpty() ) throw new IllegalArgumentException("Root VariantContext is biallelic but afZeroAlleles wasn't empty: " + afZeroAlleles);
|
|
||||||
return rootVC;
|
return rootVC;
|
||||||
} else {
|
} else {
|
||||||
final Set<Integer> allelesToDiscard = new HashSet<Integer>(rootVC.getAlleleIndices(afZeroAlleles));
|
|
||||||
final int nAlts = rootVC.getNAlleles() - 1;
|
final int nAlts = rootVC.getNAlleles() - 1;
|
||||||
final List<Genotype> biallelicGenotypes = new ArrayList<Genotype>(rootVC.getNSamples());
|
final List<Genotype> biallelicGenotypes = new ArrayList<Genotype>(rootVC.getNSamples());
|
||||||
for ( final Genotype g : rootVC.getGenotypes() )
|
for ( final Genotype g : rootVC.getGenotypes() )
|
||||||
biallelicGenotypes.add(combineGLs(g, allele2, allelesToDiscard, nAlts));
|
biallelicGenotypes.add(combineGLs(g, altAlleleIndex, nAlts));
|
||||||
|
|
||||||
final VariantContextBuilder vcb = new VariantContextBuilder(rootVC);
|
final VariantContextBuilder vcb = new VariantContextBuilder(rootVC);
|
||||||
vcb.alleles(biallelic);
|
final Allele altAllele = rootVC.getAlternateAllele(altAlleleIndex - 1);
|
||||||
|
vcb.alleles(Arrays.asList(rootVC.getReference(), altAllele));
|
||||||
vcb.genotypes(biallelicGenotypes);
|
vcb.genotypes(biallelicGenotypes);
|
||||||
return vcb.make();
|
return vcb.make();
|
||||||
}
|
}
|
||||||
|
|
@ -203,30 +237,16 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||||
* XB = AB + BC
|
* XB = AB + BC
|
||||||
* BB = BB
|
* BB = BB
|
||||||
*
|
*
|
||||||
* Supports the additional mode of simply dropping GLs whose allele index occurs in allelesToDiscard. This is
|
|
||||||
* useful in the case where you want to drop alleles (not combine them), such as above:
|
|
||||||
*
|
|
||||||
* AA AB BB AC BC CC
|
|
||||||
*
|
|
||||||
* and we want to get the bi-allelic GLs for X/B, where X is everything not B, but dropping C (index 2)
|
|
||||||
*
|
|
||||||
* XX = AA (since X = A and C is dropped)
|
|
||||||
* XB = AB
|
|
||||||
* BB = BB
|
|
||||||
*
|
|
||||||
* This allows us to recover partial GLs the correspond to any allele in allelesToDiscard having strictly
|
|
||||||
* AF == 0.
|
|
||||||
*
|
|
||||||
* @param original the original multi-allelic genotype
|
* @param original the original multi-allelic genotype
|
||||||
* @param altIndex the index of the alt allele we wish to keep in the bialleic case -- with ref == 0
|
* @param altIndex the index of the alt allele we wish to keep in the bialleic case -- with ref == 0
|
||||||
* @param nAlts the total number of alt alleles
|
* @param nAlts the total number of alt alleles
|
||||||
* @return a new biallelic genotype with appropriate PLs
|
* @return a new biallelic genotype with appropriate PLs
|
||||||
*/
|
*/
|
||||||
@Requires({"original.hasLikelihoods()", "! allelesToDiscard.contains(altIndex)"})
|
@Requires({"original.hasLikelihoods()"}) // TODO -- add ploidy == 2 test "original.getPLs() == null || original.getPLs().length == 3"})
|
||||||
@Ensures({"result.hasLikelihoods()", "result.getPL().length == 3"})
|
@Ensures({"result.hasLikelihoods()", "result.getPL().length == 3"})
|
||||||
protected Genotype combineGLs(final Genotype original, final int altIndex, final Set<Integer> allelesToDiscard, final int nAlts ) {
|
protected Genotype combineGLs(final Genotype original, final int altIndex, final int nAlts ) {
|
||||||
if ( original.isNonInformative() )
|
if ( original.isNonInformative() )
|
||||||
return new GenotypeBuilder(original).PL(new int[]{0,0,0}).alleles(BIALLELIC_NOCALL).make();
|
return new GenotypeBuilder(original).PL(BIALLELIC_NON_INFORMATIVE_PLS).alleles(BIALLELIC_NOCALL).make();
|
||||||
|
|
||||||
if ( altIndex < 1 || altIndex > nAlts ) throw new IllegalStateException("altIndex must be between 1 and nAlts " + nAlts);
|
if ( altIndex < 1 || altIndex > nAlts ) throw new IllegalStateException("altIndex must be between 1 and nAlts " + nAlts);
|
||||||
|
|
||||||
|
|
@ -236,10 +256,6 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||||
for ( int index = 0; index < normalizedPr.length; index++ ) {
|
for ( int index = 0; index < normalizedPr.length; index++ ) {
|
||||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(index);
|
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(index);
|
||||||
|
|
||||||
// just continue if we shouldn't include the pair because it's in the discard set
|
|
||||||
if ( discardAllelePair(pair, allelesToDiscard) )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if ( pair.alleleIndex1 == altIndex ) {
|
if ( pair.alleleIndex1 == altIndex ) {
|
||||||
if ( pair.alleleIndex2 == altIndex )
|
if ( pair.alleleIndex2 == altIndex )
|
||||||
// hom-alt case
|
// hom-alt case
|
||||||
|
|
@ -263,11 +279,7 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||||
return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make();
|
return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected boolean discardAllelePair(final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair, Set<Integer> allelesToDiscard) {
|
protected final List<AFCalcResult> applyMultiAllelicPriors(final List<AFCalcResult> conditionalPNonRefResults) {
|
||||||
return allelesToDiscard.contains(pair.alleleIndex1) || allelesToDiscard.contains(pair.alleleIndex2);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected List<AFCalcResult> applyMultiAllelicPriors(final List<AFCalcResult> conditionalPNonRefResults) {
|
|
||||||
final ArrayList<AFCalcResult> sorted = new ArrayList<AFCalcResult>(conditionalPNonRefResults);
|
final ArrayList<AFCalcResult> sorted = new ArrayList<AFCalcResult>(conditionalPNonRefResults);
|
||||||
|
|
||||||
// sort the results, so the most likely allele is first
|
// sort the results, so the most likely allele is first
|
||||||
|
|
@ -291,10 +303,11 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||||
/**
|
/**
|
||||||
* Take the independent estimates of pNonRef for each alt allele and combine them into a single result
|
* Take the independent estimates of pNonRef for each alt allele and combine them into a single result
|
||||||
*
|
*
|
||||||
|
* TODO -- add more docs
|
||||||
|
*
|
||||||
* @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently
|
* @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently
|
||||||
*/
|
*/
|
||||||
protected AFCalcResult combineIndependentPNonRefs(final VariantContext vc,
|
protected AFCalcResult combineIndependentPNonRefs(final VariantContext vc,
|
||||||
final double log10LikelihoodsOfACEq0,
|
|
||||||
final List<AFCalcResult> sortedResultsWithThetaNPriors) {
|
final List<AFCalcResult> sortedResultsWithThetaNPriors) {
|
||||||
int nEvaluations = 0;
|
int nEvaluations = 0;
|
||||||
final int nAltAlleles = sortedResultsWithThetaNPriors.size();
|
final int nAltAlleles = sortedResultsWithThetaNPriors.size();
|
||||||
|
|
@ -302,8 +315,8 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||||
final double[] log10PriorsOfAC = new double[2];
|
final double[] log10PriorsOfAC = new double[2];
|
||||||
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
|
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
|
||||||
|
|
||||||
// this value is a sum in real space so we need to store values to sum up later
|
// this value is a sum in log space
|
||||||
final double[] log10LikelihoodsOfACGt0 = new double[nAltAlleles];
|
double log10PosteriorOfACEq0Sum = 0.0;
|
||||||
|
|
||||||
for ( final AFCalcResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) {
|
for ( final AFCalcResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) {
|
||||||
final Allele altAllele = sortedResultWithThetaNPriors.getAllelesUsedInGenotyping().get(1);
|
final Allele altAllele = sortedResultWithThetaNPriors.getAllelesUsedInGenotyping().get(1);
|
||||||
|
|
@ -316,7 +329,8 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||||
log10PriorsOfAC[1] += sortedResultWithThetaNPriors.getLog10PriorOfAFGT0();
|
log10PriorsOfAC[1] += sortedResultWithThetaNPriors.getLog10PriorOfAFGT0();
|
||||||
|
|
||||||
// the AF > 0 case requires us to store the normalized likelihood for later summation
|
// the AF > 0 case requires us to store the normalized likelihood for later summation
|
||||||
log10LikelihoodsOfACGt0[altI] = sortedResultWithThetaNPriors.getLog10LikelihoodOfAFGT0();
|
if ( sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0() > MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR )
|
||||||
|
log10PosteriorOfACEq0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0();
|
||||||
|
|
||||||
// bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
|
// bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
|
||||||
log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0());
|
log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0());
|
||||||
|
|
@ -325,14 +339,19 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||||
nEvaluations += sortedResultWithThetaNPriors.nEvaluations;
|
nEvaluations += sortedResultWithThetaNPriors.nEvaluations;
|
||||||
}
|
}
|
||||||
|
|
||||||
// the log10 likelihoods are the sum of the log10 likelihoods across all alt alleles
|
// In principle, if B_p = x and C_p = y are the probabilities of being poly for alleles B and C,
|
||||||
final double[] log10LikelihoodsOfAC = new double[]{
|
// the probability of being poly is (1 - B_p) * (1 - C_p) = (1 - x) * (1 - y). We want to estimate confidently
|
||||||
log10LikelihoodsOfACEq0,
|
// log10((1 - x) * (1 - y)) which is log10(1 - x) + log10(1 - y). This sum is log10PosteriorOfACEq0
|
||||||
MathUtils.log10sumLog10(log10LikelihoodsOfACGt0)};
|
final double log10PosteriorOfACGt0 = Math.max(Math.log10(1 - Math.pow(10, log10PosteriorOfACEq0Sum)), MathUtils.LOG10_P_OF_ZERO);
|
||||||
|
final double[] log10LikelihoodsOfAC = new double[] {
|
||||||
|
// L + prior = posterior => L = poster - prior
|
||||||
|
log10PosteriorOfACEq0Sum - log10PriorsOfAC[0],
|
||||||
|
log10PosteriorOfACGt0 - log10PriorsOfAC[1]
|
||||||
|
};
|
||||||
|
|
||||||
return new MyAFCalcResult(alleleCountsOfMLE, nEvaluations, vc.getAlleles(),
|
return new MyAFCalcResult(alleleCountsOfMLE, nEvaluations, vc.getAlleles(),
|
||||||
MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true, true), // necessary to ensure all values < 0
|
MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true), // necessary to ensure all values < 0
|
||||||
MathUtils.normalizeFromLog10(log10PriorsOfAC, true), // priors incorporate multiple alt alleles, must be normalized
|
MathUtils.normalizeFromLog10(log10PriorsOfAC, true), // priors incorporate multiple alt alleles, must be normalized
|
||||||
log10pNonRefByAllele, sortedResultsWithThetaNPriors);
|
log10pNonRefByAllele, sortedResultsWithThetaNPriors);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
|
|
||||||
public class ReferenceDiploidExactAFCalc extends DiploidExactAFCalc {
|
public class ReferenceDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||||
protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
|
protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
|
||||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker) {
|
|
||||||
return new StateTracker();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -596,7 +596,6 @@ public class MathUtils {
|
||||||
if (keepInLogSpace) {
|
if (keepInLogSpace) {
|
||||||
for (int i = 0; i < array.length; i++) {
|
for (int i = 0; i < array.length; i++) {
|
||||||
array[i] -= maxValue;
|
array[i] -= maxValue;
|
||||||
array[i] = Math.max(array[i], LOG10_P_OF_ZERO);
|
|
||||||
}
|
}
|
||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
@ -613,8 +612,11 @@ public class MathUtils {
|
||||||
sum += normalized[i];
|
sum += normalized[i];
|
||||||
for (int i = 0; i < array.length; i++) {
|
for (int i = 0; i < array.length; i++) {
|
||||||
double x = normalized[i] / sum;
|
double x = normalized[i] / sum;
|
||||||
if (takeLog10OfOutput)
|
if (takeLog10OfOutput) {
|
||||||
x = Math.max(Math.log10(x), LOG10_P_OF_ZERO);
|
x = Math.log10(x);
|
||||||
|
if ( x < LOG10_P_OF_ZERO || Double.isInfinite(x) )
|
||||||
|
x = array[i] - maxValue;
|
||||||
|
}
|
||||||
|
|
||||||
normalized[i] = x;
|
normalized[i] = x;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,12 @@ import java.io.PrintStream;
|
||||||
* to the provided output stream. For testing/debugging purposes.
|
* to the provided output stream. For testing/debugging purposes.
|
||||||
*
|
*
|
||||||
* Log entries are of the following form (fields are tab-separated):
|
* Log entries are of the following form (fields are tab-separated):
|
||||||
* LABEL VALUE KEY1 KEY2 ... KEY_N
|
* LABEL OPERATION VALUE KEY1 KEY2 ... KEY_N
|
||||||
|
*
|
||||||
|
* A header line is written before the log entries giving the dimensions of this NestedIntegerArray.
|
||||||
|
* It has the form:
|
||||||
|
*
|
||||||
|
* # LABEL SIZE_OF_FIRST_DIMENSION SIZE_OF_SECOND_DIMENSION ... SIZE_OF_NTH_DIMENSION
|
||||||
*
|
*
|
||||||
* @author David Roazen
|
* @author David Roazen
|
||||||
*/
|
*/
|
||||||
|
|
@ -43,6 +48,9 @@ public class LoggingNestedIntegerArray<T> extends NestedIntegerArray<T> {
|
||||||
private PrintStream log;
|
private PrintStream log;
|
||||||
private String logEntryLabel;
|
private String logEntryLabel;
|
||||||
|
|
||||||
|
public static final String HEADER_LINE_PREFIX = "# ";
|
||||||
|
public enum NestedIntegerArrayOperation { GET, PUT };
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param log output stream to which to log update operations
|
* @param log output stream to which to log update operations
|
||||||
|
|
@ -57,6 +65,37 @@ public class LoggingNestedIntegerArray<T> extends NestedIntegerArray<T> {
|
||||||
}
|
}
|
||||||
this.log = log;
|
this.log = log;
|
||||||
this.logEntryLabel = logEntryLabel != null ? logEntryLabel : "";
|
this.logEntryLabel = logEntryLabel != null ? logEntryLabel : "";
|
||||||
|
|
||||||
|
// Write the header line recording the dimensions of this NestedIntegerArray:
|
||||||
|
StringBuilder logHeaderLine = new StringBuilder();
|
||||||
|
|
||||||
|
logHeaderLine.append(HEADER_LINE_PREFIX);
|
||||||
|
logHeaderLine.append(this.logEntryLabel);
|
||||||
|
for ( int dimension : dimensions ) {
|
||||||
|
logHeaderLine.append("\t");
|
||||||
|
logHeaderLine.append(dimension);
|
||||||
|
}
|
||||||
|
|
||||||
|
this.log.println(logHeaderLine.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public T get( final int... keys ) {
|
||||||
|
StringBuilder logEntry = new StringBuilder();
|
||||||
|
|
||||||
|
logEntry.append(logEntryLabel);
|
||||||
|
logEntry.append("\t");
|
||||||
|
logEntry.append(NestedIntegerArrayOperation.GET);
|
||||||
|
logEntry.append("\t"); // empty field for the datum value
|
||||||
|
|
||||||
|
for ( int key : keys ) {
|
||||||
|
logEntry.append("\t");
|
||||||
|
logEntry.append(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
log.println(logEntry.toString());
|
||||||
|
|
||||||
|
return super.get(keys);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -67,6 +106,8 @@ public class LoggingNestedIntegerArray<T> extends NestedIntegerArray<T> {
|
||||||
|
|
||||||
logEntry.append(logEntryLabel);
|
logEntry.append(logEntryLabel);
|
||||||
logEntry.append("\t");
|
logEntry.append("\t");
|
||||||
|
logEntry.append(NestedIntegerArrayOperation.PUT);
|
||||||
|
logEntry.append("\t");
|
||||||
logEntry.append(value);
|
logEntry.append(value);
|
||||||
for ( int key : keys ) {
|
for ( int key : keys ) {
|
||||||
logEntry.append("\t");
|
logEntry.append("\t");
|
||||||
|
|
|
||||||
|
|
@ -298,12 +298,16 @@ public abstract class Genotype implements Comparable<Genotype> {
|
||||||
* @return true if all samples PLs are equal and == 0
|
* @return true if all samples PLs are equal and == 0
|
||||||
*/
|
*/
|
||||||
public boolean isNonInformative() {
|
public boolean isNonInformative() {
|
||||||
for ( final int PL : getPL() ) {
|
if ( getPL() == null )
|
||||||
if ( PL != 0 )
|
return true;
|
||||||
return false;
|
else {
|
||||||
}
|
for ( final int PL : getPL() ) {
|
||||||
|
if ( PL != 0 )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -349,7 +349,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||||
Arrays.asList("beee9457d7cea42006ac45400db5e873"));
|
Arrays.asList("f3ff7fe0f15f31eadd726c711d6bf3de"));
|
||||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ public class NanoSchedulerIntegrationTest extends WalkerTest {
|
||||||
for ( final int nct : Arrays.asList(1, 2) ) {
|
for ( final int nct : Arrays.asList(1, 2) ) {
|
||||||
// tests.add(new Object[]{ "SNP", "a1c7546f32a8919a3f3a70a04b2e8322", nt, nct });
|
// tests.add(new Object[]{ "SNP", "a1c7546f32a8919a3f3a70a04b2e8322", nt, nct });
|
||||||
//// tests.add(new Object[]{ "INDEL", "0a6d2be79f4f8a4b0eb788cc4751b31b", nt, nct });
|
//// tests.add(new Object[]{ "INDEL", "0a6d2be79f4f8a4b0eb788cc4751b31b", nt, nct });
|
||||||
tests.add(new Object[]{ "BOTH", "78ce72d8f9d029313f5f2ceb02bb9822", nt, nct });
|
tests.add(new Object[]{ "BOTH", "8cad82c3a5f5b932042933f136663c8a", nt, nct });
|
||||||
}
|
}
|
||||||
|
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,6 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.queue
|
package org.broadinstitute.sting.queue
|
||||||
|
|
||||||
import function.QFunction
|
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import org.broadinstitute.sting.commandline._
|
import org.broadinstitute.sting.commandline._
|
||||||
import org.broadinstitute.sting.queue.util._
|
import org.broadinstitute.sting.queue.util._
|
||||||
|
|
@ -96,18 +95,18 @@ class QCommandLine extends CommandLineProgram with Logging {
|
||||||
new PluginManager[QScript](classOf[QScript], Seq(qScriptClasses.toURI.toURL))
|
new PluginManager[QScript](classOf[QScript], Seq(qScriptClasses.toURI.toURL))
|
||||||
}
|
}
|
||||||
|
|
||||||
private lazy val qStatusMessengerPluginManager = {
|
private lazy val qCommandPlugin = {
|
||||||
new PluginManager[QStatusMessenger](classOf[QStatusMessenger])
|
new PluginManager[QCommandPlugin](classOf[QCommandPlugin])
|
||||||
}
|
}
|
||||||
|
|
||||||
ClassFieldCache.parsingEngine = new ParsingEngine(this)
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Takes the QScripts passed in, runs their script() methods, retrieves their generated
|
* Takes the QScripts passed in, runs their script() methods, retrieves their generated
|
||||||
* functions, and then builds and runs a QGraph based on the dependencies.
|
* functions, and then builds and runs a QGraph based on the dependencies.
|
||||||
*/
|
*/
|
||||||
def execute = {
|
def execute = {
|
||||||
val allStatusMessengers = qStatusMessengerPluginManager.createAllTypes()
|
ClassFieldCache.parsingEngine = this.parser
|
||||||
|
|
||||||
|
val allCommandPlugins = qCommandPlugin.createAllTypes()
|
||||||
|
|
||||||
if (settings.qSettings.runName == null)
|
if (settings.qSettings.runName == null)
|
||||||
settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName)
|
settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName)
|
||||||
|
|
@ -115,14 +114,24 @@ class QCommandLine extends CommandLineProgram with Logging {
|
||||||
settings.qSettings.tempDirectory = IOUtils.absolute(settings.qSettings.runDirectory, ".queue/tmp")
|
settings.qSettings.tempDirectory = IOUtils.absolute(settings.qSettings.runDirectory, ".queue/tmp")
|
||||||
qGraph.initializeWithSettings(settings)
|
qGraph.initializeWithSettings(settings)
|
||||||
|
|
||||||
for (statusMessenger <- allStatusMessengers) {
|
for (commandPlugin <- allCommandPlugins) {
|
||||||
loadArgumentsIntoObject(statusMessenger)
|
loadArgumentsIntoObject(commandPlugin)
|
||||||
}
|
}
|
||||||
|
|
||||||
for (statusMessenger <- allStatusMessengers) {
|
for (commandPlugin <- allCommandPlugins) {
|
||||||
statusMessenger.started()
|
if (commandPlugin.statusMessenger != null)
|
||||||
|
commandPlugin.statusMessenger.started()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Default command plugin argument?
|
||||||
|
val remoteFileConverter = (
|
||||||
|
for (commandPlugin <- allCommandPlugins if (commandPlugin.remoteFileConverter != null))
|
||||||
|
yield commandPlugin.remoteFileConverter
|
||||||
|
).headOption.getOrElse(null)
|
||||||
|
|
||||||
|
if (remoteFileConverter != null)
|
||||||
|
loadArgumentsIntoObject(remoteFileConverter)
|
||||||
|
|
||||||
val allQScripts = qScriptPluginManager.createAllTypes()
|
val allQScripts = qScriptPluginManager.createAllTypes()
|
||||||
for (script <- allQScripts) {
|
for (script <- allQScripts) {
|
||||||
logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript])))
|
logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript])))
|
||||||
|
|
@ -137,10 +146,15 @@ class QCommandLine extends CommandLineProgram with Logging {
|
||||||
case e: Exception =>
|
case e: Exception =>
|
||||||
throw new UserException.CannotExecuteQScript(script.getClass.getSimpleName + ".script() threw the following exception: " + e, e)
|
throw new UserException.CannotExecuteQScript(script.getClass.getSimpleName + ".script() threw the following exception: " + e, e)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (remoteFileConverter != null) {
|
||||||
|
if (remoteFileConverter.convertToRemoteEnabled)
|
||||||
|
script.mkRemoteOutputs(remoteFileConverter)
|
||||||
|
}
|
||||||
|
|
||||||
script.functions.foreach(qGraph.add(_))
|
script.functions.foreach(qGraph.add(_))
|
||||||
logger.info("Added " + script.functions.size + " functions")
|
logger.info("Added " + script.functions.size + " functions")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Execute the job graph
|
// Execute the job graph
|
||||||
qGraph.run()
|
qGraph.run()
|
||||||
|
|
||||||
|
|
@ -162,14 +176,16 @@ class QCommandLine extends CommandLineProgram with Logging {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
logger.info("Done with errors")
|
logger.info("Done with errors")
|
||||||
qGraph.logFailed()
|
qGraph.logFailed()
|
||||||
for (statusMessenger <- allStatusMessengers)
|
for (commandPlugin <- allCommandPlugins)
|
||||||
statusMessenger.exit("Done with errors")
|
if (commandPlugin.statusMessenger != null)
|
||||||
|
commandPlugin.statusMessenger.exit("Done with errors")
|
||||||
1
|
1
|
||||||
} else {
|
} else {
|
||||||
if (settings.run) {
|
if (settings.run) {
|
||||||
allQScripts.foreach(_.pushOutputs())
|
allQScripts.foreach(_.pushOutputs())
|
||||||
for (statusMessenger <- allStatusMessengers)
|
for (commandPlugin <- allCommandPlugins)
|
||||||
statusMessenger.done(allQScripts.map(_.remoteOutputs))
|
if (commandPlugin.statusMessenger != null)
|
||||||
|
commandPlugin.statusMessenger.done(allQScripts.map(_.remoteOutputs))
|
||||||
}
|
}
|
||||||
0
|
0
|
||||||
}
|
}
|
||||||
|
|
@ -189,7 +205,7 @@ class QCommandLine extends CommandLineProgram with Logging {
|
||||||
override def getArgumentSources = {
|
override def getArgumentSources = {
|
||||||
var plugins = Seq.empty[Class[_]]
|
var plugins = Seq.empty[Class[_]]
|
||||||
plugins ++= qScriptPluginManager.getPlugins
|
plugins ++= qScriptPluginManager.getPlugins
|
||||||
plugins ++= qStatusMessengerPluginManager.getPlugins
|
plugins ++= qCommandPlugin.getPlugins
|
||||||
plugins.toArray
|
plugins.toArray
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -200,11 +216,10 @@ class QCommandLine extends CommandLineProgram with Logging {
|
||||||
override def getArgumentSourceName(source: Class[_]) = {
|
override def getArgumentSourceName(source: Class[_]) = {
|
||||||
if (classOf[QScript].isAssignableFrom(source))
|
if (classOf[QScript].isAssignableFrom(source))
|
||||||
qScriptPluginManager.getName(source.asSubclass(classOf[QScript]))
|
qScriptPluginManager.getName(source.asSubclass(classOf[QScript]))
|
||||||
else if (classOf[QStatusMessenger].isAssignableFrom(source))
|
else if (classOf[QCommandPlugin].isAssignableFrom(source))
|
||||||
qStatusMessengerPluginManager.getName(source.asSubclass(classOf[QStatusMessenger]))
|
qCommandPlugin.getName(source.asSubclass(classOf[QCommandPlugin]))
|
||||||
else
|
else
|
||||||
null
|
null
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
package org.broadinstitute.sting.queue
|
||||||
|
|
||||||
|
import engine.QStatusMessenger
|
||||||
|
import util.RemoteFileConverter
|
||||||
|
|
||||||
|
trait QCommandPlugin {
|
||||||
|
def statusMessenger: QStatusMessenger = null
|
||||||
|
def remoteFileConverter: RemoteFileConverter = null
|
||||||
|
}
|
||||||
|
|
@ -108,6 +108,24 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
|
||||||
functions.foreach( f => add(f) )
|
functions.foreach( f => add(f) )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert all @Output files to remote output files.
|
||||||
|
* @param remoteFileConverter Converter for files to remote files.
|
||||||
|
*/
|
||||||
|
def mkRemoteOutputs(remoteFileConverter: RemoteFileConverter) {
|
||||||
|
for (field <- outputFields) {
|
||||||
|
val fieldFile = ClassFieldCache.getFieldFile(this, field)
|
||||||
|
if (fieldFile != null && !fieldFile.isInstanceOf[RemoteFile]) {
|
||||||
|
val fieldName = ClassFieldCache.fullName(field)
|
||||||
|
val remoteFile = remoteFileConverter.convertToRemote(fieldFile, fieldName)
|
||||||
|
ClassFieldCache.setFieldValue(this, field, remoteFile)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Pull all remote files to the local disk.
|
||||||
|
*/
|
||||||
def pullInputs() {
|
def pullInputs() {
|
||||||
val inputs = ClassFieldCache.getFieldFiles(this, inputFields)
|
val inputs = ClassFieldCache.getFieldFiles(this, inputFields)
|
||||||
for (remoteFile <- filterRemoteFiles(inputs)) {
|
for (remoteFile <- filterRemoteFiles(inputs)) {
|
||||||
|
|
@ -116,6 +134,9 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Push all remote files from the local disk.
|
||||||
|
*/
|
||||||
def pushOutputs() {
|
def pushOutputs() {
|
||||||
val outputs = ClassFieldCache.getFieldFiles(this, outputFields)
|
val outputs = ClassFieldCache.getFieldFiles(this, outputFields)
|
||||||
for (remoteFile <- filterRemoteFiles(outputs)) {
|
for (remoteFile <- filterRemoteFiles(outputs)) {
|
||||||
|
|
@ -124,6 +145,10 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List out the remote outputs
|
||||||
|
* @return the RemoteFile outputs by argument source
|
||||||
|
*/
|
||||||
def remoteOutputs: Map[ArgumentSource, Seq[RemoteFile]] =
|
def remoteOutputs: Map[ArgumentSource, Seq[RemoteFile]] =
|
||||||
outputFields.map(field => (field -> filterRemoteFiles(ClassFieldCache.getFieldFiles(this, field)))).filter(tuple => !tuple._2.isEmpty).toMap
|
outputFields.map(field => (field -> filterRemoteFiles(ClassFieldCache.getFieldFiles(this, field)))).filter(tuple => !tuple._2.isEmpty).toMap
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -180,4 +180,15 @@ object ClassFieldCache {
|
||||||
case unknown => throw new QException("Non-file found. Try removing the annotation, change the annotation to @Argument, or extend File with FileExtension: %s: %s".format(field.field, unknown))
|
case unknown => throw new QException("Non-file found. Try removing the annotation, change the annotation to @Argument, or extend File with FileExtension: %s: %s".format(field.field, unknown))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// other utilities
|
||||||
|
//
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the fullName of the argument
|
||||||
|
* @param field ArgumentSource to check
|
||||||
|
* @return Full name of the argument source
|
||||||
|
*/
|
||||||
|
def fullName(field: ArgumentSource) = field.createArgumentDefinitions().get(0).fullName
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
package org.broadinstitute.sting.queue.util
|
||||||
|
|
||||||
|
import java.io.File
|
||||||
|
|
||||||
|
trait RemoteFileConverter {
|
||||||
|
type RemoteFileType <: RemoteFile
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If this remote file creator is capable of converting to a remote file.
|
||||||
|
* @return true if ready to convert
|
||||||
|
*/
|
||||||
|
def convertToRemoteEnabled: Boolean
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts to a remote file
|
||||||
|
* @param file The original file
|
||||||
|
* @param name A "name" to use for the remote file
|
||||||
|
* @return The new version of this remote file.
|
||||||
|
*/
|
||||||
|
def convertToRemote(file: File, name: String): RemoteFileType
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue