Fix merge conflicts

This commit is contained in:
Guillermo del Angel 2012-10-17 20:13:51 -04:00
commit bfb73e1c5b
33 changed files with 870 additions and 591 deletions

View File

@ -0,0 +1,21 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import org.broadinstitute.sting.utils.GenomeLocComparator;
import java.util.TreeSet;
/**
* A stash of regions that must be kept uncompressed in all samples
*
* In general, these are regions that were kept uncompressed by a tumor sample and we want to force
* all other samples (normals and/or tumors) to also keep these regions uncompressed
*
* User: carneiro
* Date: 10/15/12
* Time: 4:08 PM
*/
public class CompressionStash extends TreeSet<SimpleGenomeLoc> {
    // TreeSet is Serializable, so this subclass is too; declare an explicit
    // version id so serialized stashes stay compatible across recompiles.
    private static final long serialVersionUID = 1L;

    /**
     * Creates an empty stash ordered by genomic position (GenomeLocComparator),
     * so iteration yields the uncompressed regions in reference order.
     */
    public CompressionStash() {
        super(new GenomeLocComparator());
    }
}

View File

@ -55,11 +55,12 @@ public class MultiSampleCompressor implements Compressor {
final int minBaseQual,
final ReduceReads.DownsampleStrategy downsampleStrategy,
final int nContigs,
final boolean allowPolyploidReduction) {
final boolean allowPolyploidReduction,
final CompressionStash compressionStash) {
for ( String name : SampleUtils.getSAMFileSamples(header) ) {
compressorsPerSample.put(name,
new SingleSampleCompressor(contextSize, downsampleCoverage,
minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, allowPolyploidReduction));
minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, allowPolyploidReduction, compressionStash));
}
}

View File

@ -222,6 +222,8 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
HashMap<String, Long> readNameHash; // This hash will keep the name of the original read the new compressed name (a number).
Long nextReadNumber = 1L; // The next number to use for the compressed read name.
CompressionStash compressionStash = new CompressionStash();
SortedSet<GenomeLoc> intervalList;
private static final String PROGRAM_RECORD_NAME = "GATK ReduceReads"; // The name that will go in the @PG tag
@ -328,7 +330,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
*/
@Override
public ReduceReadsStash reduceInit() {
return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, USE_POLYPLOID_REDUCTION));
return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, USE_POLYPLOID_REDUCTION, compressionStash));
}
/**

View File

@ -20,6 +20,7 @@ public class SingleSampleCompressor implements Compressor {
final private ReduceReads.DownsampleStrategy downsampleStrategy;
final private int nContigs;
final private boolean allowPolyploidReduction;
final CompressionStash compressionStash;
private SlidingWindow slidingWindow;
private int slidingWindowCounter;
@ -33,7 +34,8 @@ public class SingleSampleCompressor implements Compressor {
final int minBaseQual,
final ReduceReads.DownsampleStrategy downsampleStrategy,
final int nContigs,
final boolean allowPolyploidReduction) {
final boolean allowPolyploidReduction,
final CompressionStash compressionStash) {
this.contextSize = contextSize;
this.downsampleCoverage = downsampleCoverage;
this.minMappingQuality = minMappingQuality;
@ -44,6 +46,7 @@ public class SingleSampleCompressor implements Compressor {
this.downsampleStrategy = downsampleStrategy;
this.nContigs = nContigs;
this.allowPolyploidReduction = allowPolyploidReduction;
this.compressionStash = compressionStash;
}
/**
@ -65,7 +68,7 @@ public class SingleSampleCompressor implements Compressor {
}
if ( slidingWindow == null) { // this is the first read
slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities(), nContigs, allowPolyploidReduction);
slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities(), nContigs, allowPolyploidReduction, compressionStash);
slidingWindowCounter++;
}

View File

@ -6,7 +6,6 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
@ -56,6 +55,7 @@ public class SlidingWindow {
private final int nContigs;
private boolean allowPolyploidReductionInGeneral;
private CompressionStash compressionStash;
/**
* The types of synthetic reads to use in the finalizeAndAdd method
@ -87,7 +87,7 @@ public class SlidingWindow {
}
public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs, boolean allowPolyploidReduction) {
public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs, boolean allowPolyploidReduction, CompressionStash compressionStash) {
this.contextSize = contextSize;
this.downsampleCoverage = downsampleCoverage;
@ -118,6 +118,7 @@ public class SlidingWindow {
this.nContigs = nContigs;
this.allowPolyploidReductionInGeneral = allowPolyploidReduction;
this.compressionStash = compressionStash;
}
/**
@ -145,7 +146,7 @@ public class SlidingWindow {
* @param variantSite boolean array with true marking variant regions
* @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region.
*/
private Pair<Integer, Integer> getNextVariantRegion(int from, int to, boolean[] variantSite) {
private SimpleGenomeLoc getNextVariantRegion(int from, int to, boolean[] variantSite) {
boolean foundStart = false;
int variantRegionStartIndex = 0;
for (int i=from; i<to; i++) {
@ -154,10 +155,10 @@ public class SlidingWindow {
foundStart = true;
}
else if(!variantSite[i] && foundStart) {
return(new Pair<Integer, Integer>(variantRegionStartIndex, i-1));
return(new SimpleGenomeLoc(contig, contigIndex, variantRegionStartIndex, i-1, true));
}
}
return (foundStart) ? new Pair<Integer, Integer>(variantRegionStartIndex, -1) : null;
return (foundStart) ? new SimpleGenomeLoc(contig, contigIndex, variantRegionStartIndex, to-1, false) : null;
}
/**
@ -168,23 +169,22 @@ public class SlidingWindow {
* @param variantSite boolean array with true marking variant regions
* @return a list with start/stops of variant regions following getNextVariantRegion description
*/
private List<Pair<Integer, Integer>> getAllVariantRegions(int from, int to, boolean[] variantSite) {
List<Pair<Integer,Integer>> regions = new LinkedList<Pair<Integer, Integer>>();
private CompressionStash getVariantRegionsFromThisSample(int from, int to, boolean[] variantSite) {
CompressionStash regions = new CompressionStash();
int index = from;
while(index < to) {
Pair<Integer,Integer> result = getNextVariantRegion(index, to, variantSite);
SimpleGenomeLoc result = getNextVariantRegion(index, to, variantSite);
if (result == null)
break;
regions.add(result);
if (result.getSecond() < 0)
if (result.getStop() < 0)
break;
index = result.getSecond() + 1;
index = result.getStop() + 1;
}
return regions;
}
/**
* Determines if the window can be slid given the new incoming read.
*
@ -203,7 +203,7 @@ public class SlidingWindow {
boolean[] variantSite = markSites(getStartLocation(windowHeader) + readStartHeaderIndex);
int breakpoint = Math.max(readStartHeaderIndex - contextSize - 1, 0); // this is the limit of what we can close/send to consensus (non-inclusive)
List<Pair<Integer,Integer>> regions = getAllVariantRegions(0, breakpoint, variantSite);
CompressionStash regions = getVariantRegionsFromThisSample(0, breakpoint, variantSite);
finalizedReads = closeVariantRegions(regions, false);
List<GATKSAMRecord> readsToRemove = new LinkedList<GATKSAMRecord>();
@ -567,26 +567,31 @@ public class SlidingWindow {
result.addAll(addToSyntheticReads(windowHeader, 0, stop, false));
result.addAll(finalizeAndAdd(ConsensusType.BOTH));
return result; // finalized reads will be downsampled if necessary
return result; // finalized reads will be downsampled if necessary
}
private List<GATKSAMRecord> closeVariantRegions(List<Pair<Integer, Integer>> regions, boolean forceClose) {
private List<GATKSAMRecord> closeVariantRegions(CompressionStash regions, boolean forceClose) {
List<GATKSAMRecord> allReads = new LinkedList<GATKSAMRecord>();
if (!regions.isEmpty()) {
int lastStop = -1;
for (Pair<Integer, Integer> region : regions) {
int start = region.getFirst();
int stop = region.getSecond();
if (stop < 0 && forceClose)
stop = windowHeader.size() - 1;
if (stop >= 0) {
allReads.addAll(closeVariantRegion(start, stop, regions.size() > 1));
lastStop = stop;
for (SimpleGenomeLoc region : regions) {
int start = region.getStart();
int stop = region.getStop();
if (!region.isFinished()) {
if(forceClose) // region is unfinished but we're forcing the close of this window
stop = windowHeader.size() - 1;
else
continue; // region is unfinished and we're not forcing the close of this window
}
allReads.addAll(closeVariantRegion(start, stop, regions.size() > 1));
lastStop = stop;
}
for (int i = 0; i < lastStop; i++) // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion)
windowHeader.remove(); // todo -- can't believe java doesn't allow me to just do windowHeader = windowHeader.get(stop). Should be more efficient here!
for (int i = 0; i < lastStop; i++) // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion)
windowHeader.remove(); // todo -- can't believe java doesn't allow me to just do windowHeader = windowHeader.get(stop). Should be more efficient here!
}
return allReads;
}
@ -626,7 +631,7 @@ public class SlidingWindow {
if (!windowHeader.isEmpty()) {
boolean[] variantSite = markSites(getStopLocation(windowHeader) + 1);
List<Pair<Integer,Integer>> regions = getAllVariantRegions(0, windowHeader.size(), variantSite);
CompressionStash regions = getVariantRegionsFromThisSample(0, windowHeader.size(), variantSite);
finalizedReads = closeVariantRegions(regions, true);
if (!windowHeader.isEmpty()) {

View File

@ -5,23 +5,22 @@ import org.apache.log4j.Logger;
import org.apache.log4j.TTCCLayout;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.io.*;
import java.util.*;
/**
* Created with IntelliJ IDEA.
* User: depristo
* Date: 10/2/12
* Time: 10:25 AM
* To change this template use File | Settings | File Templates.
* A simple GATK utility (i.e, runs from command-line) for assessing the performance of
* the exact model
*/
public class AFCalcPerformanceTest {
final static Logger logger = Logger.getLogger(AFCalcPerformanceTest.class);
@ -190,7 +189,8 @@ public class AFCalcPerformanceTest {
public enum Operation {
ANALYZE,
SINGLE
SINGLE,
EXACT_LOG
}
public static void main(final String[] args) throws Exception {
final TTCCLayout layout = new TTCCLayout();
@ -204,10 +204,49 @@ public class AFCalcPerformanceTest {
switch ( op ) {
case ANALYZE: analyze(args); break;
case SINGLE: profileBig(args); break;
case EXACT_LOG: exactLog(args); break;
default: throw new IllegalAccessException("unknown operation " + op);
}
}
/**
 * Replays exact-model calls recorded in an exact-call log file and compares the
 * logged results and runtimes against a fresh EXACT_INDEPENDENT computation.
 *
 * Expected args: args[1] = reference fasta, args[2] = exact log file,
 * args[3..] = variant start positions to replay (others in the log are skipped).
 */
private static void exactLog(final String[] args) throws Exception {
    final File ref = new File(args[1]);
    final File exactLogFile = new File(args[2]);
    final List<Integer> startsToUse = new LinkedList<Integer>();

    for ( int i = 3; i < args.length; i++ )
        startsToUse.add(Integer.valueOf(args[i]));

    final CachingIndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(ref);
    final GenomeLocParser parser = new GenomeLocParser(seq);

    // bugfix: the reader was previously never closed; ensure the log file
    // handle is released even if parsing throws
    final BufferedReader reader = new BufferedReader(new FileReader(exactLogFile));
    final List<ExactCallLogger.ExactCall> loggedCalls;
    try {
        loggedCalls = ExactCallLogger.readExactLog(reader, startsToUse, parser);
    } finally {
        reader.close();
    }

    for ( final ExactCallLogger.ExactCall call : loggedCalls ) {
        // rebuild an equivalent test configuration for the logged variant context
        final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1,
                AFCalcFactory.Calculation.EXACT_INDEPENDENT,
                AFCalcTestBuilder.PriorType.human);
        logger.info(call);

        // time the fresh computation so we can report the speedup vs the logged run
        final SimpleTimer timer = new SimpleTimer().start();
        final AFCalcResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, testBuilder.makePriors());
        final long newNanoTime = timer.getElapsedTimeNano();

        if ( call.originalCall.anyPolymorphic(-1) || result.anyPolymorphic(-1) ) {
            logger.info("**** ONE IS POLY");
        }
        logger.info("\t\t getLog10PosteriorOfAFGT0: " + call.originalCall.getLog10PosteriorOfAFGT0() + " vs " + result.getLog10PosteriorOfAFGT0());

        final double speedup = call.runtime / (1.0 * newNanoTime);
        logger.info("\t\t runtime: " + call.runtime + " vs " + newNanoTime + " speedup " + String.format("%.2f", speedup) + "x");

        // flag per-allele disagreements between the logged call and the fresh result
        for ( final Allele a : call.originalCall.getAllelesUsedInGenotyping() ) {
            if ( a.isNonReference() ) {
                final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : "";
                logger.info("\t\t MLE " + a + ": " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE);
                final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
                logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost);
            }
        }
    }
}
private static void profileBig(final String[] args) throws Exception {
final int nSamples = Integer.valueOf(args[1]);
final int ac = Integer.valueOf(args[2]);
@ -234,7 +273,6 @@ public class AFCalcPerformanceTest {
final List<ModelParams> modelParams = Arrays.asList(
new ModelParams(AFCalcFactory.Calculation.EXACT_REFERENCE, 10000, 10),
// new ModelParams(AFCalcTestBuilder.ModelType.GeneralExact, 100, 10),
new ModelParams(AFCalcFactory.Calculation.EXACT_CONSTRAINED, 10000, 100),
new ModelParams(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 10000, 1000));
final boolean ONLY_HUMAN_PRIORS = false;

View File

@ -45,12 +45,16 @@ public class AFCalcTestBuilder {
human
}
// Number of alternate alleles this test configuration was built with.
public int getNumAltAlleles() {
return numAltAlleles;
}
// Number of samples this test configuration was built with.
public int getnSamples() {
return nSamples;
}
public AFCalc makeModel() {
return AFCalcFactory.createAFCalc(modelType, nSamples, 4, 4, 2);
return AFCalcFactory.createAFCalc(modelType, nSamples, getNumAltAlleles(), getNumAltAlleles(), 2);
}
public double[] makePriors() {

View File

@ -348,12 +348,14 @@ public class LikelihoodCalculationEngine {
}
}
}
// add all filtered reads to the NO_CALL list because they weren't given any likelihoods
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
for( final Allele a : call.getFirst().getAlleles() )
for( final Allele a : call.getFirst().getAlleles() ) {
likelihoodMap.add(read, a, 0.0);
}
}
}

View File

@ -21,36 +21,36 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
executeTest(testName, spec);
}
@Test(enabled = false)
@Test(enabled = true)
public void testDefaultCompression() {
RRTest("testDefaultCompression ", L, "323dd4deabd7767efa0f2c6e7fa4189f");
RRTest("testDefaultCompression ", L, "1f95f3193bd9f120a73c34a0087abaf6");
}
@Test(enabled = false)
@Test(enabled = true)
public void testMultipleIntervals() {
String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
RRTest("testMultipleIntervals ", intervals, "c437fb160547ff271f8eba30e5f3ff76");
RRTest("testMultipleIntervals ", intervals, "79213d6ac68d56d4d72dcf511223e424");
}
@Test(enabled = false)
@Test(enabled = true)
public void testHighCompression() {
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "3a607bc3ebaf84e9dc44e005c5f8a047");
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "dab2aa8e3655139974bbe12a568363d9");
}
@Test(enabled = false)
@Test(enabled = true)
public void testLowCompression() {
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "7c9b4a70c2c90b0a995800aa42852e63");
}
@Test(enabled = false)
@Test(enabled = true)
public void testIndelCompression() {
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "f7b9fa44c10bc4b2247813d2b8dc1973");
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "1255245ed4ebeacda90f0dbb4e4da081");
}
@Test(enabled = false)
@Test(enabled = true)
public void testFilteredDeletionCompression() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("891bd6dcda66611f343e8ff25f34aaeb")));
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("122e4e60c4412a31d0aeb3cce879e841")));
}
/**
@ -61,20 +61,20 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
*
* This bam is simplified to replicate the exact bug with the three provided intervals.
*/
@Test(enabled = false)
@Test(enabled = true)
public void testAddingReadAfterTailingTheStash() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("886b43e1f26ff18425814dc7563931c6")));
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("4b590269cbe3574dbdd5bdc2bc6f5f1c")));
}
/**
* Divide by zero bug reported by GdA and users in the forum. Happens when the downsampler goes over a region where all reads get
* filtered out.
*/
@Test(enabled = false)
@Test(enabled = true)
public void testDivideByZero() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
executeTest("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("93ffdc209d4cc0fc4f0169ca9be55cc2")));
executeTest("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("d8d066304f7c187f182bfb50f39baa0c")));
}
}

View File

@ -0,0 +1,87 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class AFCalcPerformanceUnitTest extends BaseTest {

    /**
     * Data provider for scaling tests: for combinations of sample count,
     * alt-allele count and requested allele counts (ACs), builds an
     * AFCalcTestBuilder for every model able to handle that many alt alleles.
     */
    @DataProvider(name = "ScalingTests")
    public Object[][] makepolyTestProviderLotsOfAlleles() {
        List<Object[]> tests = new ArrayList<Object[]>();

        // list of all high-quality models in the system
        final List<AFCalcFactory.Calculation> biAllelicModels = Arrays.asList(
                AFCalcFactory.Calculation.EXACT_INDEPENDENT,
                AFCalcFactory.Calculation.EXACT_REFERENCE);
        // only EXACT_INDEPENDENT handles more than one alt allele here
        final List<AFCalcFactory.Calculation> multiAllelicModels = Arrays.asList(
                AFCalcFactory.Calculation.EXACT_INDEPENDENT);

        // debugging configuration (kept for convenience):
        // for ( final int nonTypePLs : Arrays.asList(100) ) {
        // for ( final int nSamples : Arrays.asList(10000) ) {
        // final List<Integer> alleleCounts = Arrays.asList(50);
        // for ( final int nAltAlleles : Arrays.asList(1) ) {
        for ( final int nonTypePLs : Arrays.asList(100) ) {
            for ( final int nSamples : Arrays.asList(100, 1000) ) {
                final List<Integer> alleleCounts = Arrays.asList(0, 1, 2, 3, 4, 5, 10, 50, 500);
                for ( final int nAltAlleles : Arrays.asList(1, 2, 3) ) {
                    final List<AFCalcFactory.Calculation> models = nAltAlleles > 1 ? multiAllelicModels : biAllelicModels;
                    for ( final AFCalcFactory.Calculation model : models ) {
                        for ( final List<Integer> ACs : Utils.makePermutations(alleleCounts, nAltAlleles, true) ) {
                            // skip impossible configurations: total AC cannot exceed the chromosome count
                            if ( MathUtils.sum(ACs) < nSamples * 2 ) {
                                final AFCalcTestBuilder testBuilder
                                        = new AFCalcTestBuilder(nSamples, nAltAlleles, model, AFCalcTestBuilder.PriorType.human);
                                tests.add(new Object[]{testBuilder, ACs, nonTypePLs});
                            }
                        }
                    }
                }
            }
        }

        return tests.toArray(new Object[][]{});
    }

    /**
     * Rough lower/upper bounds on the number of model evaluations expected for vc.
     *
     * @return pair of (minEvals, maxEvals); min assumes everyone is hom-var,
     *         max allows up to maxEvalsPerSamplePerAC evaluations per AC plus slack
     */
    private Pair<Integer, Integer> estNumberOfEvaluations(final AFCalcTestBuilder testBuilder, final VariantContext vc, final int nonTypePL) {
        final int evalOverhead = 2; // 2
        final int maxEvalsPerSamplePerAC = 3;

        int minEvals = 0, maxEvals = 0;
        for ( final Allele alt : vc.getAlternateAlleles() ) {
            final int AC = vc.getCalledChrCount(alt);
            minEvals += AC + evalOverhead; // everyone is hom-var
            maxEvals += AC * maxEvalsPerSamplePerAC + 10;
        }

        return new Pair<Integer, Integer>(minEvals, maxEvals);
    }

    /**
     * Checks that the number of evaluations performed by the model falls within
     * the estimated [minEvals, maxEvals] bounds for each generated configuration.
     *
     * Note: made public (was private) so TestNG actually discovers and runs it.
     */
    @Test(dataProvider = "ScalingTests")
    public void testScaling(final AFCalcTestBuilder testBuilder, final List<Integer> ACs, final int nonTypePL) {
        final AFCalc calc = testBuilder.makeModel();
        final double[] priors = testBuilder.makePriors();
        final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL);

        final AFCalcResult result = calc.getLog10PNonRef(vc, priors);

        final Pair<Integer, Integer> expectedNEvaluation = estNumberOfEvaluations(testBuilder, vc, nonTypePL);
        final int minEvals = expectedNEvaluation.getFirst();
        final int maxEvals = expectedNEvaluation.getSecond();

        logger.warn(" min " + minEvals + " obs " + result.getnEvaluations() + " max " + maxEvals + " for test " + testBuilder + " sum(ACs)=" + (int)MathUtils.sum(ACs));
        Assert.assertTrue(result.getnEvaluations() >= minEvals,
                "Actual number of evaluations " + result.getnEvaluations() + " < min number of evals " + minEvals);
        // bugfix: the failure message previously reported minEvals instead of maxEvals
        Assert.assertTrue(result.getnEvaluations() <= maxEvals,
                "Actual number of evaluations " + result.getnEvaluations() + " > max number of evals " + maxEvals);
    }
}

View File

@ -0,0 +1,77 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
public class AFCalcResultUnitTest extends BaseTest {

    /** Pairs input log10 likelihoods with the posteriors AFCalcResult should compute. */
    private static class MyTest {
        final double[] Ls, expectedPosteriors;

        private MyTest(double[] ls, double[] expectedPosteriors) {
            Ls = ls;
            this.expectedPosteriors = expectedPosteriors;
        }

        @Override
        public String toString() {
            return "Ls [" + Utils.join(",", Ls) + "] expectedPosteriors [" + Utils.join(",", expectedPosteriors) + "]";
        }
    }

    @DataProvider(name = "TestComputePosteriors")
    public Object[][] makeTestCombineGLs() {
        List<Object[]> tests = new ArrayList<Object[]>();

        tests.add(new Object[]{new MyTest(log10Even, log10Even)});

        for ( double L0 = -1e9; L0 < 0.0; L0 /= 10.0 ) {
            for ( double L1 = -1e2; L1 < 0.0; L1 /= 100.0 ) {
                final double[] input = new double[]{L0, L1};
                final double[] expected = MathUtils.normalizeFromLog10(input, true);
                tests.add(new Object[]{new MyTest(input, expected)});
            }
        }

        // test that a huge bad likelihood remains, even with a massive better result
        // bugfix: the original condition was 'bigBadL < -1e200', which is false at the
        // starting value -1e50, so the loop body never executed; walk down toward -1e200
        for ( double bigBadL = -1e50; bigBadL > -1e200; bigBadL *= 10 ) {
            for ( final double betterL : Arrays.asList(-1000.0, -100.0, -10.0, -1.0, -0.1, -0.01, -0.001, 0.0)) {
                tests.add(new Object[]{new MyTest(new double[]{bigBadL, betterL}, new double[]{bigBadL, 0.0})});
                tests.add(new Object[]{new MyTest(new double[]{betterL, bigBadL}, new double[]{0.0, bigBadL})});
            }
        }

        // test that a modest bad likelihood with an ~0.0 value doesn't get lost
        for ( final double badL : Arrays.asList(-10000.0, -1000.0, -100.0, -10.0)) {
            tests.add(new Object[]{new MyTest(new double[]{badL, -1e-9}, new double[]{badL, 0.0})});
            tests.add(new Object[]{new MyTest(new double[]{-1e-9, badL}, new double[]{0.0, badL})});
        }

        return tests.toArray(new Object[][]{});
    }

    final static double[] log10Even = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true);
    final static Allele C = Allele.create("C");
    final static List<Allele> alleles = Arrays.asList(Allele.create("A", true), C);

    /**
     * Checks that the posteriors computed by AFCalcResult match the expected
     * values and sum to 1 (in linear space) within 1e-3.
     *
     * Note: made public (was private) so TestNG actually discovers and runs it.
     */
    @Test(enabled = true, dataProvider = "TestComputePosteriors")
    public void testComputingPosteriors(final MyTest data) {
        final AFCalcResult result = new AFCalcResult(new int[]{0}, 1, alleles, data.Ls, log10Even, Collections.singletonMap(C, -1.0));

        Assert.assertEquals(result.getLog10PosteriorOfAFEq0(), data.expectedPosteriors[0], 1e-3, "AF = 0 not expected");
        Assert.assertEquals(result.getLog10PosteriorOfAFGT0(), data.expectedPosteriors[1], 1e-3, "AF > 0 not expected");

        final double[] actualPosteriors = new double[]{result.getLog10PosteriorOfAFEq0(), result.getLog10PosteriorOfAFGT0()};
        Assert.assertEquals(MathUtils.sumLog10(actualPosteriors), 1.0, 1e-3, "Posteriors don't sum to 1 with 1e-3 precision");
    }
}

View File

@ -372,16 +372,14 @@ public class AFCalcUnitTest extends BaseTest {
final VariantContext vc3 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C, G)).make();
final AFCalcTestBuilder.PriorType priorType = AFCalcTestBuilder.PriorType.flat;
final List<AFCalcFactory.Calculation> constrainedModel = Arrays.asList(AFCalcFactory.Calculation.EXACT_CONSTRAINED);
final double TOLERANCE = 0.5;
final List<PNonRefData> initialPNonRefData = Arrays.asList(
// bi-allelic sites
new PNonRefData(vc2, makePL(AA, 0, 10, 10), 0.1666667, TOLERANCE, true),
new PNonRefData(vc2, makePL(AA, 0, 1, 10), 0.4721084, TOLERANCE, false, constrainedModel),
new PNonRefData(vc2, makePL(AA, 0, 1, 1), 0.6136992, TOLERANCE, false, constrainedModel),
new PNonRefData(vc2, makePL(AA, 0, 5, 5), 0.3874259, TOLERANCE, false, constrainedModel),
new PNonRefData(vc2, makePL(AA, 0, 1, 10), 0.4721084, TOLERANCE, false),
new PNonRefData(vc2, makePL(AA, 0, 1, 1), 0.6136992, TOLERANCE, false),
new PNonRefData(vc2, makePL(AA, 0, 5, 5), 0.3874259, TOLERANCE, false),
new PNonRefData(vc2, makePL(AC, 10, 0, 10), 0.9166667, TOLERANCE, true),
new PNonRefData(vc2, makePL(CC, 10, 10, 0), 0.9166667, TOLERANCE, true),
@ -448,7 +446,7 @@ public class AFCalcUnitTest extends BaseTest {
return tests.toArray(new Object[][]{});
}
@Test(enabled = true & ! DEBUG_ONLY, dataProvider = "Models")
@Test(enabled = true && !DEBUG_ONLY, dataProvider = "Models")
public void testBiallelicPriors(final AFCalc model) {
for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) {
@ -456,26 +454,29 @@ public class AFCalcUnitTest extends BaseTest {
for ( int log10NonRefPrior = 1; log10NonRefPrior < 10*REF_PL; log10NonRefPrior += 1 ) {
final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior);
final double[] priors = MathUtils.normalizeFromLog10(MathUtils.toLog10(new double[]{refPrior, (1-refPrior) / 2, (1-refPrior) / 2}), true);
GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior);
final AFCalcResult resultTracker = cfg.execute();
final int actualAC = resultTracker.getAlleleCountsOfMLE()[0];
final double nonRefPrior = (1-refPrior) / 2;
final double[] priors = MathUtils.normalizeFromLog10(MathUtils.toLog10(new double[]{refPrior, nonRefPrior, nonRefPrior}), true);
if ( ! Double.isInfinite(priors[1]) ) {
GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior);
final AFCalcResult resultTracker = cfg.execute();
final int actualAC = resultTracker.getAlleleCountsOfMLE()[0];
final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0];
final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1] - Math.log10(0.5);
final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior));
final double log10NonRefPost = Math.log10(nonRefPost);
final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0];
final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1] - Math.log10(0.5);
final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior));
final double log10NonRefPost = Math.log10(nonRefPost);
if ( ! Double.isInfinite(log10NonRefPost) )
Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), log10NonRefPost, 1e-2);
if ( ! Double.isInfinite(log10NonRefPost) )
Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), log10NonRefPost, 1e-2);
if ( nonRefPost >= 0.9 )
Assert.assertTrue(resultTracker.isPolymorphic(C, -1));
if ( nonRefPost >= 0.9 )
Assert.assertTrue(resultTracker.isPolymorphic(C, -1));
final int expectedMLEAC = 1; // the MLE is independent of the prior
Assert.assertEquals(actualAC, expectedMLEAC,
"actual AC with priors " + log10NonRefPrior + " not expected "
+ expectedMLEAC + " priors " + Utils.join(",", priors));
final int expectedMLEAC = 1; // the MLE is independent of the prior
Assert.assertEquals(actualAC, expectedMLEAC,
"actual AC with priors " + log10NonRefPrior + " not expected "
+ expectedMLEAC + " priors " + Utils.join(",", priors));
}
}
}
}

View File

@ -1,124 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Unit tests for the (deprecated) EXACT_CONSTRAINED allele-frequency calculation model.
 *
 * Verifies that ConstrainedDiploidExactAFCalc#computeMaxACs derives, for each alt
 * allele, a maximum AC bound equal to the called chromosome count in the test data.
 */
public class ConstrainedAFCalculationModelUnitTest extends BaseTest {
    // Shared test alleles: A is the reference; C and G are the alternates.
    static Allele A = Allele.create("A", true);
    static Allele C = Allele.create("C");
    static Allele G = Allele.create("G");

    /**
     * Builds a Genotype whose PL vector is {@code pls} and whose expected
     * genotype call is {@code expectedGT}; delegates to the shared helper.
     */
    protected static Genotype makePL(final List<Allele> expectedGT, int ... pls) {
        return AFCalcUnitTest.makePL(expectedGT, pls);
    }

    /**
     * Generates test cases sweeping the number of non-informative samples and the
     * requested per-allele ACs for both bi-allelic and tri-allelic sites.
     */
    @DataProvider(name = "MaxACsToVisit")
    public Object[][] makeMaxACsToVisit() {
        List<Object[]> tests = new ArrayList<Object[]>();

        final int nSamples = 10;
        for (int nNonInformative = 0; nNonInformative < nSamples - 1; nNonInformative++ ) {
            // each informative diploid sample contributes 2 chromosomes
            final int nChrom = (nSamples - nNonInformative) * 2;
            for ( int i = 0; i < nChrom; i++ ) {
                // bi-allelic
                tests.add(new Object[]{nSamples, Arrays.asList(i), nNonInformative, AFCalcFactory.Calculation.EXACT_CONSTRAINED});

                // tri-allelic
                for ( int j = 0; j < (nChrom - i); j++)
                    tests.add(new Object[]{nSamples, Arrays.asList(i, j), nNonInformative, AFCalcFactory.Calculation.EXACT_CONSTRAINED});
            }
        }

        return tests.toArray(new Object[][]{});
    }

    /**
     * End-to-end check: build a VC with the requested ACs and confirm the
     * constrained model's max-AC bounds match the ACs actually present.
     */
    @Test(enabled = true, dataProvider = "MaxACsToVisit")
    public void testMaxACsToVisit(final int nSamples, final List<Integer> requestedACs, final int nNonInformative, final AFCalcFactory.Calculation modelType) {
        final int nAlts = requestedACs.size();
        final AFCalcTestBuilder testBuilder
                = new AFCalcTestBuilder(nSamples, nAlts, modelType,
                AFCalcTestBuilder.PriorType.human);

        final VariantContext vc = testBuilder.makeACTest(requestedACs, nNonInformative, 100);
        final int[] maxACsToVisit = ((ConstrainedDiploidExactAFCalc)testBuilder.makeModel()).computeMaxACs(vc);

        testExpectedACs(vc, maxACsToVisit);
    }

    // Compares the computed max-AC bounds against the ACs recovered from the VC itself.
    private void testExpectedACs(final VariantContext vc, final int[] maxACsToVisit) {
        // this is necessary because cannot ensure that the tester gives us back the
        // requested ACs due to rounding errors
        final List<Integer> ACs = new ArrayList<Integer>();
        for ( final Allele a : vc.getAlternateAlleles() )
            ACs.add(vc.getCalledChrCount(a));

        for ( int i = 0; i < maxACsToVisit.length; i++ ) {
            Assert.assertEquals(maxACsToVisit[i], (int)ACs.get(i), "Maximum AC computed wasn't equal to the max possible in the construction for alt allele " + i);
        }
    }

    /**
     * Single-genotype test cases: each Genotype's best PL configuration pins
     * the expected max-AC contribution, including non-informative PL vectors.
     */
    @DataProvider(name = "MaxACsGenotypes")
    public Object[][] makeMaxACsForGenotype() {
        List<Object[]> tests = new ArrayList<Object[]>();

        final List<Allele> AA = Arrays.asList(A, A);
        final List<Allele> AC = Arrays.asList(A, C);
        final List<Allele> CC = Arrays.asList(C, C);
        final List<Allele> AG = Arrays.asList(A, G);
        final List<Allele> GG = Arrays.asList(G, G);
        final List<Allele> CG = Arrays.asList(C, G);

        final VariantContext vc2 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C)).make();
        final VariantContext vc3 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C, G)).make();

        tests.add(new Object[]{vc2, makePL(AA, 0, 10, 10)});
        tests.add(new Object[]{vc2, makePL(AC, 10, 0, 10)});
        tests.add(new Object[]{vc2, makePL(CC, 10, 10, 0)});

        // make sure non-informative => 0
        tests.add(new Object[]{vc2, makePL(AA, 0, 0, 0)});
        tests.add(new Object[]{vc3, makePL(AA, 0, 0, 0, 0, 0, 0)});

        // multi-allelics
        tests.add(new Object[]{vc3, makePL(AG, 10, 10, 10, 0, 10, 10)});
        tests.add(new Object[]{vc3, makePL(CG, 10, 10, 10, 10, 0, 10)});
        tests.add(new Object[]{vc3, makePL(GG, 10, 10, 10, 10, 10, 0)});

        // deal with non-informatives third alleles
        tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 0, 0, 10)});
        tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 10, 0, 10)});
        tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 10, 0, 0)});
        tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 0, 0, 0)});
        tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 0, 0, 10)});
        tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 10, 0, 10)});
        tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 10, 0, 0)});
        tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 0, 0, 0)});

        return tests.toArray(new Object[][]{});
    }

    /**
     * Per-genotype check of computeMaxACs.
     *
     * NOTE: was declared 'private'; TestNG requires test methods to be public,
     * so the private method was silently never executed. Made public.
     */
    @Test(enabled = true, dataProvider = "MaxACsGenotypes")
    public void testMakeACByGenotype(final VariantContext vcRoot, final Genotype g) {
        final VariantContext vc = new VariantContextBuilder(vcRoot).genotypes(g).make();
        final AFCalcTestBuilder testBuilder
                = new AFCalcTestBuilder(1, vc.getNAlleles()-1, AFCalcFactory.Calculation.EXACT_CONSTRAINED,
                AFCalcTestBuilder.PriorType.human);

        final int[] maxACsToVisit = ((ConstrainedDiploidExactAFCalc)testBuilder.makeModel()).computeMaxACs(vc);

        testExpectedACs(vc, maxACsToVisit);
    }
}

View File

@ -55,48 +55,14 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
return tests.toArray(new Object[][]{});
}
@DataProvider(name = "TestCombineGLsWithDrops")
public Object[][] makeTestCombineGLsWithDrops() {
List<Object[]> tests = new ArrayList<Object[]>();
final Set<Integer> noDrops = Collections.emptySet();
final Set<Integer> drop1 = Collections.singleton(1);
final Set<Integer> drop2 = Collections.singleton(2);
// AA AB BB AC BC CC
// drop1 (B): AA AC CC
// drop2 (C): AA AB BB
tests.add(new Object[]{1, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 2, 5), noDrops});
tests.add(new Object[]{2, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 4, 9), noDrops});
tests.add(new Object[]{1, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 1, 2), drop2});
tests.add(new Object[]{2, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 3, 5), drop1});
tests.add(new Object[]{1, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(0, 2, 6), noDrops});
tests.add(new Object[]{2, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(1, 0, 2), noDrops});
tests.add(new Object[]{1, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(2, 1, 0), drop2});
tests.add(new Object[]{2, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(5, 2, 0), drop1});
tests.add(new Object[]{1, 2, makePL(10,10,10,10,10, 0), makePL( 0, 8,11), noDrops});
tests.add(new Object[]{2, 2, makePL(10,10,10,10,10, 0), makePL( 5, 7, 0), noDrops});
tests.add(new Object[]{1, 2, makePL(10,10,10,10,10, 0), makePL( 0, 0, 0), drop2});
tests.add(new Object[]{2, 2, makePL(10,10,10,10,10, 0), makePL(10,10, 0), drop1});
return tests.toArray(new Object[][]{});
}
private Genotype makePL(final int ... PLs) {
return AFCalcUnitTest.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), PLs);
}
@Test(enabled = true, dataProvider = "TestCombineGLs")
private void testCombineGLs(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) {
testCombineGLsWithDrops(altIndex, nAlts, testg, expected, Collections.<Integer>emptySet());
}
@Test(enabled = true, dataProvider = "TestCombineGLsWithDrops")
private void testCombineGLsWithDrops(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected, Set<Integer> allelesToDrop) {
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
final Genotype combined = calc.combineGLs(testg, altIndex, allelesToDrop, nAlts);
final Genotype combined = calc.combineGLs(testg, altIndex, nAlts);
Assert.assertEquals(combined.getPL(), expected.getPL(),
"Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL()));
@ -120,22 +86,21 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
final Genotype gACG = makePL( 0, 1, 2, 3, 4, 5);
final Genotype gAGC = makePL( 0, 4, 5, 1, 3, 2);
final Genotype gACcombined = makePL(0, 2, 5);
final Genotype gACcombined2 = makePL(0, 1, 4);
final Genotype gAGcombined = makePL(0, 4, 9);
final Genotype gACdropped = makePL(0, 1, 2);
final Genotype gAGdropped = makePL(0, 3, 5);
// biallelic
tests.add(new Object[]{vcAC.genotypes(gACcombined).make(), Arrays.asList(vcAC.genotypes(gACcombined).make())});
// tri-allelic
tests.add(new Object[]{vcACG.genotypes(gACG).make(), Arrays.asList(vcAC.genotypes(gACcombined).make(), vcAG.genotypes(gAGdropped).make())});
tests.add(new Object[]{vcAGC.genotypes(gAGC).make(), Arrays.asList(vcAG.genotypes(gAGcombined).make(), vcAC.genotypes(gACdropped).make())});
tests.add(new Object[]{vcACG.genotypes(gACG).make(), Arrays.asList(vcAC.genotypes(gACcombined).make(), vcAG.genotypes(gAGcombined).make())});
tests.add(new Object[]{vcAGC.genotypes(gAGC).make(), Arrays.asList(vcAG.genotypes(gAGcombined).make(), vcAC.genotypes(gACcombined2).make())});
return tests.toArray(new Object[][]{});
}
@Test(enabled = false, dataProvider = "TestMakeAlleleConditionalContexts")
@Test(enabled = true, dataProvider = "TestMakeAlleleConditionalContexts")
private void testMakeAlleleConditionalContexts(final VariantContext vc, final List<VariantContext> expectedVCs) {
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
final List<VariantContext> biAllelicVCs = calc.makeAlleleConditionalContexts(vc);
@ -148,7 +113,8 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
Assert.assertEquals(actual.getAlleles(), expected.getAlleles());
for ( int j = 0; j < actual.getNSamples(); j++ )
Assert.assertEquals(actual.getGenotype(j).getPL(), expected.getGenotype(j).getPL());
Assert.assertEquals(actual.getGenotype(j).getPL(), expected.getGenotype(j).getPL(),
"expected PLs " + Utils.join(",", expected.getGenotype(j).getPL()) + " not equal to actual " + Utils.join(",", actual.getGenotype(j).getPL()));
}
}

View File

@ -0,0 +1,30 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
* GenomeLocs are very useful objects to keep track of genomic locations and perform set operations
* with them.
*
* However, GenomeLocs are bound to strict validation through the GenomeLocParser and cannot
* be created easily for small tasks that do not require the rigors of the GenomeLocParser validation
*
* SimpleGenomeLoc is a simple utility to create GenomeLocs without going through the parser. Should
* only be used outside of the engine.
*
* User: carneiro
* Date: 10/16/12
* Time: 2:07 PM
*/
public class SimpleGenomeLoc extends GenomeLoc {
    // Whether this region has been fully processed. Set exactly once at
    // construction and never mutated (no setter exists), so mark it final.
    private final boolean finished;

    /**
     * Creates a GenomeLoc directly, bypassing GenomeLocParser validation.
     *
     * @param contigName  name of the contig this location is on
     * @param contigIndex index of the contig (presumably its position in the
     *                    sequence dictionary -- confirm against GenomeLoc)
     * @param start       start coordinate, passed through to GenomeLoc unchanged
     * @param stop        stop coordinate, passed through to GenomeLoc unchanged
     * @param finished    whether this region is complete (see {@link #isFinished()})
     */
    public SimpleGenomeLoc(String contigName, int contigIndex, int start, int stop, boolean finished) {
        super(contigName, contigIndex, start, stop);
        this.finished = finished;
    }

    /**
     * @return true if this region was marked finished at construction time
     */
    public boolean isFinished() {
        return finished;
    }
}

View File

@ -29,24 +29,16 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.List;
/**
* Generic interface for calculating the probability of alleles segregating given priors and genotype likelihoods
*
*/
public abstract class AFCalc implements Cloneable {
private final static Logger defaultLogger = Logger.getLogger(AFCalc.class);
@ -58,8 +50,8 @@ public abstract class AFCalc implements Cloneable {
protected Logger logger = defaultLogger;
private SimpleTimer callTimer = new SimpleTimer();
private PrintStream callReport = null;
private final AFCalcResultTracker resultTracker;
private ExactCallLogger exactCallLogger = null;
protected AFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
@ -74,7 +66,7 @@ public abstract class AFCalc implements Cloneable {
}
public void enableProcessLog(final File exactCallsLog) {
initializeOutputFile(exactCallsLog);
exactCallLogger = new ExactCallLogger(exactCallsLog);
}
public void setLogger(Logger logger) {
@ -102,8 +94,8 @@ public abstract class AFCalc implements Cloneable {
final AFCalcResult result = computeLog10PNonRef(vcWorking, log10AlleleFrequencyPriors);
final long nanoTime = callTimer.getElapsedTimeNano();
if ( callReport != null )
printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, resultTracker.getLog10PosteriorOfAFzero());
if ( exactCallLogger != null )
exactCallLogger.printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, result);
return result;
}
@ -170,55 +162,8 @@ public abstract class AFCalc implements Cloneable {
return Math.max(maxAlternateAllelesToGenotype, maxAlternateAllelesForIndels);
}
// ---------------------------------------------------------------------------
//
// Print information about the call to the calls log
//
// ---------------------------------------------------------------------------
private void initializeOutputFile(final File outputFile) {
try {
if (outputFile != null) {
callReport = new PrintStream( new FileOutputStream(outputFile) );
callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value")));
}
} catch ( FileNotFoundException e ) {
throw new UserException.CouldNotCreateOutputFile(outputFile, e);
}
}
private void printCallInfo(final VariantContext vc,
final double[] log10AlleleFrequencyPriors,
final long runtimeNano,
final double log10PosteriorOfAFzero) {
printCallElement(vc, "type", "ignore", vc.getType());
int allelei = 0;
for ( final Allele a : vc.getAlleles() )
printCallElement(vc, "allele", allelei++, a.getDisplayString());
for ( final Genotype g : vc.getGenotypes() )
printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString());
for ( int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++ )
printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]);
printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
printCallElement(vc, "log10PosteriorOfAFzero", "ignore", log10PosteriorOfAFzero);
callReport.flush();
}
private void printCallElement(final VariantContext vc,
final Object variable,
final Object key,
final Object value) {
final String loc = String.format("%s:%d", vc.getChr(), vc.getStart());
callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value)));
}
public AFCalcResultTracker getResultTracker() {
return resultTracker;
}
}

View File

@ -30,10 +30,6 @@ public class AFCalcFactory {
/** reference implementation of multi-allelic EXACT model */
EXACT_REFERENCE(ReferenceDiploidExactAFCalc.class, 2, -1),
/** expt. implementation */
@Deprecated
EXACT_CONSTRAINED(ConstrainedDiploidExactAFCalc.class, 2, -1),
/** expt. implementation -- for testing only */
EXACT_INDEPENDENT(IndependentAllelesDiploidExactAFCalc.class, 2, -1),

View File

@ -31,10 +31,7 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
* Describes the results of the AFCalc
@ -217,6 +214,14 @@ public class AFCalcResult {
return log10PriorsOfAC[AF1p];
}
@Override
public String toString() {
    // Build one summary entry per alt allele: its MLE count and the
    // posterior probability that its AF > 0.
    final List<String> perAlleleSummaries = new ArrayList<String>();
    for ( final Allele allele : getAllelesUsedInGenotyping() ) {
        if ( allele.isNonReference() ) {
            perAlleleSummaries.add(String.format("%s => MLE %d / posterior %.2f",
                    allele, getAlleleCountAtMLE(allele), getLog10PosteriorOfAFGt0ForAllele(allele)));
        }
    }
    return String.format("AFCalc%n\t\tlog10PosteriorOfAFGT0=%.2f%n\t\t%s",
            getLog10LikelihoodOfAFGT0(), Utils.join("\n\t\t", perAlleleSummaries));
}
/**
* Are we sufficiently confidence in being non-ref that the site is considered polymorphic?
*
@ -233,6 +238,19 @@ public class AFCalcResult {
return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
}
/**
 * Are any of the alleles polymorphic w.r.t. #isPolymorphic?
 *
 * @param log10minPNonRef the confidence threshold, in log10 space
 * @return true if any are poly, false otherwise
 */
public boolean anyPolymorphic(final double log10minPNonRef) {
    // The reference allele is never considered; stop at the first alt
    // allele that clears the polymorphism threshold.
    boolean foundPolymorphicAllele = false;
    for ( final Allele allele : getAllelesUsedInGenotyping() ) {
        if ( ! allele.isNonReference() )
            continue;
        if ( isPolymorphic(allele, log10minPNonRef) ) {
            foundPolymorphicAllele = true;
            break;
        }
    }
    return foundPolymorphicAllele;
}
/**
* Returns the log10 probability that allele is segregating
*
@ -266,15 +284,7 @@ public class AFCalcResult {
final double[] log10UnnormalizedPosteriors = new double[log10LikelihoodsOfAC.length];
for ( int i = 0; i < log10LikelihoodsOfAC.length; i++ )
log10UnnormalizedPosteriors[i] = log10LikelihoodsOfAC[i] + log10PriorsOfAC[i];
// necessary because the posteriors may be so skewed that the log-space normalized value isn't
// good, so we have to try both log-space normalization as well as the real-space normalization if the
// result isn't good
final double[] logNormalized = MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, true);
if ( goodLog10ProbVector(logNormalized, logNormalized.length, true) )
return logNormalized;
else
return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false);
return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false);
}
/**

View File

@ -1,107 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@Deprecated
public class ConstrainedDiploidExactAFCalc extends DiploidExactAFCalc {
    protected ConstrainedDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
        super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
    }

    /**
     * Builds a StateTracker whose AC search space is capped by the per-allele
     * maxima derived from each sample's best-genotype configuration, and
     * records those limits on the result tracker.
     */
    protected StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker) {
        final int[] acLimits = computeMaxACs(vc);
        resultTracker.setAClimits(acLimits);
        return new StateTracker(acLimits);
    }

    /**
     * Computes the maximum ACs we need to consider for each alt allele
     *
     * Walks over the genotypes in VC, and computes for each alt allele the maximum
     * AC we need to consider in that alt allele dimension. Does the calculation
     * based on the PLs in each genotype g, choosing to update the max AC for the
     * alt alleles corresponding to that PL. Only takes the first lowest PL,
     * if there are multiple genotype configurations with the same PL value. It
     * takes values in the order of the alt alleles.
     *
     * @param vc the variant context we will compute max alt alleles for
     * @return a vector of max alt alleles, indexed by alt allele, so result[0] is the AC of the
     *          first alt allele.
     */
    @Ensures("result != null")
    protected final int[] computeMaxACs(final VariantContext vc) {
        // one counter slot per alt allele (ref excluded), all starting at 0
        final int[] maxACs = new int[vc.getNAlleles() - 1];

        for ( final Genotype g : vc.getGenotypes() )
            updateMaxACs(g, maxACs);

        return maxACs;
    }

    /**
     * Update the maximum achievable allele counts in maxAC according to the PLs in g
     *
     * Picks the single most likely genotype configuration from g's PL vector and
     * credits its allele pair to maxACs: e.g. best PL at 0/1 adds 1 to alt allele 1,
     * best PL at 1/1 adds 2. A best PL at 0/0 adds nothing.
     *
     * Ties are broken greedily: the FIRST minimum PL (in PL-vector order) wins.
     *
     * @param g the genotype to update
     * @param maxACs the max allele count vector for alt alleles (starting at 0 => first alt allele)
     */
    @Requires({
            "g != null",
            "maxACs != null",
            "goodMaxACs(maxACs)"})
    private void updateMaxACs(final Genotype g, final int[] maxACs) {
        final int[] pls = g.getLikelihoods().getAsPLs();

        // argmin over the PL vector; strict '<' keeps the first minimum,
        // preserving the documented greedy tie-breaking behavior
        int bestIndex = 0;
        for ( int i = 1; i < pls.length; i++ ) {
            if ( pls[i] < pls[bestIndex] )
                bestIndex = i;
        }

        // map the flat PL index back to its diploid allele pair and credit both
        final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair bestPair = GenotypeLikelihoods.getAllelePair(bestIndex);
        updateMaxACs(maxACs, bestPair.alleleIndex1);
        updateMaxACs(maxACs, bestPair.alleleIndex2);
    }

    /**
     * Simple helper. Update max alt alleles maxACs according to the allele index (where 0 == ref)
     *
     * If alleleI == 0 (the reference) nothing changes; otherwise maxACs[alleleI - 1]
     * is incremented by one.
     *
     * @param maxACs array of max alt allele ACs
     * @param alleleI the index (relative to 0) to update a count of 1 in max alt alleles.
     */
    @Requires({
            "alleleI >= 0",
            "(alleleI - 1) < maxACs.length",
            "goodMaxACs(maxACs)"})
    private void updateMaxACs(final int[] maxACs, final int alleleI) {
        final boolean isAltAllele = alleleI > 0;
        if ( isAltAllele )
            maxACs[alleleI - 1]++;
    }

    // Contract helper: a sane max-AC vector must have a non-negative total.
    private static boolean goodMaxACs(final int[] maxACs) {
        return MathUtils.sum(maxACs) >= 0;
    }
}

View File

@ -36,7 +36,9 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy);
}
protected abstract StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker);
protected StateTracker makeMaxLikelihood(VariantContext vc, AFCalcResultTracker resultTracker) {
return new StateTracker();
}
@Override
protected AFCalcResult computeLog10PNonRef(final VariantContext vc,

View File

@ -0,0 +1,179 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import com.google.java.contract.Requires;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.*;
import java.util.*;
/**
* Allows us to write out and read in information about exact calls (site, alleles, PLs, etc) in tabular format
*
 * Once opened, calls can be written to disk with printCallInfo
*/
public class ExactCallLogger implements Cloneable {
// tab-separated output stream; opened by the constructor, null until then
private PrintStream callReport = null;
/**
 * Create a new ExactCallLogger writing its output to outputFile.
 *
 * Opens the file immediately with a large (10 MB) buffer and writes the
 * "loc variable key value" header row.
 *
 * @param outputFile the file to write the tabular exact-call log to
 */
public ExactCallLogger(final File outputFile) {
try {
callReport = new PrintStream(new BufferedOutputStream(new FileOutputStream(outputFile), 10000000));
callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value")));
} catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(outputFile, e);
}
}
/**
 * Summarizes information about an exact call that happened
 */
public static class ExactCall {
// the variant context the call was made on
final VariantContext vc;
// call runtime; formatted below as runtime / 1e9, so presumably nanoseconds -- confirm at call sites
final long runtime;
// the AFCalcResult produced by the original calculation
final AFCalcResult originalCall;
public ExactCall(VariantContext vc, final long runtime, final AFCalcResult originalCall) {
this.vc = vc;
this.runtime = runtime;
this.originalCall = originalCall;
}
@Override
public String toString() {
return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s",
vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(),
originalCall.getLog10PosteriorOfAFGT0(),
new AutoFormattingTime(runtime / 1e9).toString());
}
}
/**
 * Writes one call to the log as a series of tab-separated records.
 *
 * The "type" record is written FIRST for each call; readExactLog below relies
 * on it as the record separator between consecutive calls.
 *
 * @param vc the variant context that was called
 * @param log10AlleleFrequencyPriors the priors used, one record per index
 * @param runtimeNano call runtime in nanoseconds
 * @param result the AFCalcResult of the call
 */
protected final void printCallInfo(final VariantContext vc,
final double[] log10AlleleFrequencyPriors,
final long runtimeNano,
final AFCalcResult result) {
printCallElement(vc, "type", "ignore", vc.getType());
int allelei = 0;
for (final Allele a : vc.getAlleles())
printCallElement(vc, "allele", allelei++, a.getDisplayString());
for (final Genotype g : vc.getGenotypes())
printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString());
for (int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++)
printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]);
printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
printCallElement(vc, "log10PosteriorOfAFEq0", "ignore", result.getLog10PosteriorOfAFEq0());
printCallElement(vc, "log10PosteriorOfAFGt0", "ignore", result.getLog10PosteriorOfAFGT0());
// per-alt-allele MLE count and pNonRef records
for ( final Allele allele : result.getAllelesUsedInGenotyping() ) {
if ( allele.isNonReference() ) {
printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele));
printCallElement(vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele));
}
}
callReport.flush();
}
// Writes a single "loc variable key value" row for vc.
@Requires({"vc != null", "variable != null", "key != null", "value != null", "callReport != null"})
private void printCallElement(final VariantContext vc,
final Object variable,
final Object key,
final Object value) {
final String loc = String.format("%s:%d", vc.getChr(), vc.getStart());
callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value)));
}
/**
 * Read in a list of ExactCall objects from reader, keeping only those
 * with starts in startsToKeep or all sites (if this is empty)
 *
 * @param reader a just-opened reader sitting at the start of the file
 * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should be kept
 * @param parser a genome loc parser to create genome locs
 * @return a list of ExactCall objects in reader
 * @throws IOException
 */
public static List<ExactCall> readExactLog(final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser) throws IOException {
if ( reader == null ) throw new IllegalArgumentException("reader cannot be null");
if ( startsToKeep == null ) throw new IllegalArgumentException("startsToKeep cannot be null");
if ( parser == null ) throw new IllegalArgumentException("GenomeLocParser cannot be null");
List<ExactCall> calls = new LinkedList<ExactCall>();
// skip the header line
reader.readLine();
// skip the first "type" line
reader.readLine();
// outer loop: one iteration per call record; inner loop accumulates that
// call's rows until the NEXT "type" row (which acts as the record separator)
while (true) {
// fresh accumulators for the call currently being parsed
final VariantContextBuilder builder = new VariantContextBuilder();
final List<Allele> alleles = new ArrayList<Allele>();
final List<Genotype> genotypes = new ArrayList<Genotype>();
final double[] posteriors = new double[2];
// flat 50/50 prior in log10 space, used for every reconstructed call
final double[] priors = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true);
final List<Integer> mle = new ArrayList<Integer>();
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
long runtimeNano = -1;
GenomeLoc currentLoc = null;
while (true) {
final String line = reader.readLine();
// EOF: the last accumulated call is NOT flushed here; only a "type"
// row flushes a call, so the file's leading "type" row was skipped above
if (line == null)
return calls;
final String[] parts = line.split("\t");
final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]);
final String variable = parts[1];
final String key = parts[2];
final String value = parts[3];
// first row of this record establishes the call's location
if (currentLoc == null)
currentLoc = lineLoc;
if (variable.equals("type")) {
// start of the NEXT call: flush the accumulated one if its start is wanted
if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) {
builder.alleles(alleles);
// stop is derived from the reference allele's length
final int stop = currentLoc.getStart() + alleles.get(0).length() - 1;
builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
builder.genotypes(genotypes);
final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{}));
final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
calls.add(new ExactCall(builder.make(), runtimeNano, result));
}
break;
} else if (variable.equals("allele")) {
// key "0" marks the reference allele (written first by printCallInfo)
final boolean isRef = key.equals("0");
alleles.add(Allele.create(value, isRef));
} else if (variable.equals("PL")) {
// key is the sample name; value is the PL string
final GenotypeBuilder gb = new GenotypeBuilder(key);
gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
genotypes.add(gb.make());
} else if (variable.equals("log10PosteriorOfAFEq0")) {
posteriors[0] = Double.valueOf(value);
} else if (variable.equals("log10PosteriorOfAFGt0")) {
posteriors[1] = Double.valueOf(value);
} else if (variable.equals("MLE")) {
mle.add(Integer.valueOf(value));
} else if (variable.equals("pNonRefByAllele")) {
final Allele a = Allele.create(key);
log10pNonRefByAllele.put(a, Double.valueOf(value));
} else if (variable.equals("runtime.nano")) {
runtimeNano = Long.valueOf(value);
} else {
// nothing to do -- unrecognized rows (e.g. "priorI") are deliberately ignored
}
}
}
}
}

View File

@ -32,31 +32,97 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
/**
* Computes the conditional bi-allelic exact results
*
* Suppose vc contains 2 alt allele: A* with C and T. This function first computes:
*
* (1) P(D | AF_c > 0 && AF_t == *) [i.e., T can be anything]
*
* it then computes the conditional probability on AF_c == 0:
*
* (2) P(D | AF_t > 0 && AF_c == 0)
*
* Thinking about this visually, we have the following likelihood matrix where each cell is
* the P(D | AF_c == i && AF_t == j):
*
* 0 AF_c > 0
* -----------------
* 0 | |
* |--|-------------
* a | |
* f | |
* _ | |
* t | |
* > | |
* 0 | |
*
* What we really want to know how
*
* (3) P(D | AF_c == 0 & AF_t == 0)
*
* compares with
*
* (4) P(D | AF_c > 0 || AF_t > 0)
*
* This is effectively asking for the value in the upper left vs. the sum of all cells.
*
* This class implements the conditional likelihoods summation for any number of alt
* alleles, where each alt allele has its EXACT probability of segregating calculated by
* reducing each alt B into the case XB and computing P(D | AF_b > 0 ) as follows:
*
* Suppose we have for a A/B/C site the following GLs:
*
* AA AB BB AC BC CC
*
* and we want to get the bi-allelic GLs for X/B, where X is everything not B
*
* XX = AA + AC + CC (since X = A or C)
* XB = AB + BC
* BB = BB
*
* After each allele has its probability calculated we compute the joint posterior
* as P(D | AF_* == 0) = prod_i P (D | AF_i == 0), after applying the theta^i
* prior for the ith least likely allele.
*/
public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
/**
* The min. confidence of an allele to be included in the joint posterior.
*/
private final static double MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR = Math.log10(1e-20);
private final static int[] BIALLELIC_NON_INFORMATIVE_PLS = new int[]{0,0,0};
private final static List<Allele> BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
/**
* Sorts AFCalcResults by their posteriors of AF > 0, so the
*/
private final static class CompareAFCalcResultsByPNonRef implements Comparator<AFCalcResult> {
@Override
public int compare(AFCalcResult o1, AFCalcResult o2) {
return Double.compare(o1.getLog10LikelihoodOfAFGT0(), o2.getLog10LikelihoodOfAFGT0());
return Double.compare(o1.getLog10PosteriorOfAFGT0(), o2.getLog10PosteriorOfAFGT0());
}
}
private final static CompareAFCalcResultsByPNonRef compareAFCalcResultsByPNonRef = new CompareAFCalcResultsByPNonRef();
final ReferenceDiploidExactAFCalc refModel;
/**
* The AFCalc model we are using to do the bi-allelic computation
*/
final AFCalc biAlleleExactModel;
protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
refModel = new ReferenceDiploidExactAFCalc(nSamples, 1, 1, ploidy);
}
@Override
protected StateTracker makeMaxLikelihood(VariantContext vc, AFCalcResultTracker resultTracker) {
return refModel.makeMaxLikelihood(vc, resultTracker);
biAlleleExactModel = new ReferenceDiploidExactAFCalc(nSamples, 1, 1, ploidy);
}
/**
* Trivial subclass that helps with debugging by keeping track of the supporting information for this joint call
*/
private static class MyAFCalcResult extends AFCalcResult {
/**
* List of the supporting bi-allelic AFCalcResults that went into making this multi-allelic joint call
*/
final List<AFCalcResult> supporting;
private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pNonRefByAllele, List<AFCalcResult> supporting) {
@ -68,121 +134,89 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
@Override
public AFCalcResult computeLog10PNonRef(final VariantContext vc,
final double[] log10AlleleFrequencyPriors) {
final double log10LikelihoodOfRef = computelog10LikelihoodOfRef(vc);
final List<AFCalcResult> independentResultTrackers = computeAlleleConditionalExact(vc, log10AlleleFrequencyPriors);
final List<AFCalcResult> independentResultTrackers = computeAlleleIndependentExact(vc, log10AlleleFrequencyPriors);
final List<AFCalcResult> withMultiAllelicPriors = applyMultiAllelicPriors(independentResultTrackers);
return combineIndependentPNonRefs(vc, log10LikelihoodOfRef, withMultiAllelicPriors);
return combineIndependentPNonRefs(vc, withMultiAllelicPriors);
}
protected final double computelog10LikelihoodOfRef(final VariantContext vc) {
// this value just the likelihood of AF == 0 in the special constrained multi-allelic calculation
final List<double[]> allGLs = getGLs(vc.getGenotypes(), false);
double log10LikelihoodOfHomRef = 0.0;
// TODO -- can be easily optimized (currently looks at all GLs via getGLs)
for ( int i = 0; i < allGLs.size(); i++ ) {
final double[] GLs = allGLs.get(i);
log10LikelihoodOfHomRef += GLs[0];
//log10LikelihoodOfHomRef += MathUtils.normalizeFromLog10(GLs, true)[0];
}
return log10LikelihoodOfHomRef;
}
/**
* Computes the conditional bi-allelic exact results
* Compute the conditional exact AFCalcResult for each allele in vc independently, returning
* the result of each, in order of the alt alleles in VC
*
* Suppose vc contains 2 alt allele: A* with C and T. This function first computes:
*
* (1) P(D | AF_c > 0 && AF_t == *) [i.e., T can be anything]
*
* it then computes the conditional probability on AF_c == 0:
*
* (2) P(D | AF_t > 0 && AF_c == 0)
*
* Thinking about this visually, we have the following likelihood matrix where each cell is
* the P(D | AF_c == i && AF_t == j):
*
* 0 AF_c > 0
* -----------------
* 0 | |
* |--|-------------
* a | |
* f | |
* _ | |
* t | |
* > | |
* 0 | |
*
* What we really want to know how
*
* (3) P(D | AF_c == 0 & AF_t == 0)
*
* compares with
*
* (4) P(D | AF_c > 0 || AF_t > 0)
*
* This is effectively asking for the value in the upper left vs. the sum of all cells.
*
* The quantity (1) is the same of all cells except those with AF_c == 0, while (2) is the
* band at the top where AF_t > 0 and AF_c == 0
*
* So (4) is actually (1) + (2).
*
* (3) is the direct inverse of the (1) and (2), as we are simultaneously calculating
*
* (1*) P(D | AF_c == 0 && AF_t == *) [i.e., T can be anything]
* (2*) P(D | AF_t == 0 && AF_c == 0) [TODO -- note this value looks like the thing we are supposed to use]
*
* This function implements the conditional likelihoods summation for any number of alt
* alleles (not just the tri-allelic case), where each subsequent variant context is
* further constrained such that each already considered allele x has AF_x == 0 in the
* compute.
*
* @param vc
* @param log10AlleleFrequencyPriors
* @return
* @param vc the VariantContext we want to analyze
* @param log10AlleleFrequencyPriors the priors
* @return a list of the AFCalcResults for each bi-allelic sub context of vc
*/
protected List<AFCalcResult> computeAlleleConditionalExact(final VariantContext vc,
final double[] log10AlleleFrequencyPriors) {
@Requires({"vc != null", "log10AlleleFrequencyPriors != null"})
@Ensures("goodIndependentResult(vc, result)")
protected final List<AFCalcResult> computeAlleleIndependentExact(final VariantContext vc,
final double[] log10AlleleFrequencyPriors) {
final List<AFCalcResult> results = new LinkedList<AFCalcResult>();
for ( final VariantContext subvc : makeAlleleConditionalContexts(vc) ) {
final AFCalcResult resultTracker = refModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors);
final AFCalcResult resultTracker = biAlleleExactModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors);
results.add(resultTracker);
}
return results;
}
protected List<VariantContext> makeAlleleConditionalContexts(final VariantContext vc) {
/**
* Helper function to ensure that the computeAlleleIndependentExact is returning reasonable results
*/
private static boolean goodIndependentResult(final VariantContext vc, final List<AFCalcResult> results) {
if ( results.size() != vc.getNAlleles() - 1) return false;
for ( int i = 0; i < results.size(); i++ ) {
if ( results.get(i).getAllelesUsedInGenotyping().size() != 2 )
return false;
if ( ! results.get(i).getAllelesUsedInGenotyping().contains(vc.getAlternateAllele(i)) )
return false;
}
return true;
}
/**
* Returns the bi-allelic variant context for each alt allele in vc with bi-allelic likelihoods, in order
*
* @param vc the variant context to split. Must have n.alt.alleles > 1
* @return a bi-allelic variant context for each alt allele in vc
*/
@Requires({"vc != null", "vc.getNAlleles() > 1"})
@Ensures("result.size() == vc.getNAlleles() - 1")
protected final List<VariantContext> makeAlleleConditionalContexts(final VariantContext vc) {
final int nAltAlleles = vc.getNAlleles() - 1;
final List<VariantContext> vcs = new LinkedList<VariantContext>();
final List<Allele> afZeroAlleles = new LinkedList<Allele>();
for ( int altI = 0; altI < nAltAlleles; altI++ ) {
final Allele altAllele = vc.getAlternateAllele(altI);
final List<Allele> biallelic = Arrays.asList(vc.getReference(), altAllele);
vcs.add(biallelicCombinedGLs(vc, biallelic, afZeroAlleles, altI + 1));
//afZeroAlleles.add(altAllele);
vcs.add(biallelicCombinedGLs(vc, altI + 1));
}
return vcs;
}
protected VariantContext biallelicCombinedGLs(final VariantContext rootVC, final List<Allele> biallelic, final List<Allele> afZeroAlleles, final int allele2) {
/**
* Create a single bi-allelic variant context from rootVC with alt allele with index altAlleleIndex
*
* @param rootVC the root (potentially multi-allelic) variant context
* @param altAlleleIndex index of the alt allele, from 0 == first alt allele
* @return a bi-allelic variant context based on rootVC
*/
@Requires({"rootVC.getNAlleles() > 1", "altAlleleIndex < rootVC.getNAlleles()"})
@Ensures({"result.isBiallelic()"})
protected final VariantContext biallelicCombinedGLs(final VariantContext rootVC, final int altAlleleIndex) {
if ( rootVC.isBiallelic() ) {
if ( ! afZeroAlleles.isEmpty() ) throw new IllegalArgumentException("Root VariantContext is biallelic but afZeroAlleles wasn't empty: " + afZeroAlleles);
return rootVC;
} else {
final Set<Integer> allelesToDiscard = new HashSet<Integer>(rootVC.getAlleleIndices(afZeroAlleles));
final int nAlts = rootVC.getNAlleles() - 1;
final List<Genotype> biallelicGenotypes = new ArrayList<Genotype>(rootVC.getNSamples());
for ( final Genotype g : rootVC.getGenotypes() )
biallelicGenotypes.add(combineGLs(g, allele2, allelesToDiscard, nAlts));
biallelicGenotypes.add(combineGLs(g, altAlleleIndex, nAlts));
final VariantContextBuilder vcb = new VariantContextBuilder(rootVC);
vcb.alleles(biallelic);
final Allele altAllele = rootVC.getAlternateAllele(altAlleleIndex - 1);
vcb.alleles(Arrays.asList(rootVC.getReference(), altAllele));
vcb.genotypes(biallelicGenotypes);
return vcb.make();
}
@ -203,30 +237,16 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
* XB = AB + BC
* BB = BB
*
* Supports the additional mode of simply dropping GLs whose allele index occurs in allelesToDiscard. This is
* useful in the case where you want to drop alleles (not combine them), such as above:
*
* AA AB BB AC BC CC
*
* and we want to get the bi-allelic GLs for X/B, where X is everything not B, but dropping C (index 2)
*
* XX = AA (since X = A and C is dropped)
* XB = AB
* BB = BB
*
* This allows us to recover partial GLs the correspond to any allele in allelesToDiscard having strictly
* AF == 0.
*
* @param original the original multi-allelic genotype
* @param altIndex the index of the alt allele we wish to keep in the bialleic case -- with ref == 0
* @param nAlts the total number of alt alleles
* @return a new biallelic genotype with appropriate PLs
*/
@Requires({"original.hasLikelihoods()", "! allelesToDiscard.contains(altIndex)"})
@Requires({"original.hasLikelihoods()"}) // TODO -- add ploidy == 2 test "original.getPLs() == null || original.getPLs().length == 3"})
@Ensures({"result.hasLikelihoods()", "result.getPL().length == 3"})
protected Genotype combineGLs(final Genotype original, final int altIndex, final Set<Integer> allelesToDiscard, final int nAlts ) {
protected Genotype combineGLs(final Genotype original, final int altIndex, final int nAlts ) {
if ( original.isNonInformative() )
return new GenotypeBuilder(original).PL(new int[]{0,0,0}).alleles(BIALLELIC_NOCALL).make();
return new GenotypeBuilder(original).PL(BIALLELIC_NON_INFORMATIVE_PLS).alleles(BIALLELIC_NOCALL).make();
if ( altIndex < 1 || altIndex > nAlts ) throw new IllegalStateException("altIndex must be between 1 and nAlts " + nAlts);
@ -236,10 +256,6 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
for ( int index = 0; index < normalizedPr.length; index++ ) {
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(index);
// just continue if we shouldn't include the pair because it's in the discard set
if ( discardAllelePair(pair, allelesToDiscard) )
continue;
if ( pair.alleleIndex1 == altIndex ) {
if ( pair.alleleIndex2 == altIndex )
// hom-alt case
@ -263,11 +279,7 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make();
}
protected boolean discardAllelePair(final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair, Set<Integer> allelesToDiscard) {
return allelesToDiscard.contains(pair.alleleIndex1) || allelesToDiscard.contains(pair.alleleIndex2);
}
protected List<AFCalcResult> applyMultiAllelicPriors(final List<AFCalcResult> conditionalPNonRefResults) {
protected final List<AFCalcResult> applyMultiAllelicPriors(final List<AFCalcResult> conditionalPNonRefResults) {
final ArrayList<AFCalcResult> sorted = new ArrayList<AFCalcResult>(conditionalPNonRefResults);
// sort the results, so the most likely allele is first
@ -291,10 +303,11 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
/**
* Take the independent estimates of pNonRef for each alt allele and combine them into a single result
*
* TODO -- add more docs
*
* @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently
*/
protected AFCalcResult combineIndependentPNonRefs(final VariantContext vc,
final double log10LikelihoodsOfACEq0,
final List<AFCalcResult> sortedResultsWithThetaNPriors) {
int nEvaluations = 0;
final int nAltAlleles = sortedResultsWithThetaNPriors.size();
@ -302,8 +315,8 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
final double[] log10PriorsOfAC = new double[2];
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
// this value is a sum in real space so we need to store values to sum up later
final double[] log10LikelihoodsOfACGt0 = new double[nAltAlleles];
// this value is a sum in log space
double log10PosteriorOfACEq0Sum = 0.0;
for ( final AFCalcResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) {
final Allele altAllele = sortedResultWithThetaNPriors.getAllelesUsedInGenotyping().get(1);
@ -316,7 +329,8 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
log10PriorsOfAC[1] += sortedResultWithThetaNPriors.getLog10PriorOfAFGT0();
// the AF > 0 case requires us to store the normalized likelihood for later summation
log10LikelihoodsOfACGt0[altI] = sortedResultWithThetaNPriors.getLog10LikelihoodOfAFGT0();
if ( sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0() > MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR )
log10PosteriorOfACEq0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0();
// bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0());
@ -325,14 +339,19 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
nEvaluations += sortedResultWithThetaNPriors.nEvaluations;
}
// the log10 likelihoods are the sum of the log10 likelihoods across all alt alleles
final double[] log10LikelihoodsOfAC = new double[]{
log10LikelihoodsOfACEq0,
MathUtils.log10sumLog10(log10LikelihoodsOfACGt0)};
// In principle, if B_p = x and C_p = y are the probabilities of being poly for alleles B and C,
// the probability of being poly is (1 - B_p) * (1 - C_p) = (1 - x) * (1 - y). We want to estimate confidently
// log10((1 - x) * (1 - y)) which is log10(1 - x) + log10(1 - y). This sum is log10PosteriorOfACEq0
final double log10PosteriorOfACGt0 = Math.max(Math.log10(1 - Math.pow(10, log10PosteriorOfACEq0Sum)), MathUtils.LOG10_P_OF_ZERO);
final double[] log10LikelihoodsOfAC = new double[] {
// L + prior = posterior => L = poster - prior
log10PosteriorOfACEq0Sum - log10PriorsOfAC[0],
log10PosteriorOfACGt0 - log10PriorsOfAC[1]
};
return new MyAFCalcResult(alleleCountsOfMLE, nEvaluations, vc.getAlleles(),
MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true, true), // necessary to ensure all values < 0
MathUtils.normalizeFromLog10(log10PriorsOfAC, true), // priors incorporate multiple alt alleles, must be normalized
MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true), // necessary to ensure all values < 0
MathUtils.normalizeFromLog10(log10PriorsOfAC, true), // priors incorporate multiple alt alleles, must be normalized
log10pNonRefByAllele, sortedResultsWithThetaNPriors);
}
}

View File

@ -1,13 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
public class ReferenceDiploidExactAFCalc extends DiploidExactAFCalc {
protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
}
protected StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker) {
return new StateTracker();
}
}

View File

@ -596,7 +596,6 @@ public class MathUtils {
if (keepInLogSpace) {
for (int i = 0; i < array.length; i++) {
array[i] -= maxValue;
array[i] = Math.max(array[i], LOG10_P_OF_ZERO);
}
return array;
}
@ -613,8 +612,11 @@ public class MathUtils {
sum += normalized[i];
for (int i = 0; i < array.length; i++) {
double x = normalized[i] / sum;
if (takeLog10OfOutput)
x = Math.max(Math.log10(x), LOG10_P_OF_ZERO);
if (takeLog10OfOutput) {
x = Math.log10(x);
if ( x < LOG10_P_OF_ZERO || Double.isInfinite(x) )
x = array[i] - maxValue;
}
normalized[i] = x;
}

View File

@ -34,7 +34,12 @@ import java.io.PrintStream;
* to the provided output stream. For testing/debugging purposes.
*
* Log entries are of the following form (fields are tab-separated):
* LABEL VALUE KEY1 KEY2 ... KEY_N
* LABEL OPERATION VALUE KEY1 KEY2 ... KEY_N
*
* A header line is written before the log entries giving the dimensions of this NestedIntegerArray.
* It has the form:
*
* # LABEL SIZE_OF_FIRST_DIMENSION SIZE_OF_SECOND_DIMENSION ... SIZE_OF_NTH_DIMENSION
*
* @author David Roazen
*/
@ -43,6 +48,9 @@ public class LoggingNestedIntegerArray<T> extends NestedIntegerArray<T> {
private PrintStream log;
private String logEntryLabel;
public static final String HEADER_LINE_PREFIX = "# ";
public enum NestedIntegerArrayOperation { GET, PUT };
/**
*
* @param log output stream to which to log update operations
@ -57,6 +65,37 @@ public class LoggingNestedIntegerArray<T> extends NestedIntegerArray<T> {
}
this.log = log;
this.logEntryLabel = logEntryLabel != null ? logEntryLabel : "";
// Write the header line recording the dimensions of this NestedIntegerArray:
StringBuilder logHeaderLine = new StringBuilder();
logHeaderLine.append(HEADER_LINE_PREFIX);
logHeaderLine.append(this.logEntryLabel);
for ( int dimension : dimensions ) {
logHeaderLine.append("\t");
logHeaderLine.append(dimension);
}
this.log.println(logHeaderLine.toString());
}
@Override
public T get( final int... keys ) {
StringBuilder logEntry = new StringBuilder();
logEntry.append(logEntryLabel);
logEntry.append("\t");
logEntry.append(NestedIntegerArrayOperation.GET);
logEntry.append("\t"); // empty field for the datum value
for ( int key : keys ) {
logEntry.append("\t");
logEntry.append(key);
}
log.println(logEntry.toString());
return super.get(keys);
}
@Override
@ -67,6 +106,8 @@ public class LoggingNestedIntegerArray<T> extends NestedIntegerArray<T> {
logEntry.append(logEntryLabel);
logEntry.append("\t");
logEntry.append(NestedIntegerArrayOperation.PUT);
logEntry.append("\t");
logEntry.append(value);
for ( int key : keys ) {
logEntry.append("\t");

View File

@ -298,12 +298,16 @@ public abstract class Genotype implements Comparable<Genotype> {
* @return true if all samples PLs are equal and == 0
*/
public boolean isNonInformative() {
for ( final int PL : getPL() ) {
if ( PL != 0 )
return false;
}
if ( getPL() == null )
return true;
else {
for ( final int PL : getPL() ) {
if ( PL != 0 )
return false;
}
return true;
return true;
}
}
/**

View File

@ -349,7 +349,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("beee9457d7cea42006ac45400db5e873"));
Arrays.asList("f3ff7fe0f15f31eadd726c711d6bf3de"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}

View File

@ -21,7 +21,7 @@ public class NanoSchedulerIntegrationTest extends WalkerTest {
for ( final int nct : Arrays.asList(1, 2) ) {
// tests.add(new Object[]{ "SNP", "a1c7546f32a8919a3f3a70a04b2e8322", nt, nct });
//// tests.add(new Object[]{ "INDEL", "0a6d2be79f4f8a4b0eb788cc4751b31b", nt, nct });
tests.add(new Object[]{ "BOTH", "78ce72d8f9d029313f5f2ceb02bb9822", nt, nct });
tests.add(new Object[]{ "BOTH", "8cad82c3a5f5b932042933f136663c8a", nt, nct });
}
return tests.toArray(new Object[][]{});

View File

@ -24,7 +24,6 @@
package org.broadinstitute.sting.queue
import function.QFunction
import java.io.File
import org.broadinstitute.sting.commandline._
import org.broadinstitute.sting.queue.util._
@ -96,18 +95,18 @@ class QCommandLine extends CommandLineProgram with Logging {
new PluginManager[QScript](classOf[QScript], Seq(qScriptClasses.toURI.toURL))
}
private lazy val qStatusMessengerPluginManager = {
new PluginManager[QStatusMessenger](classOf[QStatusMessenger])
private lazy val qCommandPlugin = {
new PluginManager[QCommandPlugin](classOf[QCommandPlugin])
}
ClassFieldCache.parsingEngine = new ParsingEngine(this)
/**
* Takes the QScripts passed in, runs their script() methods, retrieves their generated
* functions, and then builds and runs a QGraph based on the dependencies.
*/
def execute = {
val allStatusMessengers = qStatusMessengerPluginManager.createAllTypes()
ClassFieldCache.parsingEngine = this.parser
val allCommandPlugins = qCommandPlugin.createAllTypes()
if (settings.qSettings.runName == null)
settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName)
@ -115,14 +114,24 @@ class QCommandLine extends CommandLineProgram with Logging {
settings.qSettings.tempDirectory = IOUtils.absolute(settings.qSettings.runDirectory, ".queue/tmp")
qGraph.initializeWithSettings(settings)
for (statusMessenger <- allStatusMessengers) {
loadArgumentsIntoObject(statusMessenger)
for (commandPlugin <- allCommandPlugins) {
loadArgumentsIntoObject(commandPlugin)
}
for (statusMessenger <- allStatusMessengers) {
statusMessenger.started()
for (commandPlugin <- allCommandPlugins) {
if (commandPlugin.statusMessenger != null)
commandPlugin.statusMessenger.started()
}
// TODO: Default command plugin argument?
val remoteFileConverter = (
for (commandPlugin <- allCommandPlugins if (commandPlugin.remoteFileConverter != null))
yield commandPlugin.remoteFileConverter
).headOption.getOrElse(null)
if (remoteFileConverter != null)
loadArgumentsIntoObject(remoteFileConverter)
val allQScripts = qScriptPluginManager.createAllTypes()
for (script <- allQScripts) {
logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript])))
@ -137,10 +146,15 @@ class QCommandLine extends CommandLineProgram with Logging {
case e: Exception =>
throw new UserException.CannotExecuteQScript(script.getClass.getSimpleName + ".script() threw the following exception: " + e, e)
}
if (remoteFileConverter != null) {
if (remoteFileConverter.convertToRemoteEnabled)
script.mkRemoteOutputs(remoteFileConverter)
}
script.functions.foreach(qGraph.add(_))
logger.info("Added " + script.functions.size + " functions")
}
// Execute the job graph
qGraph.run()
@ -162,14 +176,16 @@ class QCommandLine extends CommandLineProgram with Logging {
if (!success) {
logger.info("Done with errors")
qGraph.logFailed()
for (statusMessenger <- allStatusMessengers)
statusMessenger.exit("Done with errors")
for (commandPlugin <- allCommandPlugins)
if (commandPlugin.statusMessenger != null)
commandPlugin.statusMessenger.exit("Done with errors")
1
} else {
if (settings.run) {
allQScripts.foreach(_.pushOutputs())
for (statusMessenger <- allStatusMessengers)
statusMessenger.done(allQScripts.map(_.remoteOutputs))
for (commandPlugin <- allCommandPlugins)
if (commandPlugin.statusMessenger != null)
commandPlugin.statusMessenger.done(allQScripts.map(_.remoteOutputs))
}
0
}
@ -189,7 +205,7 @@ class QCommandLine extends CommandLineProgram with Logging {
override def getArgumentSources = {
var plugins = Seq.empty[Class[_]]
plugins ++= qScriptPluginManager.getPlugins
plugins ++= qStatusMessengerPluginManager.getPlugins
plugins ++= qCommandPlugin.getPlugins
plugins.toArray
}
@ -200,11 +216,10 @@ class QCommandLine extends CommandLineProgram with Logging {
override def getArgumentSourceName(source: Class[_]) = {
if (classOf[QScript].isAssignableFrom(source))
qScriptPluginManager.getName(source.asSubclass(classOf[QScript]))
else if (classOf[QStatusMessenger].isAssignableFrom(source))
qStatusMessengerPluginManager.getName(source.asSubclass(classOf[QStatusMessenger]))
else if (classOf[QCommandPlugin].isAssignableFrom(source))
qCommandPlugin.getName(source.asSubclass(classOf[QCommandPlugin]))
else
null
}
/**

View File

@ -0,0 +1,9 @@
package org.broadinstitute.sting.queue
import engine.QStatusMessenger
import util.RemoteFileConverter
trait QCommandPlugin {
def statusMessenger: QStatusMessenger = null
def remoteFileConverter: RemoteFileConverter = null
}

View File

@ -108,6 +108,24 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
functions.foreach( f => add(f) )
}
/**
* Convert all @Output files to remote output files.
* @param remoteFileConverter Converter for files to remote files.
*/
def mkRemoteOutputs(remoteFileConverter: RemoteFileConverter) {
for (field <- outputFields) {
val fieldFile = ClassFieldCache.getFieldFile(this, field)
if (fieldFile != null && !fieldFile.isInstanceOf[RemoteFile]) {
val fieldName = ClassFieldCache.fullName(field)
val remoteFile = remoteFileConverter.convertToRemote(fieldFile, fieldName)
ClassFieldCache.setFieldValue(this, field, remoteFile)
}
}
}
/**
* Pull all remote files to the local disk.
*/
def pullInputs() {
val inputs = ClassFieldCache.getFieldFiles(this, inputFields)
for (remoteFile <- filterRemoteFiles(inputs)) {
@ -116,6 +134,9 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
}
}
/**
* Push all remote files from the local disk.
*/
def pushOutputs() {
val outputs = ClassFieldCache.getFieldFiles(this, outputFields)
for (remoteFile <- filterRemoteFiles(outputs)) {
@ -124,6 +145,10 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
}
}
/**
* List out the remote outputs
* @return the RemoteFile outputs by argument source
*/
def remoteOutputs: Map[ArgumentSource, Seq[RemoteFile]] =
outputFields.map(field => (field -> filterRemoteFiles(ClassFieldCache.getFieldFiles(this, field)))).filter(tuple => !tuple._2.isEmpty).toMap

View File

@ -180,4 +180,15 @@ object ClassFieldCache {
case unknown => throw new QException("Non-file found. Try removing the annotation, change the annotation to @Argument, or extend File with FileExtension: %s: %s".format(field.field, unknown))
}
//
// other utilities
//
/**
* Retrieves the fullName of the argument
* @param field ArgumentSource to check
* @return Full name of the argument source
*/
def fullName(field: ArgumentSource) = field.createArgumentDefinitions().get(0).fullName
}

View File

@ -0,0 +1,21 @@
package org.broadinstitute.sting.queue.util
import java.io.File
trait RemoteFileConverter {
type RemoteFileType <: RemoteFile
/**
* If this remote file creator is capable of converting to a remote file.
* @return true if ready to convert
*/
def convertToRemoteEnabled: Boolean
/**
* Converts to a remote file
* @param file The original file
* @param name A "name" to use for the remote file
* @return The new version of this remote file.
*/
def convertToRemote(file: File, name: String): RemoteFileType
}