Merge pull request #368 from broadinstitute/md_incremental_caller

UG and script changes to make accessing the value of joint calling easier
This commit is contained in:
Mark DePristo 2013-08-09 08:21:03 -07:00
commit 8b5178bd59
3 changed files with 105 additions and 4 deletions

View File

@ -81,6 +81,7 @@ import java.util.*;
* <p>The Fisher Strand test may not be calculated for certain complex indel cases or for multi-allelic sites.</p>
*/
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private final static boolean ENABLE_DEBUGGING = false;
private final static Logger logger = Logger.getLogger(FisherStrand.class);
private static final String FS = "FS";
@ -99,6 +100,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
if (vc.isSNP() && stratifiedContexts != null) {
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1);
final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST);
printTable("unfiltered", tableNoFiltering);
printTable("filtered", tableFiltering);
return pValueForBestTable(tableFiltering, tableNoFiltering);
}
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
@ -203,6 +206,20 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
logger.info(String.format("%d %d; %d %d : %f", table[0][0], table[0][1], table[1][0], table[1][1], pValue));
}
/**
* Printing information to logger.info for debugging purposes
*
* @param name the name of the table
* @param table the table itself
*/
private void printTable(final String name, final int[][] table) {
if ( ENABLE_DEBUGGING ) {
final String pValue = (String)annotationForOneTable(pValueForContingencyTable(table)).get(FS);
logger.info(String.format("FS %s (REF+, REF-, ALT+, ALT-) = (%d, %d, %d, %d) = %s",
name, table[0][0], table[0][1], table[1][0], table[1][1], pValue));
}
}
private static boolean rotateTable(int[][] table) {
table[0][0] -= 1;
table[1][0] += 1;

View File

@ -183,6 +183,10 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
@Output(doc="File to which variants should be written")
protected VariantContextWriter writer = null;
@Advanced
@Argument(fullName = "onlyEmitSamples", shortName = "onlyEmitSamples", doc = "If provided, only these samples will be emitted into the VCF, regardless of which samples are present in the BAM file", required = false)
protected Set<String> onlyEmitSamples = Collections.emptySet();
@Hidden
@Argument(fullName = "debug_file", shortName = "debug_file", doc = "File to print all of the annotated and detailed debugging output", required = false)
protected PrintStream verboseWriter = null;
@ -288,9 +292,16 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
// and perform any necessary initialization/validation steps
annotationEngine.invokeAnnotationInitializationMethods(headerInfo);
writer.writeHeader(new VCFHeader(headerInfo, samples));
final Set<String> samplesForHeader;
if ( ! onlyEmitSamples.isEmpty() ) {
// make sure that onlyEmitSamples is a subset of samples
if ( ! samples.containsAll(onlyEmitSamples) )
throw new UserException.BadArgumentValue("onlyEmitSamples", "must be a strict subset of the samples in the BAM files but is wasn't");
samplesForHeader = onlyEmitSamples;
} else {
samplesForHeader = samples;
}
writer.writeHeader(new VCFHeader(headerInfo, samplesForHeader));
}
public static Set<VCFHeaderLine> getHeaderInfo(final UnifiedArgumentCollection UAC,
@ -387,7 +398,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
try {
// we are actually making a call
sum.nCallsMade++;
writer.add(call);
writer.add(subsetToEmitSamples(call));
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage());
}
@ -403,6 +414,19 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
return sum;
}
/**
* Subset the VariantContext down to just the emitting samples, if onlyEmitSamples has been provided
* @param fullVC the VariantContext containing calls for all samples in the BAM files
* @return a VariantContext that has been appropriately reduced to a subset of samples, if required
*/
private VariantContext subsetToEmitSamples(final VariantContext fullVC) {
if ( onlyEmitSamples.isEmpty() ) {
return fullVC;
} else {
return GATKVariantContextUtils.trimAlleles(fullVC.subContextFromSamples(onlyEmitSamples, false), false, true);
}
}
public void onTraversalDone(UGStatistics sum) {
if ( metricsWriter != null ) {
metricsWriter.println(String.format("Visited bases %d", sum.nBasesVisited));

View File

@ -47,16 +47,22 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.util.BlockCompressedInputStream;
import org.apache.commons.collections.IteratorUtils;
import org.broad.tribble.readers.AsciiLineReader;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.vcf.VCFCodec;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
// ********************************************************************************** //
// Note that this class also serves as an integration test for the VariantAnnotator! //
@ -324,4 +330,58 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
nLines++;
Assert.assertTrue(nLines > 0);
}
// --------------------------------------------------------------------------------------------------------------
//
// testing only emit samples
//
// --------------------------------------------------------------------------------------------------------------
@Test(enabled = true)
public void testOnlyEmitSample() throws Exception {
final String base = "-T UnifiedGenotyper -R " + b37KGReference + " -I "
+ privateTestDir + "AFR.complex.variants.bam --disableDithering"
+ " -o %s -L 20:10,000,000-10,100,000";
final WalkerTestSpec specAllSamples = new WalkerTestSpec(base, 1, Arrays.asList(""));
specAllSamples.disableShadowBCF();
final File allSamplesVCF = executeTest("testOnlyEmitSampleAllSamples", specAllSamples).first.get(0);
List<VariantContext> allSampleVCs = IteratorUtils.toList(GATKVCFUtils.readAllVCs(allSamplesVCF, new VCFCodec()).getSecond());
final WalkerTestSpec onlyHG01879 = new WalkerTestSpec(base + " -onlyEmitSamples HG01879", 1, Arrays.asList(""));
onlyHG01879.disableShadowBCF();
final File onlyHG01879VCF = executeTest("testOnlyEmitSample", onlyHG01879).first.get(0);
List<VariantContext> onlyHG01879VCs = IteratorUtils.toList(GATKVCFUtils.readAllVCs(onlyHG01879VCF, new VCFCodec()).getSecond());
Assert.assertEquals(allSampleVCs.size(), onlyHG01879VCs.size());
for ( int i = 0; i < allSampleVCs.size(); i++ ) {
final VariantContext allSampleVC = allSampleVCs.get(i);
final VariantContext onlyHG01879VC = onlyHG01879VCs.get(i);
if ( allSampleVC == null ) {
Assert.assertNull(onlyHG01879VC);
} else {
Assert.assertNotNull(onlyHG01879VC);
Assert.assertTrue(allSampleVC.getGenotypes().size() > 1, "All samples should have had more than 1 genotype, but didn't");
Assert.assertEquals(onlyHG01879VC.getGenotypes().size(), 1, "Should have found a single sample genotype, but didn't");
Assert.assertEquals(onlyHG01879VC.hasGenotype("HG01879"), true);
Assert.assertEquals(allSampleVC.getStart(), onlyHG01879VC.getStart());
Assert.assertEquals(allSampleVC.getChr(), onlyHG01879VC.getChr());
Assert.assertEquals(allSampleVC.getEnd(), onlyHG01879VC.getEnd());
Assert.assertEquals(allSampleVC.getFilters(), onlyHG01879VC.getFilters());
Assert.assertEquals(allSampleVC.getAlleles(), onlyHG01879VC.getAlleles());
Assert.assertEquals(allSampleVC.getAttributes(), onlyHG01879VC.getAttributes());
Assert.assertEquals(allSampleVC.getPhredScaledQual(), onlyHG01879VC.getPhredScaledQual());
final Genotype allG = allSampleVC.getGenotype("HG01879");
final Genotype onlyG = onlyHG01879VC.getGenotype("HG01879");
Assert.assertEquals(allG.getAD(), onlyG.getAD());
Assert.assertEquals(allG.getDP(), onlyG.getDP());
Assert.assertEquals(allG.getAlleles(), onlyG.getAlleles());
Assert.assertEquals(allG.getPL(), onlyG.getPL());
Assert.assertEquals(allG.toString(), onlyG.toString());
}
}
}
}