Merge pull request #368 from broadinstitute/md_incremental_caller
UG and script changes to make accessing the value of joint calling easier
This commit is contained in:
commit
8b5178bd59
|
|
@ -81,6 +81,7 @@ import java.util.*;
|
|||
* <p>The Fisher Strand test may not be calculated for certain complex indel cases or for multi-allelic sites.</p>
|
||||
*/
|
||||
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
private final static boolean ENABLE_DEBUGGING = false;
|
||||
private final static Logger logger = Logger.getLogger(FisherStrand.class);
|
||||
|
||||
private static final String FS = "FS";
|
||||
|
|
@ -99,6 +100,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
if (vc.isSNP() && stratifiedContexts != null) {
|
||||
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1);
|
||||
final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST);
|
||||
printTable("unfiltered", tableNoFiltering);
|
||||
printTable("filtered", tableFiltering);
|
||||
return pValueForBestTable(tableFiltering, tableNoFiltering);
|
||||
}
|
||||
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
|
||||
|
|
@ -203,6 +206,20 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
logger.info(String.format("%d %d; %d %d : %f", table[0][0], table[0][1], table[1][0], table[1][1], pValue));
|
||||
}
|
||||
|
||||
/**
|
||||
* Printing information to logger.info for debugging purposes
|
||||
*
|
||||
* @param name the name of the table
|
||||
* @param table the table itself
|
||||
*/
|
||||
private void printTable(final String name, final int[][] table) {
|
||||
if ( ENABLE_DEBUGGING ) {
|
||||
final String pValue = (String)annotationForOneTable(pValueForContingencyTable(table)).get(FS);
|
||||
logger.info(String.format("FS %s (REF+, REF-, ALT+, ALT-) = (%d, %d, %d, %d) = %s",
|
||||
name, table[0][0], table[0][1], table[1][0], table[1][1], pValue));
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean rotateTable(int[][] table) {
|
||||
table[0][0] -= 1;
|
||||
table[1][0] += 1;
|
||||
|
|
|
|||
|
|
@ -183,6 +183,10 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
@Output(doc="File to which variants should be written")
|
||||
protected VariantContextWriter writer = null;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName = "onlyEmitSamples", shortName = "onlyEmitSamples", doc = "If provided, only these samples will be emitted into the VCF, regardless of which samples are present in the BAM file", required = false)
|
||||
protected Set<String> onlyEmitSamples = Collections.emptySet();
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "debug_file", shortName = "debug_file", doc = "File to print all of the annotated and detailed debugging output", required = false)
|
||||
protected PrintStream verboseWriter = null;
|
||||
|
|
@ -288,9 +292,16 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
// and perform any necessary initialization/validation steps
|
||||
annotationEngine.invokeAnnotationInitializationMethods(headerInfo);
|
||||
|
||||
writer.writeHeader(new VCFHeader(headerInfo, samples));
|
||||
|
||||
|
||||
final Set<String> samplesForHeader;
|
||||
if ( ! onlyEmitSamples.isEmpty() ) {
|
||||
// make sure that onlyEmitSamples is a subset of samples
|
||||
if ( ! samples.containsAll(onlyEmitSamples) )
|
||||
throw new UserException.BadArgumentValue("onlyEmitSamples", "must be a strict subset of the samples in the BAM files but is wasn't");
|
||||
samplesForHeader = onlyEmitSamples;
|
||||
} else {
|
||||
samplesForHeader = samples;
|
||||
}
|
||||
writer.writeHeader(new VCFHeader(headerInfo, samplesForHeader));
|
||||
}
|
||||
|
||||
public static Set<VCFHeaderLine> getHeaderInfo(final UnifiedArgumentCollection UAC,
|
||||
|
|
@ -387,7 +398,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
try {
|
||||
// we are actually making a call
|
||||
sum.nCallsMade++;
|
||||
writer.add(call);
|
||||
writer.add(subsetToEmitSamples(call));
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IllegalArgumentException(e.getMessage());
|
||||
}
|
||||
|
|
@ -403,6 +414,19 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Subset the VariantContext down to just the emitting samples, if onlyEmitSamples has been provided
|
||||
* @param fullVC the VariantContext containing calls for all samples in the BAM files
|
||||
* @return a VariantContext that has been appropriately reduced to a subset of samples, if required
|
||||
*/
|
||||
private VariantContext subsetToEmitSamples(final VariantContext fullVC) {
|
||||
if ( onlyEmitSamples.isEmpty() ) {
|
||||
return fullVC;
|
||||
} else {
|
||||
return GATKVariantContextUtils.trimAlleles(fullVC.subContextFromSamples(onlyEmitSamples, false), false, true);
|
||||
}
|
||||
}
|
||||
|
||||
public void onTraversalDone(UGStatistics sum) {
|
||||
if ( metricsWriter != null ) {
|
||||
metricsWriter.println(String.format("Visited bases %d", sum.nBasesVisited));
|
||||
|
|
|
|||
|
|
@ -47,16 +47,22 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import net.sf.samtools.util.BlockCompressedInputStream;
|
||||
import org.apache.commons.collections.IteratorUtils;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
// ********************************************************************************** //
|
||||
// Note that this class also serves as an integration test for the VariantAnnotator! //
|
||||
|
|
@ -324,4 +330,58 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
nLines++;
|
||||
Assert.assertTrue(nLines > 0);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing only emit samples
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testOnlyEmitSample() throws Exception {
|
||||
final String base = "-T UnifiedGenotyper -R " + b37KGReference + " -I "
|
||||
+ privateTestDir + "AFR.complex.variants.bam --disableDithering"
|
||||
+ " -o %s -L 20:10,000,000-10,100,000";
|
||||
final WalkerTestSpec specAllSamples = new WalkerTestSpec(base, 1, Arrays.asList(""));
|
||||
specAllSamples.disableShadowBCF();
|
||||
final File allSamplesVCF = executeTest("testOnlyEmitSampleAllSamples", specAllSamples).first.get(0);
|
||||
List<VariantContext> allSampleVCs = IteratorUtils.toList(GATKVCFUtils.readAllVCs(allSamplesVCF, new VCFCodec()).getSecond());
|
||||
|
||||
final WalkerTestSpec onlyHG01879 = new WalkerTestSpec(base + " -onlyEmitSamples HG01879", 1, Arrays.asList(""));
|
||||
onlyHG01879.disableShadowBCF();
|
||||
final File onlyHG01879VCF = executeTest("testOnlyEmitSample", onlyHG01879).first.get(0);
|
||||
List<VariantContext> onlyHG01879VCs = IteratorUtils.toList(GATKVCFUtils.readAllVCs(onlyHG01879VCF, new VCFCodec()).getSecond());
|
||||
|
||||
Assert.assertEquals(allSampleVCs.size(), onlyHG01879VCs.size());
|
||||
for ( int i = 0; i < allSampleVCs.size(); i++ ) {
|
||||
final VariantContext allSampleVC = allSampleVCs.get(i);
|
||||
final VariantContext onlyHG01879VC = onlyHG01879VCs.get(i);
|
||||
|
||||
if ( allSampleVC == null ) {
|
||||
Assert.assertNull(onlyHG01879VC);
|
||||
} else {
|
||||
Assert.assertNotNull(onlyHG01879VC);
|
||||
|
||||
Assert.assertTrue(allSampleVC.getGenotypes().size() > 1, "All samples should have had more than 1 genotype, but didn't");
|
||||
Assert.assertEquals(onlyHG01879VC.getGenotypes().size(), 1, "Should have found a single sample genotype, but didn't");
|
||||
Assert.assertEquals(onlyHG01879VC.hasGenotype("HG01879"), true);
|
||||
|
||||
Assert.assertEquals(allSampleVC.getStart(), onlyHG01879VC.getStart());
|
||||
Assert.assertEquals(allSampleVC.getChr(), onlyHG01879VC.getChr());
|
||||
Assert.assertEquals(allSampleVC.getEnd(), onlyHG01879VC.getEnd());
|
||||
Assert.assertEquals(allSampleVC.getFilters(), onlyHG01879VC.getFilters());
|
||||
Assert.assertEquals(allSampleVC.getAlleles(), onlyHG01879VC.getAlleles());
|
||||
Assert.assertEquals(allSampleVC.getAttributes(), onlyHG01879VC.getAttributes());
|
||||
Assert.assertEquals(allSampleVC.getPhredScaledQual(), onlyHG01879VC.getPhredScaledQual());
|
||||
|
||||
final Genotype allG = allSampleVC.getGenotype("HG01879");
|
||||
final Genotype onlyG = onlyHG01879VC.getGenotype("HG01879");
|
||||
Assert.assertEquals(allG.getAD(), onlyG.getAD());
|
||||
Assert.assertEquals(allG.getDP(), onlyG.getDP());
|
||||
Assert.assertEquals(allG.getAlleles(), onlyG.getAlleles());
|
||||
Assert.assertEquals(allG.getPL(), onlyG.getPL());
|
||||
Assert.assertEquals(allG.toString(), onlyG.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue