diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
index 876dbf039..cdbd43a7a 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
@@ -81,6 +81,7 @@ import java.util.*;
*
The Fisher Strand test may not be calculated for certain complex indel cases or for multi-allelic sites.
*/
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
+ private final static boolean ENABLE_DEBUGGING = false;
private final static Logger logger = Logger.getLogger(FisherStrand.class);
private static final String FS = "FS";
@@ -99,6 +100,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
if (vc.isSNP() && stratifiedContexts != null) {
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1);
final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST);
+ printTable("unfiltered", tableNoFiltering);
+ printTable("filtered", tableFiltering);
return pValueForBestTable(tableFiltering, tableNoFiltering);
}
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
@@ -203,6 +206,20 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
logger.info(String.format("%d %d; %d %d : %f", table[0][0], table[0][1], table[1][0], table[1][1], pValue));
}
+ /**
+ * Printing information to logger.info for debugging purposes
+ *
+ * @param name the name of the table
+ * @param table the table itself
+ */
+ private void printTable(final String name, final int[][] table) {
+ if ( ENABLE_DEBUGGING ) {
+ final String pValue = (String)annotationForOneTable(pValueForContingencyTable(table)).get(FS);
+ logger.info(String.format("FS %s (REF+, REF-, ALT+, ALT-) = (%d, %d, %d, %d) = %s",
+ name, table[0][0], table[0][1], table[1][0], table[1][1], pValue));
+ }
+ }
+
private static boolean rotateTable(int[][] table) {
table[0][0] -= 1;
table[1][0] += 1;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
index 82b93aa55..17d0217f0 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@@ -183,6 +183,10 @@ public class UnifiedGenotyper extends LocusWalker, Unif
@Output(doc="File to which variants should be written")
protected VariantContextWriter writer = null;
+ @Advanced
+ @Argument(fullName = "onlyEmitSamples", shortName = "onlyEmitSamples", doc = "If provided, only these samples will be emitted into the VCF, regardless of which samples are present in the BAM file", required = false)
+ protected Set onlyEmitSamples = Collections.emptySet();
+
@Hidden
@Argument(fullName = "debug_file", shortName = "debug_file", doc = "File to print all of the annotated and detailed debugging output", required = false)
protected PrintStream verboseWriter = null;
@@ -288,9 +292,16 @@ public class UnifiedGenotyper extends LocusWalker, Unif
// and perform any necessary initialization/validation steps
annotationEngine.invokeAnnotationInitializationMethods(headerInfo);
- writer.writeHeader(new VCFHeader(headerInfo, samples));
-
-
+ final Set samplesForHeader;
+ if ( ! onlyEmitSamples.isEmpty() ) {
+ // make sure that onlyEmitSamples is a subset of samples
+ if ( ! samples.containsAll(onlyEmitSamples) )
+ throw new UserException.BadArgumentValue("onlyEmitSamples", "must be a strict subset of the samples in the BAM files but is wasn't");
+ samplesForHeader = onlyEmitSamples;
+ } else {
+ samplesForHeader = samples;
+ }
+ writer.writeHeader(new VCFHeader(headerInfo, samplesForHeader));
}
public static Set getHeaderInfo(final UnifiedArgumentCollection UAC,
@@ -387,7 +398,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif
try {
// we are actually making a call
sum.nCallsMade++;
- writer.add(call);
+ writer.add(subsetToEmitSamples(call));
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage());
}
@@ -403,6 +414,19 @@ public class UnifiedGenotyper extends LocusWalker, Unif
return sum;
}
+ /**
+ * Subset the VariantContext down to just the emitting samples, if onlyEmitSamples has been provided
+ * @param fullVC the VariantContext containing calls for all samples in the BAM files
+ * @return a VariantContext that has been appropriately reduced to a subset of samples, if required
+ */
+ private VariantContext subsetToEmitSamples(final VariantContext fullVC) {
+ if ( onlyEmitSamples.isEmpty() ) {
+ return fullVC;
+ } else {
+ return GATKVariantContextUtils.trimAlleles(fullVC.subContextFromSamples(onlyEmitSamples, false), false, true);
+ }
+ }
+
public void onTraversalDone(UGStatistics sum) {
if ( metricsWriter != null ) {
metricsWriter.println(String.format("Visited bases %d", sum.nBasesVisited));
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index d64ceb953..ed39d1b56 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -47,16 +47,22 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.util.BlockCompressedInputStream;
+import org.apache.commons.collections.IteratorUtils;
import org.broad.tribble.readers.AsciiLineReader;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
+import org.broadinstitute.variant.variantcontext.Genotype;
+import org.broadinstitute.variant.variantcontext.VariantContext;
+import org.broadinstitute.variant.vcf.VCFCodec;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.File;
import java.util.Arrays;
import java.util.Collections;
+import java.util.List;
// ********************************************************************************** //
// Note that this class also serves as an integration test for the VariantAnnotator! //
@@ -324,4 +330,58 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
nLines++;
Assert.assertTrue(nLines > 0);
}
+
+ // --------------------------------------------------------------------------------------------------------------
+ //
+ // testing only emit samples
+ //
+ // --------------------------------------------------------------------------------------------------------------
+
+ @Test(enabled = true)
+ public void testOnlyEmitSample() throws Exception {
+ final String base = "-T UnifiedGenotyper -R " + b37KGReference + " -I "
+ + privateTestDir + "AFR.complex.variants.bam --disableDithering"
+ + " -o %s -L 20:10,000,000-10,100,000";
+ final WalkerTestSpec specAllSamples = new WalkerTestSpec(base, 1, Arrays.asList(""));
+ specAllSamples.disableShadowBCF();
+ final File allSamplesVCF = executeTest("testOnlyEmitSampleAllSamples", specAllSamples).first.get(0);
+ List allSampleVCs = IteratorUtils.toList(GATKVCFUtils.readAllVCs(allSamplesVCF, new VCFCodec()).getSecond());
+
+ final WalkerTestSpec onlyHG01879 = new WalkerTestSpec(base + " -onlyEmitSamples HG01879", 1, Arrays.asList(""));
+ onlyHG01879.disableShadowBCF();
+ final File onlyHG01879VCF = executeTest("testOnlyEmitSample", onlyHG01879).first.get(0);
+ List onlyHG01879VCs = IteratorUtils.toList(GATKVCFUtils.readAllVCs(onlyHG01879VCF, new VCFCodec()).getSecond());
+
+ Assert.assertEquals(allSampleVCs.size(), onlyHG01879VCs.size());
+ for ( int i = 0; i < allSampleVCs.size(); i++ ) {
+ final VariantContext allSampleVC = allSampleVCs.get(i);
+ final VariantContext onlyHG01879VC = onlyHG01879VCs.get(i);
+
+ if ( allSampleVC == null ) {
+ Assert.assertNull(onlyHG01879VC);
+ } else {
+ Assert.assertNotNull(onlyHG01879VC);
+
+ Assert.assertTrue(allSampleVC.getGenotypes().size() > 1, "All samples should have had more than 1 genotype, but didn't");
+ Assert.assertEquals(onlyHG01879VC.getGenotypes().size(), 1, "Should have found a single sample genotype, but didn't");
+ Assert.assertEquals(onlyHG01879VC.hasGenotype("HG01879"), true);
+
+ Assert.assertEquals(allSampleVC.getStart(), onlyHG01879VC.getStart());
+ Assert.assertEquals(allSampleVC.getChr(), onlyHG01879VC.getChr());
+ Assert.assertEquals(allSampleVC.getEnd(), onlyHG01879VC.getEnd());
+ Assert.assertEquals(allSampleVC.getFilters(), onlyHG01879VC.getFilters());
+ Assert.assertEquals(allSampleVC.getAlleles(), onlyHG01879VC.getAlleles());
+ Assert.assertEquals(allSampleVC.getAttributes(), onlyHG01879VC.getAttributes());
+ Assert.assertEquals(allSampleVC.getPhredScaledQual(), onlyHG01879VC.getPhredScaledQual());
+
+ final Genotype allG = allSampleVC.getGenotype("HG01879");
+ final Genotype onlyG = onlyHG01879VC.getGenotype("HG01879");
+ Assert.assertEquals(allG.getAD(), onlyG.getAD());
+ Assert.assertEquals(allG.getDP(), onlyG.getDP());
+ Assert.assertEquals(allG.getAlleles(), onlyG.getAlleles());
+ Assert.assertEquals(allG.getPL(), onlyG.getPL());
+ Assert.assertEquals(allG.toString(), onlyG.toString());
+ }
+ }
+ }
}