Merge pull request #714 from broadinstitute/pd_hc_samplename

Add the --sample_name argument to HaplotypeCaller
This commit is contained in:
Eric Banks 2014-08-23 20:57:44 -04:00
commit 34e5cce553
2 changed files with 37 additions and 3 deletions

View File

@ -341,6 +341,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
@ArgumentCollection
private HaplotypeCallerArgumentCollection SCAC = new HaplotypeCallerArgumentCollection();
@Argument(fullName="sample_name", shortName = "sn", doc="Name of single sample to use from a multi-sample bam. The name is case-sensitive", required=false)
protected String sampleNameToUse = null;
// -----------------------------------------------------------------------------------------------
// arguments to control internal behavior of the read threading assembler
// -----------------------------------------------------------------------------------------------
@ -655,8 +658,21 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
final GenomeAnalysisEngine toolkit = getToolkit();
samplesList = toolkit.getReadSampleList();
final Set<String> sampleSet = SampleListUtils.asSet(samplesList);
Set<String> sampleSet = SampleListUtils.asSet(samplesList);
if (sampleNameToUse != null) {
if (!sampleSet.contains(sampleNameToUse))
throw new UserException.BadArgumentValue("sample_name", "Specified name does not exist in input bam files");
if (sampleSet.size() == 1) {
//No reason to incur performance penalty associated with filtering if they specified the name of the only sample
sampleNameToUse = null;
} else {
samplesList = new IndexedSampleList(sampleNameToUse);
sampleSet = SampleListUtils.asSet(samplesList);
}
}
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
final UnifiedArgumentCollection simpleUAC = SCAC.cloneTo(UnifiedArgumentCollection.class);
simpleUAC.outputMode = OutputMode.EMIT_VARIANTS_ONLY;
@ -773,7 +789,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
referenceConfidenceModel = new ReferenceConfidenceModel(getToolkit().getGenomeLocParser(), samples, getToolkit().getSAMFileHeader(), indelSizeToEliminateInRefModel);
if ( emitReferenceConfidence() ) {
if ( samples.sampleCount() != 1 )
throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently");
throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently. Use the sample_name argument");
headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
if ( SCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
// a kluge to enforce the use of this indexing strategy
@ -885,6 +901,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// we're benchmarking ART and/or the active region determination code in the HC, just leave without doing any work
return NO_CALLS;
if (sampleNameToUse != null)
removeReadsFromAllSamplesExcept(sampleNameToUse, originalActiveRegion);
if( !originalActiveRegion.isActive() )
// Not active so nothing to do!
return referenceModelForNoVariation(originalActiveRegion, true);
@ -1270,4 +1289,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
FragmentUtils.adjustQualsOfOverlappingPairedFragments(overlappingPair);
}
}
private void removeReadsFromAllSamplesExcept(final String targetSample, final ActiveRegion activeRegion) {
final Set<GATKSAMRecord> readsToRemove = new LinkedHashSet<>();
for( final GATKSAMRecord rec : activeRegion.getReads() ) {
if( !rec.getReadGroup().getSample().equals(targetSample) ) {
readsToRemove.add(rec);
}
}
activeRegion.removeAll( readsToRemove );
}
}

View File

@ -90,7 +90,11 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "a64e2d8e60a1be9cb0b4306c85e96393"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "d5c07fa3edca496a84fd17cecad06230"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "66019a0914f905522da6bd3b557a57d1"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "de763f6c9a5aec4586a2671941e4c96d"});
final String NA12878bandedResolutionMD5 = "de763f6c9a5aec4586a2671941e4c96d";
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5});
tests.add(new Object[]{NA12878_WEx + " -I " + privateTestDir + "NA20313.highCoverageRegion.bam -sn NA12878",
ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5});
return tests.toArray(new Object[][]{});
}