Add the --sample_name argument to HaplotypeCaller
* This is a shortcut for people who have multi-sample BAMs but would like to use GVCF mode. Rather than creating single-sample BAMs with PrintReads, one could use the --sample_name argument to HaplotypeCaller to specify the single sample to make calls on * Completes PT 73075482
This commit is contained in:
parent
2b129ba707
commit
931890915f
|
|
@ -341,6 +341,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
@ArgumentCollection
|
||||
private HaplotypeCallerArgumentCollection SCAC = new HaplotypeCallerArgumentCollection();
|
||||
|
||||
@Argument(fullName="sample_name", shortName = "sn", doc="Name of single sample to use from a multi-sample bam. The name is case-sensitive", required=false)
|
||||
protected String sampleNameToUse = null;
|
||||
|
||||
// -----------------------------------------------------------------------------------------------
|
||||
// arguments to control internal behavior of the read threading assembler
|
||||
// -----------------------------------------------------------------------------------------------
|
||||
|
|
@ -655,8 +658,21 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
|
||||
final GenomeAnalysisEngine toolkit = getToolkit();
|
||||
samplesList = toolkit.getReadSampleList();
|
||||
final Set<String> sampleSet = SampleListUtils.asSet(samplesList);
|
||||
Set<String> sampleSet = SampleListUtils.asSet(samplesList);
|
||||
|
||||
if (sampleNameToUse != null) {
|
||||
if (!sampleSet.contains(sampleNameToUse))
|
||||
throw new UserException.BadArgumentValue("sample_name", "Specified name does not exist in input bam files");
|
||||
if (sampleSet.size() == 1) {
|
||||
//No reason to incur performance penalty associated with filtering if they specified the name of the only sample
|
||||
sampleNameToUse = null;
|
||||
} else {
|
||||
samplesList = new IndexedSampleList(sampleNameToUse);
|
||||
sampleSet = SampleListUtils.asSet(samplesList);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
|
||||
final UnifiedArgumentCollection simpleUAC = SCAC.cloneTo(UnifiedArgumentCollection.class);
|
||||
simpleUAC.outputMode = OutputMode.EMIT_VARIANTS_ONLY;
|
||||
|
|
@ -773,7 +789,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
referenceConfidenceModel = new ReferenceConfidenceModel(getToolkit().getGenomeLocParser(), samples, getToolkit().getSAMFileHeader(), indelSizeToEliminateInRefModel);
|
||||
if ( emitReferenceConfidence() ) {
|
||||
if ( samples.sampleCount() != 1 )
|
||||
throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently");
|
||||
throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently. Use the sample_name argument");
|
||||
headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
|
||||
if ( SCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
|
||||
// a kluge to enforce the use of this indexing strategy
|
||||
|
|
@ -885,6 +901,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
// we're benchmarking ART and/or the active region determination code in the HC, just leave without doing any work
|
||||
return NO_CALLS;
|
||||
|
||||
if (sampleNameToUse != null)
|
||||
removeReadsFromAllSamplesExcept(sampleNameToUse, originalActiveRegion);
|
||||
|
||||
if( !originalActiveRegion.isActive() )
|
||||
// Not active so nothing to do!
|
||||
return referenceModelForNoVariation(originalActiveRegion, true);
|
||||
|
|
@ -1270,4 +1289,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
FragmentUtils.adjustQualsOfOverlappingPairedFragments(overlappingPair);
|
||||
}
|
||||
}
|
||||
|
||||
private void removeReadsFromAllSamplesExcept(final String targetSample, final ActiveRegion activeRegion) {
|
||||
final Set<GATKSAMRecord> readsToRemove = new LinkedHashSet<>();
|
||||
for( final GATKSAMRecord rec : activeRegion.getReads() ) {
|
||||
if( !rec.getReadGroup().getSample().equals(targetSample) ) {
|
||||
readsToRemove.add(rec);
|
||||
}
|
||||
}
|
||||
activeRegion.removeAll( readsToRemove );
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -90,7 +90,11 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "a64e2d8e60a1be9cb0b4306c85e96393"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "d5c07fa3edca496a84fd17cecad06230"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "66019a0914f905522da6bd3b557a57d1"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "de763f6c9a5aec4586a2671941e4c96d"});
|
||||
|
||||
final String NA12878bandedResolutionMD5 = "de763f6c9a5aec4586a2671941e4c96d";
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5});
|
||||
tests.add(new Object[]{NA12878_WEx + " -I " + privateTestDir + "NA20313.highCoverageRegion.bam -sn NA12878",
|
||||
ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue