Merge pull request #714 from broadinstitute/pd_hc_samplename
Add the --sample_name argument to HaplotypeCaller
This commit is contained in:
commit
34e5cce553
|
|
@ -341,6 +341,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
@ArgumentCollection
|
@ArgumentCollection
|
||||||
private HaplotypeCallerArgumentCollection SCAC = new HaplotypeCallerArgumentCollection();
|
private HaplotypeCallerArgumentCollection SCAC = new HaplotypeCallerArgumentCollection();
|
||||||
|
|
||||||
|
@Argument(fullName="sample_name", shortName = "sn", doc="Name of single sample to use from a multi-sample bam. The name is case-sensitive", required=false)
|
||||||
|
protected String sampleNameToUse = null;
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------
|
||||||
// arguments to control internal behavior of the read threading assembler
|
// arguments to control internal behavior of the read threading assembler
|
||||||
// -----------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------
|
||||||
|
|
@ -655,7 +658,20 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
|
|
||||||
final GenomeAnalysisEngine toolkit = getToolkit();
|
final GenomeAnalysisEngine toolkit = getToolkit();
|
||||||
samplesList = toolkit.getReadSampleList();
|
samplesList = toolkit.getReadSampleList();
|
||||||
final Set<String> sampleSet = SampleListUtils.asSet(samplesList);
|
Set<String> sampleSet = SampleListUtils.asSet(samplesList);
|
||||||
|
|
||||||
|
if (sampleNameToUse != null) {
|
||||||
|
if (!sampleSet.contains(sampleNameToUse))
|
||||||
|
throw new UserException.BadArgumentValue("sample_name", "Specified name does not exist in input bam files");
|
||||||
|
if (sampleSet.size() == 1) {
|
||||||
|
//No reason to incur performance penalty associated with filtering if they specified the name of the only sample
|
||||||
|
sampleNameToUse = null;
|
||||||
|
} else {
|
||||||
|
samplesList = new IndexedSampleList(sampleNameToUse);
|
||||||
|
sampleSet = SampleListUtils.asSet(samplesList);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
|
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
|
||||||
final UnifiedArgumentCollection simpleUAC = SCAC.cloneTo(UnifiedArgumentCollection.class);
|
final UnifiedArgumentCollection simpleUAC = SCAC.cloneTo(UnifiedArgumentCollection.class);
|
||||||
|
|
@ -773,7 +789,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
referenceConfidenceModel = new ReferenceConfidenceModel(getToolkit().getGenomeLocParser(), samples, getToolkit().getSAMFileHeader(), indelSizeToEliminateInRefModel);
|
referenceConfidenceModel = new ReferenceConfidenceModel(getToolkit().getGenomeLocParser(), samples, getToolkit().getSAMFileHeader(), indelSizeToEliminateInRefModel);
|
||||||
if ( emitReferenceConfidence() ) {
|
if ( emitReferenceConfidence() ) {
|
||||||
if ( samples.sampleCount() != 1 )
|
if ( samples.sampleCount() != 1 )
|
||||||
throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently");
|
throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently. Use the sample_name argument");
|
||||||
headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
|
headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
|
||||||
if ( SCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
|
if ( SCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
|
||||||
// a kluge to enforce the use of this indexing strategy
|
// a kluge to enforce the use of this indexing strategy
|
||||||
|
|
@ -885,6 +901,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
// we're benchmarking ART and/or the active region determination code in the HC, just leave without doing any work
|
// we're benchmarking ART and/or the active region determination code in the HC, just leave without doing any work
|
||||||
return NO_CALLS;
|
return NO_CALLS;
|
||||||
|
|
||||||
|
if (sampleNameToUse != null)
|
||||||
|
removeReadsFromAllSamplesExcept(sampleNameToUse, originalActiveRegion);
|
||||||
|
|
||||||
if( !originalActiveRegion.isActive() )
|
if( !originalActiveRegion.isActive() )
|
||||||
// Not active so nothing to do!
|
// Not active so nothing to do!
|
||||||
return referenceModelForNoVariation(originalActiveRegion, true);
|
return referenceModelForNoVariation(originalActiveRegion, true);
|
||||||
|
|
@ -1270,4 +1289,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
FragmentUtils.adjustQualsOfOverlappingPairedFragments(overlappingPair);
|
FragmentUtils.adjustQualsOfOverlappingPairedFragments(overlappingPair);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void removeReadsFromAllSamplesExcept(final String targetSample, final ActiveRegion activeRegion) {
|
||||||
|
final Set<GATKSAMRecord> readsToRemove = new LinkedHashSet<>();
|
||||||
|
for( final GATKSAMRecord rec : activeRegion.getReads() ) {
|
||||||
|
if( !rec.getReadGroup().getSample().equals(targetSample) ) {
|
||||||
|
readsToRemove.add(rec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
activeRegion.removeAll( readsToRemove );
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -90,7 +90,11 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
||||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "a64e2d8e60a1be9cb0b4306c85e96393"});
|
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "a64e2d8e60a1be9cb0b4306c85e96393"});
|
||||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "d5c07fa3edca496a84fd17cecad06230"});
|
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "d5c07fa3edca496a84fd17cecad06230"});
|
||||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "66019a0914f905522da6bd3b557a57d1"});
|
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "66019a0914f905522da6bd3b557a57d1"});
|
||||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "de763f6c9a5aec4586a2671941e4c96d"});
|
|
||||||
|
final String NA12878bandedResolutionMD5 = "de763f6c9a5aec4586a2671941e4c96d";
|
||||||
|
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5});
|
||||||
|
tests.add(new Object[]{NA12878_WEx + " -I " + privateTestDir + "NA20313.highCoverageRegion.bam -sn NA12878",
|
||||||
|
ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5});
|
||||||
|
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue