Reads are now realigned to the most likely haplotype before being used by the annotations.

-- AD,DP will now correspond directly to the reads that were used to construct the PLs
-- RankSumTests, etc. will use the bases from the realigned reads instead of the original alignments
-- There is now no additional runtime cost to realign the reads when using bamout or GVCF mode
-- bamout mode no longer sets the mapping quality to zero for uninformative reads, instead the read will not be given an HC tag
This commit is contained in:
Ryan Poplin 2014-06-11 15:45:45 -04:00
parent dc93507023
commit 0127799cba
21 changed files with 197 additions and 257 deletions

View File

@ -692,7 +692,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
private ReadLikelihoodCalculationEngine createLikelihoodCalculationEngine() { private ReadLikelihoodCalculationEngine createLikelihoodCalculationEngine() {
switch (likelihoodEngineImplementation) { switch (likelihoodEngineImplementation) {
case PairHMM: case PairHMM:
return new PairHMMLikelihoodCalculationEngine( (byte)gcpHMM, SCAC.DEBUG, pairHMM, log10GlobalReadMismappingRate, noFpga, pcrErrorModel ); return new PairHMMLikelihoodCalculationEngine( (byte)gcpHMM, pairHMM, log10GlobalReadMismappingRate, noFpga, pcrErrorModel );
case GraphBased: case GraphBased:
return new GraphBasedLikelihoodCalculationEngine( (byte)gcpHMM,log10GlobalReadMismappingRate,heterogeneousKmerSizeResultion,SCAC.DEBUG,debugGraphTransformations); return new GraphBasedLikelihoodCalculationEngine( (byte)gcpHMM,log10GlobalReadMismappingRate,heterogeneousKmerSizeResultion,SCAC.DEBUG,debugGraphTransformations);
case Random: case Random:
@ -839,8 +839,12 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
final Map<String,List<GATKSAMRecord>> reads = splitReadsBySample( regionForGenotyping.getReads() ); final Map<String,List<GATKSAMRecord>> reads = splitReadsBySample( regionForGenotyping.getReads() );
// Calculate the likelihoods: CPU intesive part. // Calculate the likelihoods: CPU intesive part.
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = likelihoodCalculationEngine.computeReadLikelihoods(assemblyResult,reads);
likelihoodCalculationEngine.computeReadLikelihoods(assemblyResult,reads);
// Realign all the reads to the most likely haplotype for use by the annotations
for( final Map.Entry<String, PerReadAlleleLikelihoodMap> entry : stratifiedReadMap.entrySet() ) {
entry.getValue().realignReadsToMostLikelyHaplotype(haplotypes, assemblyResult.getPaddedReferenceLoc());
}
// Note: we used to subset down at this point to only the "best" haplotypes in all samples for genotyping, but there // Note: we used to subset down at this point to only the "best" haplotypes in all samples for genotyping, but there
// was a bad interaction between that selection and the marginalization that happens over each event when computing // was a bad interaction between that selection and the marginalization that happens over each event when computing

View File

@ -254,12 +254,12 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
if (emitReferenceConfidence) addMiscellaneousAllele(alleleReadMap); if (emitReferenceConfidence) addMiscellaneousAllele(alleleReadMap);
final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC ); final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC );
final VariantContext call = calculateGenotypes(null,null,null,null,new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel, false,null); final VariantContext call = calculateGenotypes(null, null, null, null, new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel, false, null);
if( call != null ) { if( call != null ) {
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( configuration.USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap : final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( configuration.USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap :
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, emptyDownSamplingMap ) ); convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, emptyDownSamplingMap ) );
if (emitReferenceConfidence) addMiscellaneousAllele(alleleReadMap_annotations); if (emitReferenceConfidence) addMiscellaneousAllele(alleleReadMap_annotations);
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call ); final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = addFilteredReadList(genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call, true);
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call); VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call);
@ -285,7 +285,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
* Add the <NON_REF> allele * Add the <NON_REF> allele
* @param stratifiedReadMap target per-read-allele-likelihood-map. * @param stratifiedReadMap target per-read-allele-likelihood-map.
*/ */
public static Map<String, PerReadAlleleLikelihoodMap> addMiscellaneousAllele(final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) { public static Map<String, PerReadAlleleLikelihoodMap> addMiscellaneousAllele(final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) {
final Allele miscellanoeusAllele = GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE; final Allele miscellanoeusAllele = GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE;
for (Map.Entry<String, PerReadAlleleLikelihoodMap> perSample : stratifiedReadMap.entrySet()) { for (Map.Entry<String, PerReadAlleleLikelihoodMap> perSample : stratifiedReadMap.entrySet()) {
for (Map.Entry<GATKSAMRecord, Map<Allele, Double>> perRead : perSample.getValue().getLikelihoodReadMap().entrySet()) { for (Map.Entry<GATKSAMRecord, Map<Allele, Double>> perRead : perSample.getValue().getLikelihoodReadMap().entrySet()) {
@ -430,19 +430,20 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
return genotypes; return genotypes;
} }
private static Map<String, PerReadAlleleLikelihoodMap> filterToOnlyOverlappingReads( final GenomeLocParser parser, private static Map<String, PerReadAlleleLikelihoodMap> addFilteredReadList(final GenomeLocParser parser,
final Map<String, PerReadAlleleLikelihoodMap> perSampleReadMap, final Map<String, PerReadAlleleLikelihoodMap> perSampleReadMap,
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList, final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
final VariantContext call ) { final VariantContext call,
final boolean requireOverlap) {
final Map<String, PerReadAlleleLikelihoodMap> returnMap = new LinkedHashMap<>(); final Map<String, PerReadAlleleLikelihoodMap> returnMap = new LinkedHashMap<>();
final GenomeLoc callLoc = parser.createGenomeLoc(call); final GenomeLoc callLoc = ( requireOverlap ? parser.createGenomeLoc(call) : null );
for( final Map.Entry<String, PerReadAlleleLikelihoodMap> sample : perSampleReadMap.entrySet() ) { for( final Map.Entry<String, PerReadAlleleLikelihoodMap> sample : perSampleReadMap.entrySet() ) {
final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap(); final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
for( final Map.Entry<GATKSAMRecord,Map<Allele,Double>> mapEntry : sample.getValue().getLikelihoodReadMap().entrySet() ) { for( final Map.Entry<GATKSAMRecord,Map<Allele,Double>> mapEntry : sample.getValue().getLikelihoodReadMap().entrySet() ) {
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
if( callLoc.overlapsP(parser.createGenomeLoc(mapEntry.getKey())) ) { // BUGBUG: This uses alignment start and stop, NOT soft start and soft end... if( !requireOverlap || callLoc.overlapsP(parser.createGenomeLocUnclipped(mapEntry.getKey())) ) {
for( final Map.Entry<Allele,Double> alleleDoubleEntry : mapEntry.getValue().entrySet() ) { for( final Map.Entry<Allele,Double> alleleDoubleEntry : mapEntry.getValue().entrySet() ) {
likelihoodMap.add(mapEntry.getKey(), alleleDoubleEntry.getKey(), alleleDoubleEntry.getValue()); likelihoodMap.add(mapEntry.getKey(), alleleDoubleEntry.getKey(), alleleDoubleEntry.getValue());
} }
@ -452,7 +453,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
// add all filtered reads to the NO_CALL list because they weren't given any likelihoods // add all filtered reads to the NO_CALL list because they weren't given any likelihoods
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) { for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) { if( !requireOverlap || callLoc.overlapsP(parser.createGenomeLocUnclipped(read)) ) {
for( final Allele allele : call.getAlleles() ) { for( final Allele allele : call.getAlleles() ) {
likelihoodMap.add(read, allele, 0.0); likelihoodMap.add(read, allele, 0.0);
} }

View File

@ -74,7 +74,6 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
private final byte constantGCP; private final byte constantGCP;
private final double log10globalReadMismappingRate; private final double log10globalReadMismappingRate;
private final boolean DEBUG;
private final PairHMM.HMM_IMPLEMENTATION hmmType; private final PairHMM.HMM_IMPLEMENTATION hmmType;
private final boolean noFpga; private final boolean noFpga;
@ -141,7 +140,6 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
* Create a new PairHMMLikelihoodCalculationEngine using provided parameters and hmm to do its calculations * Create a new PairHMMLikelihoodCalculationEngine using provided parameters and hmm to do its calculations
* *
* @param constantGCP the gap continuation penalty to use with the PairHMM * @param constantGCP the gap continuation penalty to use with the PairHMM
* @param debug should we emit debugging information during the calculation?
* @param hmmType the type of the HMM to use * @param hmmType the type of the HMM to use
* @param log10globalReadMismappingRate the global mismapping probability, in log10(prob) units. A value of * @param log10globalReadMismappingRate the global mismapping probability, in log10(prob) units. A value of
* -3 means that the chance that a read doesn't actually belong at this * -3 means that the chance that a read doesn't actually belong at this
@ -153,10 +151,9 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
* assigned a likelihood of -13. * assigned a likelihood of -13.
* @param noFpga disable FPGA acceleration * @param noFpga disable FPGA acceleration
*/ */
public PairHMMLikelihoodCalculationEngine( final byte constantGCP, final boolean debug, final PairHMM.HMM_IMPLEMENTATION hmmType, final double log10globalReadMismappingRate, final boolean noFpga, final PCR_ERROR_MODEL pcrErrorModel ) { public PairHMMLikelihoodCalculationEngine( final byte constantGCP, final PairHMM.HMM_IMPLEMENTATION hmmType, final double log10globalReadMismappingRate, final boolean noFpga, final PCR_ERROR_MODEL pcrErrorModel ) {
this.hmmType = hmmType; this.hmmType = hmmType;
this.constantGCP = constantGCP; this.constantGCP = constantGCP;
this.DEBUG = debug;
this.log10globalReadMismappingRate = log10globalReadMismappingRate; this.log10globalReadMismappingRate = log10globalReadMismappingRate;
this.noFpga = noFpga; this.noFpga = noFpga;
this.pcrErrorModel = pcrErrorModel; this.pcrErrorModel = pcrErrorModel;
@ -374,21 +371,6 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
} }
public Map<String, PerReadAlleleLikelihoodMap> computeReadLikelihoods( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList ) {
// Add likelihoods for each sample's reads to our stratifiedReadMap
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = new LinkedHashMap<>();
for( final Map.Entry<String, List<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) {
// evaluate the likelihood of the reads given those haplotypes
final PerReadAlleleLikelihoodMap map = computeReadLikelihoods(haplotypes, sampleEntry.getValue());
map.filterPoorlyModelledReads(EXPECTED_ERROR_RATE_PER_BASE);
stratifiedReadMap.put(sampleEntry.getKey(), map);
}
return stratifiedReadMap;
}
private PerReadAlleleLikelihoodMap computeReadLikelihoods( final List<Haplotype> haplotypes, final List<GATKSAMRecord> reads) { private PerReadAlleleLikelihoodMap computeReadLikelihoods( final List<Haplotype> haplotypes, final List<GATKSAMRecord> reads) {
// Modify the read qualities by applying the PCR error model and capping the minimum base,insertion,deletion qualities // Modify the read qualities by applying the PCR error model and capping the minimum base,insertion,deletion qualities

View File

@ -55,15 +55,12 @@ import org.broadinstitute.gatk.utils.QualityUtils;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion; import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.haplotype.Haplotype; import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.haplotypeBAMWriter.HaplotypeBAMWriter;
import org.broadinstitute.gatk.utils.haplotypeBAMWriter.ReadDestination;
import org.broadinstitute.gatk.utils.locusiterator.LocusIteratorByState; import org.broadinstitute.gatk.utils.locusiterator.LocusIteratorByState;
import org.broadinstitute.gatk.utils.pileup.PileupElement; import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.gatk.utils.sam.AlignmentUtils; import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.sam.ReadUtils;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFHeaderLine; import htsjdk.variant.vcf.VCFHeaderLine;
@ -89,7 +86,6 @@ public class ReferenceConfidenceModel {
private final GenomeLocParser genomeLocParser; private final GenomeLocParser genomeLocParser;
private final Set<String> samples; private final Set<String> samples;
private final SAMFileHeader header; // TODO -- really shouldn't depend on this
private final int indelInformativeDepthIndelSize; private final int indelInformativeDepthIndelSize;
private final static boolean WRITE_DEBUGGING_BAM = false; private final static boolean WRITE_DEBUGGING_BAM = false;
@ -117,7 +113,6 @@ public class ReferenceConfidenceModel {
this.genomeLocParser = genomeLocParser; this.genomeLocParser = genomeLocParser;
this.samples = samples; this.samples = samples;
this.header = header;
this.indelInformativeDepthIndelSize = indelInformativeDepthIndelSize; this.indelInformativeDepthIndelSize = indelInformativeDepthIndelSize;
if ( WRITE_DEBUGGING_BAM ) { if ( WRITE_DEBUGGING_BAM ) {
@ -326,24 +321,13 @@ public class ReferenceConfidenceModel {
if ( stratifiedReadMap == null ) throw new IllegalArgumentException("stratifiedReadMap cannot be null"); if ( stratifiedReadMap == null ) throw new IllegalArgumentException("stratifiedReadMap cannot be null");
if ( stratifiedReadMap.size() != 1 ) throw new IllegalArgumentException("stratifiedReadMap must contain exactly one sample but it contained " + stratifiedReadMap.size()); if ( stratifiedReadMap.size() != 1 ) throw new IllegalArgumentException("stratifiedReadMap must contain exactly one sample but it contained " + stratifiedReadMap.size());
List<GATKSAMRecord> realignedReads; final List<GATKSAMRecord> reads = activeRegion.getReads();
if( calledHaplotypes.size() == 1 ) { // only contains ref haplotype so an optimization is to just trust the alignments to the reference haplotype as provided by the aligner
realignedReads = activeRegion.getReads();
} else {
final ReadDestination.ToList realignedReadsDest = new ReadDestination.ToList(header, "FOO");
final HaplotypeBAMWriter writer = HaplotypeBAMWriter.create(HaplotypeBAMWriter.Type.CALLED_HAPLOTYPES, realignedReadsDest);
writer.setWriteHaplotypesAsWell(false); // don't write out reads for the haplotypes, as we only want the realigned reads themselves
writer.setOnlyRealignInformativeReads(true);
writer.writeReadsAlignedToHaplotypes(calledHaplotypes, paddedReferenceLoc, stratifiedReadMap);
realignedReads = ReadUtils.sortReadsByCoordinate(realignedReadsDest.getReads());
}
if ( debuggingWriter != null ) if ( debuggingWriter != null )
for ( final GATKSAMRecord read : realignedReads ) for ( final GATKSAMRecord read : reads )
debuggingWriter.addAlignment(read); debuggingWriter.addAlignment(read);
final LocusIteratorByState libs = new LocusIteratorByState(realignedReads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, final LocusIteratorByState libs = new LocusIteratorByState(reads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING,
true, genomeLocParser, samples, false); true, genomeLocParser, samples, false);
final List<ReadBackedPileup> pileups = new LinkedList<>(); final List<ReadBackedPileup> pileups = new LinkedList<>();

View File

@ -81,16 +81,9 @@ class AllHaplotypeBAMWriter extends HaplotypeBAMWriter {
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) { final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) {
writeHaplotypesAsReads(haplotypes, new HashSet<>(bestHaplotypes), paddedReferenceLoc); writeHaplotypesAsReads(haplotypes, new HashSet<>(bestHaplotypes), paddedReferenceLoc);
// we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently
final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<>(haplotypes.size());
for ( final Haplotype haplotype : haplotypes )
alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype);
// next, output the interesting reads for each sample aligned against the appropriate haplotype
for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) { for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) {
for ( final Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) { for( final GATKSAMRecord read : readAlleleLikelihoodMap.getLikelihoodReadMap().keySet() ) {
final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue()); writeReadAgainstHaplotype(read);
writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart(), bestAllele.isInformative());
} }
} }
} }

View File

@ -88,22 +88,9 @@ class CalledHaplotypeBAMWriter extends HaplotypeBAMWriter {
writeHaplotypesAsReads(calledHaplotypes, calledHaplotypes, paddedReferenceLoc); writeHaplotypesAsReads(calledHaplotypes, calledHaplotypes, paddedReferenceLoc);
// we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently
final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<>(haplotypes.size());
for ( final Haplotype haplotype : calledHaplotypes ) {
alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype);
}
// the set of all alleles that were actually called
final Set<Allele> allelesOfCalledHaplotypes = alleleToHaplotypeMap.keySet();
// next, output the interesting reads for each sample aligned against one of the called haplotypes
for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) { for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) {
for ( final Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) { for ( final GATKSAMRecord read : readAlleleLikelihoodMap.getLikelihoodReadMap().keySet() ) {
final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue(), allelesOfCalledHaplotypes); writeReadAgainstHaplotype(read);
final Haplotype haplotype = alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele());
if (haplotype == null) continue;
writeReadAgainstHaplotype(entry.getKey(), haplotype, paddedReferenceLoc.getStart(), bestAllele.isInformative());
} }
} }
} }

View File

@ -78,7 +78,6 @@ public abstract class HaplotypeBAMWriter {
private long uniqueNameCounter = 1; private long uniqueNameCounter = 1;
protected final static String READ_GROUP_ID = "ArtificialHaplotype"; protected final static String READ_GROUP_ID = "ArtificialHaplotype";
protected final static String HAPLOTYPE_TAG = "HC";
private final ReadDestination output; private final ReadDestination output;
private boolean writeHaplotypesAsWell = true; private boolean writeHaplotypesAsWell = true;
@ -173,108 +172,10 @@ public abstract class HaplotypeBAMWriter {
/** /**
* Write out read aligned to haplotype to the BAM file * Write out read aligned to haplotype to the BAM file
* *
* Aligns reads the haplotype, and then projects this alignment of read -> hap onto the reference
* via the alignment of haplotype (via its getCigar) method.
*
* @param originalRead the read we want to write aligned to the reference genome * @param originalRead the read we want to write aligned to the reference genome
* @param haplotype the haplotype that the read should be aligned to, before aligning to the reference
* @param referenceStart the start of the reference that haplotype is aligned to. Provides global coordinate frame.
* @param isInformative true if the read is differentially informative for one of the haplotypes
*/ */
protected void writeReadAgainstHaplotype(final GATKSAMRecord originalRead, protected void writeReadAgainstHaplotype(final GATKSAMRecord originalRead) {
final Haplotype haplotype, output.add(originalRead);
final int referenceStart,
final boolean isInformative) {
if( onlyRealignInformativeReads && !isInformative ) {
if( originalRead != null ) {
output.add(originalRead);
}
} else if (haplotype == null) {
output.add(originalRead);
return;
} else {
final GATKSAMRecord alignedToRef = createReadAlignedToRef(originalRead, haplotype, referenceStart, isInformative);
if ( alignedToRef != null ) {
output.add(alignedToRef);
} else {
output.add(originalRead);
}
}
}
/**
* Aligns reads the haplotype, and then projects this alignment of read -> hap onto the reference
* via the alignment of haplotype (via its getCigar) method.
*
* @param originalRead the read we want to write aligned to the reference genome
* @param haplotype the haplotype that the read should be aligned to, before aligning to the reference
* @param referenceStart the start of the reference that haplotype is aligned to. Provides global coordinate frame.
* @param isInformative true if the read is differentially informative for one of the haplotypes
* @return a GATKSAMRecord aligned to reference, or null if no meaningful alignment is possible
*/
protected GATKSAMRecord createReadAlignedToRef(final GATKSAMRecord originalRead,
final Haplotype haplotype,
final int referenceStart,
final boolean isInformative) {
if ( originalRead == null ) throw new IllegalArgumentException("originalRead cannot be null");
if ( haplotype == null ) throw new IllegalArgumentException("haplotype cannot be null");
if ( haplotype.getCigar() == null ) throw new IllegalArgumentException("Haplotype cigar not set " + haplotype);
if ( referenceStart < 1 ) throw new IllegalArgumentException("reference start much be >= 1 but got " + referenceStart);
try {
// compute the smith-waterman alignment of read -> haplotype
final SWPairwiseAlignment swPairwiseAlignment = new SWPairwiseAlignment(haplotype.getBases(), originalRead.getReadBases(), CigarUtils.NEW_SW_PARAMETERS);
//swPairwiseAlignment.printAlignment(haplotype.getBases(), originalRead.getReadBases());
if ( swPairwiseAlignment.getAlignmentStart2wrt1() == -1 )
// sw can fail (reasons not clear) so if it happens just don't write the read
return null;
final Cigar swCigar = AlignmentUtils.consolidateCigar(swPairwiseAlignment.getCigar());
// since we're modifying the read we need to clone it
final GATKSAMRecord read = (GATKSAMRecord)originalRead.clone();
addHaplotypeTag(read, haplotype);
// uninformative reads are set to zero mapping quality to enhance visualization
if ( !isInformative )
read.setMappingQuality(0);
// compute here the read starts w.r.t. the reference from the SW result and the hap -> ref cigar
final Cigar extendedHaplotypeCigar = haplotype.getConsolidatedPaddedCigar(1000);
final int readStartOnHaplotype = AlignmentUtils.calcFirstBaseMatchingReferenceInCigar(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1());
final int readStartOnReference = referenceStart + haplotype.getAlignmentStartHapwrtRef() + readStartOnHaplotype;
read.setAlignmentStart(readStartOnReference);
// compute the read -> ref alignment by mapping read -> hap -> ref from the
// SW of read -> hap mapped through the given by hap -> ref
final Cigar haplotypeToRef = AlignmentUtils.trimCigarByBases(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1(), extendedHaplotypeCigar.getReadLength() - 1);
final Cigar readToRefCigarRaw = AlignmentUtils.applyCigarToCigar(swCigar, haplotypeToRef);
final Cigar readToRefCigarClean = AlignmentUtils.cleanUpCigar(readToRefCigarRaw);
final Cigar readToRefCigar = AlignmentUtils.leftAlignIndel(readToRefCigarClean, haplotype.getBases(),
originalRead.getReadBases(), swPairwiseAlignment.getAlignmentStart2wrt1(), 0, true);
read.setCigar(readToRefCigar);
if ( readToRefCigar.getReadLength() != read.getReadLength() )
throw new IllegalStateException("Cigar " + readToRefCigar + " with read length " + readToRefCigar.getReadLength()
+ " != read length " + read.getReadLength() + " for read " + read.format() + "\nhapToRef " + haplotypeToRef + " length " + haplotypeToRef.getReadLength() + "/" + haplotypeToRef.getReferenceLength()
+ "\nreadToHap " + swCigar + " length " + swCigar.getReadLength() + "/" + swCigar.getReferenceLength());
return read;
} catch ( CloneNotSupportedException e ) {
throw new IllegalStateException("GATKSAMRecords should support clone but this one does not " + originalRead);
}
}
/**
* Add a haplotype tag to the read based on haplotype
*
* @param read the read to add the tag to
* @param haplotype the haplotype that gives rises to read
*/
private void addHaplotypeTag(final GATKSAMRecord read, final Haplotype haplotype) {
// add a tag to the read that indicates which haplotype it best aligned to. It's a uniquish integer
read.setAttribute(HAPLOTYPE_TAG, haplotype.hashCode());
} }
/** /**
@ -309,7 +210,7 @@ public abstract class HaplotypeBAMWriter {
record.setCigar(AlignmentUtils.consolidateCigar(haplotype.getCigar())); record.setCigar(AlignmentUtils.consolidateCigar(haplotype.getCigar()));
record.setMappingQuality(isAmongBestHaplotypes ? 60 : 0); record.setMappingQuality(isAmongBestHaplotypes ? 60 : 0);
record.setReadName("HC" + uniqueNameCounter++); record.setReadName("HC" + uniqueNameCounter++);
addHaplotypeTag(record, haplotype); record.setAttribute(AlignmentUtils.HAPLOTYPE_TAG, haplotype.hashCode());
record.setReadUnmappedFlag(false); record.setReadUnmappedFlag(false);
record.setReferenceIndex(paddedRefLoc.getContigIndex()); record.setReferenceIndex(paddedRefLoc.getContigIndex());
record.setAttribute(SAMTag.RG.toString(), READ_GROUP_ID); record.setAttribute(SAMTag.RG.toString(), READ_GROUP_ID);

View File

@ -119,7 +119,7 @@ public class HCLikelihoodCalculationEnginesBenchmark extends SimpleBenchmark {
@SuppressWarnings("unused") @SuppressWarnings("unused")
public void timeLoglessPairHMM(final int reps) { public void timeLoglessPairHMM(final int reps) {
for (int i = 0; i < reps; i++) { for (int i = 0; i < reps; i++) {
final PairHMMLikelihoodCalculationEngine engine = new PairHMMLikelihoodCalculationEngine((byte) 10, false, final PairHMMLikelihoodCalculationEngine engine = new PairHMMLikelihoodCalculationEngine((byte) 10,
PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, -3, true, PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.NONE); PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, -3, true, PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.NONE);
engine.computeReadLikelihoods(dataSet.assemblyResultSet(), Collections.singletonMap("anonymous", dataSet.readList())); engine.computeReadLikelihoods(dataSet.assemblyResultSet(), Collections.singletonMap("anonymous", dataSet.readList()));
} }

View File

@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test @Test
public void testHaplotypeCallerMultiSampleComplex1() { public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "d1626d5fff419b8a782de954224e881f"); HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "bb761c6cd0893e7e96c56bf1d8494588");
} }
private void HCTestSymbolicVariants(String bam, String args, String md5) { private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -88,13 +88,13 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test @Test
public void testHaplotypeCallerMultiSampleGGAComplex() { public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"724a05b7df716647014f29c0fe86e071"); "fb1042487efb03896f482c08d7eb2671");
} }
@Test @Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"437ce80af30631de077fd617b13cd2e4"); "9a6e314d7a8fe1c4a9ff5048544cb670");
} }
private void HCTestComplexConsensusMode(String bam, String args, String md5) { private void HCTestComplexConsensusMode(String bam, String args, String md5) {
@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test @Test
public void testHaplotypeCallerMultiSampleConsensusModeComplex() { public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337", HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
"45975c205e1202d19b235312588dd733"); "1e4af2f8fc97e0b07e625f142374742f");
} }
} }

View File

@ -67,12 +67,12 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals; final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
// this functionality can be adapted to provide input data for whatever you might want in your data // this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "bb6c326616790076de5111a7a3f6feb5"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "080cf92bfcb7db4273e2d723045daee5"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "06d37e116871d8840d3de17a51049363"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "a4c0a28a451069faaac937e45c701043"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "53517b967a60d800a9d9d1d5a1bc54c9"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "0bc25b42dc73eeadb23a35227684172c"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "c95a777e3aac50b9f29302919a00ba4e"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "05860d40923e8b074576c787037c5768"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "f3abc3050b2708fc78c903fb70ac253e"});
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }
@ -137,7 +137,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
public void testWrongGVCFNonVariantRecordOrderBugFix() { public void testWrongGVCFNonVariantRecordOrderBugFix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("324eb46738a364cd7dc5fa0b62491a5e")); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e286b5eb72e3f75e7b08e5729cdfe019"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testMissingGVCFIndexingStrategyException", spec); executeTest("testMissingGVCFIndexingStrategyException", spec);
} }

View File

@ -84,27 +84,27 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerMultiSample() { public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "9e90f4ed1a04ce159ad3e560e62f5d6b"); HCTest(CEUTRIO_BAM, "", "030b45c91879eb41f30abbd10ed88fa9");
} }
@Test @Test
public void testHaplotypeCallerSingleSample() { public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "96f299a5cf411900b8eda3845c3ce465"); HCTest(NA12878_BAM, "", "6632475b1cefce9a0c5727bc919501e5");
} }
@Test @Test
public void testHaplotypeCallerMinBaseQuality() { public void testHaplotypeCallerMinBaseQuality() {
HCTest(NA12878_BAM, "-mbq 15", "6509cfd0554ecbb92a1b303fbcc0fcca"); HCTest(NA12878_BAM, "-mbq 15", "31fe63ea44407b6aa40261319a414488");
} }
@Test @Test
public void testHaplotypeCallerGraphBasedSingleSample() { public void testHaplotypeCallerGraphBasedSingleSample() {
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "f87bb723bfb9b8f604db1d53624d24e3"); HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "5a49b5b98247070e8de637a706b02db9");
} }
@Test @Test
public void testHaplotypeCallerGraphBasedMultiSample() { public void testHaplotypeCallerGraphBasedMultiSample() {
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "8d1edb0ea25c55a8c825b4e1470e219f"); HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "d530933588810bb4cf04a9783d187433");
} }
@Test(enabled = false) // can't annotate the rsID's yet @Test(enabled = false) // can't annotate the rsID's yet
@ -115,7 +115,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerMultiSampleGGA() { public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"d734a90b07dd0d6b051b7c0961787f98"); "844d1daa4a8835f4c7e10d0d8bbfe510");
} }
@Test @Test
@ -131,7 +131,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() { public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "d3fc49d3d3c8b6439548133e03faa540"); HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "ec77ac891cac9461b2eb011270166dc1");
} }
private void HCTestNearbySmallIntervals(String bam, String args, String md5) { private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
@ -168,7 +168,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerNearbySmallIntervals() { public void testHaplotypeCallerNearbySmallIntervals() {
HCTestNearbySmallIntervals(NA12878_BAM, "", "a415bc76231a04dc38412ff38aa0dc49"); HCTestNearbySmallIntervals(NA12878_BAM, "", "59e0f10f8822e2a5e7d2fabb71dbea3f");
} }
// This problem bam came from a user on the forum and it spotted a problem where the ReadClipper // This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
@ -227,7 +227,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGS() { public void HCTestDBSNPAnnotationWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, "-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("32af503f1d58b1f34d896ea2ddb891aa")); Arrays.asList("d357f6fe3f1350f886b0df5a207e9f6f"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
} }
@ -236,7 +236,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132 "-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132
+ " -L " + hg19Intervals + " -isr INTERSECTION", 1, + " -L " + hg19Intervals + " -isr INTERSECTION", 1,
Arrays.asList("e39c73bbaf22b4751755d9f5bb2a8d3d")); Arrays.asList("e64e06ae90a8eab52cabe3687992da73"));
executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec); executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec);
} }
@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGSGraphBased() { public void HCTestDBSNPAnnotationWGSGraphBased() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("39519c4f07d229f266430a0b0cfdf1db")); Arrays.asList("4e43cf47d32210ee6a6880af3bbfcc52"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
} }
@ -253,7 +253,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132 "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132
+ " -L " + hg19Intervals + " -isr INTERSECTION", 1, + " -L " + hg19Intervals + " -isr INTERSECTION", 1,
Arrays.asList("c14d7f23dedea7e7ec99a90843320c1a")); Arrays.asList("83bf354f0238a362d4b6ef8d14679999"));
executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec); executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec);
} }
@ -276,7 +276,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestAggressivePcrIndelModelWGS() { public void HCTestAggressivePcrIndelModelWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1, "-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
Arrays.asList("d1e1de9f1d33c8c3f96520d674c76cda")); Arrays.asList("9297d4f5a9eaab253bea1ec19a8581c2"));
executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec); executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec);
} }
@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestConservativePcrIndelModelWGS() { public void HCTestConservativePcrIndelModelWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1, "-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
Arrays.asList("10363ebae7edb0e81c2e668f7dbfba15")); Arrays.asList("de2b002e0311c8fac7d053ecf8cb41f8"));
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec); executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
} }
@ -304,7 +304,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16", final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16",
b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list", b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list",
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4eb63e603e7a99388bb421591350de8c")); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("5c1323d441ab083dbb569831959a9250"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec); executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
} }
@ -313,7 +313,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testMissingKeyAlternativeHaplotypesBugFix() { public void testMissingKeyAlternativeHaplotypesBugFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ", final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list"); b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list");
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("6c70b7a52803f0bcfc53a14b894834df")); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("d8c6197a7060c16c849e15460a4287f2"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec); executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec);
} }
@ -323,7 +323,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ", final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
hg19RefereneWithChrPrefixInChromosomeNames, privateTestDir + "bad-likelihoods.bam", privateTestDir + "bad-likelihoods.interval_list", hg19RefereneWithChrPrefixInChromosomeNames, privateTestDir + "bad-likelihoods.bam", privateTestDir + "bad-likelihoods.interval_list",
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("0626e4c7543acc0cbb06c34435a2b730")); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e36179dcdfb6f2a4269d08543479e97c"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testBadLikelihoodsDueToBadHaplotypeSelectionFix", spec); executeTest("testBadLikelihoodsDueToBadHaplotypeSelectionFix", spec);
} }
@ -346,9 +346,9 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
// but please make sure that both outputs get the same variant, // but please make sure that both outputs get the same variant,
// alleles all with DBSNP ids // alleles all with DBSNP ids
// We test here that change in active region size does not have an effect in placement of indels. // We test here that change in active region size does not have an effect in placement of indels.
final WalkerTestSpec shortSpec = new WalkerTestSpec(commandLineShortInterval + " -o %s",Arrays.asList("f5de88bb1a1911eb5e6540a59f1517e2")); final WalkerTestSpec shortSpec = new WalkerTestSpec(commandLineShortInterval + " -o %s",Arrays.asList("8512638ddb84eec5f58eaa1be6f58401"));
executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::shortInterval",shortSpec); executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::shortInterval",shortSpec);
final WalkerTestSpec longSpec = new WalkerTestSpec(commandLineLongInterval + " -o %s",Arrays.asList("fb957604f506fe9a62138943bd82543e")); final WalkerTestSpec longSpec = new WalkerTestSpec(commandLineLongInterval + " -o %s",Arrays.asList("9645d154812ae4db5357e7e5628936ab"));
executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::longInterval",longSpec); executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::longInterval",longSpec);
} }

View File

@ -132,7 +132,7 @@ public class PairHMMLikelihoodCalculationEngineUnitTest extends BaseTest {
@Test(dataProvider = "PcrErrorModelTestProvider", enabled = true) @Test(dataProvider = "PcrErrorModelTestProvider", enabled = true)
public void createPcrErrorModelTest(final String repeat, final int repeatLength) { public void createPcrErrorModelTest(final String repeat, final int repeatLength) {
final PairHMMLikelihoodCalculationEngine engine = new PairHMMLikelihoodCalculationEngine((byte)0, false, final PairHMMLikelihoodCalculationEngine engine = new PairHMMLikelihoodCalculationEngine((byte)0,
PairHMM.HMM_IMPLEMENTATION.ORIGINAL, 0.0, true, PairHMM.HMM_IMPLEMENTATION.ORIGINAL, 0.0, true,
PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.CONSERVATIVE); PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.CONSERVATIVE);

View File

@ -91,7 +91,7 @@ public class ReadThreadingLikelihoodCalculationEngineUnitTest extends ActiveRegi
//final PairHMMLikelihoodCalculationEngine fullPairHMM = new PairHMMLikelihoodCalculationEngine((byte)10, false, //final PairHMMLikelihoodCalculationEngine fullPairHMM = new PairHMMLikelihoodCalculationEngine((byte)10, false,
// PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, -3); // PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, -3);
final PairHMMLikelihoodCalculationEngine fullPairHMM = new PairHMMLikelihoodCalculationEngine((byte)10, false, final PairHMMLikelihoodCalculationEngine fullPairHMM = new PairHMMLikelihoodCalculationEngine((byte)10,
PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, Double.NEGATIVE_INFINITY,true, PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.NONE); PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, Double.NEGATIVE_INFINITY,true, PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.NONE);
// When using likelihoods it should be around 0.05 since // When using likelihoods it should be around 0.05 since

View File

@ -166,17 +166,16 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest {
} }
@Test(dataProvider = "ReadAlignedToRefData", enabled = true) @Test(dataProvider = "ReadAlignedToRefData", enabled = true)
public void testReadAlignedToRef(final GATKSAMRecord read, final Haplotype haplotype, final int refStart, final int expectedReadStart, final String expectedReadCigar) throws Exception { public void testReadAlignedToRef(final GATKSAMRecord read, final Haplotype haplotype, final int refStart, final int expectedReadStart, final String expectedReadCigar) throws Exception {
final HaplotypeBAMWriter writer = new CalledHaplotypeBAMWriter(new MockDestination()); final HaplotypeBAMWriter writer = new CalledHaplotypeBAMWriter(new MockDestination());
final GATKSAMRecord originalReadCopy = (GATKSAMRecord)read.clone(); final GATKSAMRecord originalReadCopy = (GATKSAMRecord)read.clone();
if ( expectedReadCigar == null ) { if ( expectedReadCigar == null ) {
Assert.assertNull(writer.createReadAlignedToRef(read, haplotype, refStart, true)); Assert.assertNull(AlignmentUtils.createReadAlignedToRef(read, haplotype, refStart, true));
} else { } else {
final Cigar expectedCigar = TextCigarCodec.getSingleton().decode(expectedReadCigar); final Cigar expectedCigar = TextCigarCodec.getSingleton().decode(expectedReadCigar);
final GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, refStart, true); final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, refStart, true);
Assert.assertEquals(alignedRead.getReadName(), originalReadCopy.getReadName()); Assert.assertEquals(alignedRead.getReadName(), originalReadCopy.getReadName());
Assert.assertEquals(alignedRead.getAlignmentStart(), expectedReadStart); Assert.assertEquals(alignedRead.getAlignmentStart(), expectedReadStart);
@ -286,7 +285,7 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest {
@Test(dataProvider = "ComplexReadAlignedToRef", enabled = !DEBUG) @Test(dataProvider = "ComplexReadAlignedToRef", enabled = !DEBUG)
public void testReadAlignedToRefComplexAlignment(final int testIndex, final GATKSAMRecord read, final String reference, final Haplotype haplotype, final int expectedMaxMismatches) throws Exception { public void testReadAlignedToRefComplexAlignment(final int testIndex, final GATKSAMRecord read, final String reference, final Haplotype haplotype, final int expectedMaxMismatches) throws Exception {
final HaplotypeBAMWriter writer = new CalledHaplotypeBAMWriter(new MockDestination()); final HaplotypeBAMWriter writer = new CalledHaplotypeBAMWriter(new MockDestination());
final GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, 1, true); final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, 1, true);
if ( alignedRead != null ) { if ( alignedRead != null ) {
final int mismatches = AlignmentUtils.getMismatchCount(alignedRead, reference.getBytes(), alignedRead.getAlignmentStart() - 1).numMismatches; final int mismatches = AlignmentUtils.getMismatchCount(alignedRead, reference.getBytes(), alignedRead.getAlignmentStart() - 1).numMismatches;
Assert.assertTrue(mismatches <= expectedMaxMismatches, Assert.assertTrue(mismatches <= expectedMaxMismatches,

View File

@ -430,12 +430,12 @@ public final class GenomeLocParser {
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
/** /**
* create a genome loc, given a read. If the read is unmapped, *and* yet the read has a contig and start position, * Create a genome loc, given a read. If the read is unmapped, *and* yet the read has a contig and start position,
* then a GenomeLoc is returned for contig:start-start, otherwise and UNMAPPED GenomeLoc is returned. * then a GenomeLoc is returned for contig:start-start, otherwise an UNMAPPED GenomeLoc is returned.
* *
* @param read * @param read the read from which to create a genome loc
* *
* @return * @return the GenomeLoc that was created
*/ */
@Requires("read != null") @Requires("read != null")
@Ensures("result != null") @Ensures("result != null")
@ -445,11 +445,32 @@ public final class GenomeLocParser {
return GenomeLoc.UNMAPPED; return GenomeLoc.UNMAPPED;
else { else {
// Use Math.max to ensure that end >= start (Picard assigns the end to reads that are entirely within an insertion as start-1) // Use Math.max to ensure that end >= start (Picard assigns the end to reads that are entirely within an insertion as start-1)
int end = read.getReadUnmappedFlag() ? read.getAlignmentStart() : Math.max(read.getAlignmentEnd(), read.getAlignmentStart()); final int end = read.getReadUnmappedFlag() ? read.getAlignmentStart() : Math.max(read.getAlignmentEnd(), read.getAlignmentStart());
return createGenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), end, false); return createGenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), end, false);
} }
} }
/**
* Create a genome loc, given a read using its unclipped alignment. If the read is unmapped, *and* yet the read has a contig and start position,
* then a GenomeLoc is returned for contig:start-start, otherwise an UNMAPPED GenomeLoc is returned.
*
* @param read the read from which to create a genome loc
*
* @return the GenomeLoc that was created
*/
@Requires("read != null")
@Ensures("result != null")
public GenomeLoc createGenomeLocUnclipped(final SAMRecord read) {
if ( read.getReadUnmappedFlag() && read.getReferenceIndex() == -1 )
// read is unmapped and not placed anywhere on the genome
return GenomeLoc.UNMAPPED;
else {
// Use Math.max to ensure that end >= start (Picard assigns the end to reads that are entirely within an insertion as start-1)
final int end = read.getReadUnmappedFlag() ? read.getUnclippedEnd() : Math.max(read.getUnclippedEnd(), read.getUnclippedStart());
return createGenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getUnclippedStart(), end, false);
}
}
/** /**
* Creates a GenomeLoc from a Tribble feature * Creates a GenomeLoc from a Tribble feature
* @param feature * @param feature

View File

@ -67,12 +67,7 @@ public class ClippingOp {
* @param originalRead the read to be clipped * @param originalRead the read to be clipped
*/ */
public GATKSAMRecord apply(ClippingRepresentation algorithm, GATKSAMRecord originalRead) { public GATKSAMRecord apply(ClippingRepresentation algorithm, GATKSAMRecord originalRead) {
GATKSAMRecord read; GATKSAMRecord read = (GATKSAMRecord) originalRead.clone();
try {
read = (GATKSAMRecord) originalRead.clone();
} catch (CloneNotSupportedException e) {
throw new ReviewedGATKException("Where did the clone go?");
}
byte[] quals = read.getBaseQualities(); byte[] quals = read.getBaseQualities();
byte[] bases = read.getReadBases(); byte[] bases = read.getReadBases();
byte[] newBases = new byte[bases.length]; byte[] newBases = new byte[bases.length];
@ -169,14 +164,7 @@ public class ClippingOp {
} }
private GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) { private GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) {
GATKSAMRecord unclipped; GATKSAMRecord unclipped = (GATKSAMRecord) read.clone();
// shallow copy of the read bases and quals should be fine here because they are immutable in the original read
try {
unclipped = (GATKSAMRecord) read.clone();
} catch (CloneNotSupportedException e) {
throw new ReviewedGATKException("Where did the clone go?");
}
Cigar unclippedCigar = new Cigar(); Cigar unclippedCigar = new Cigar();
int matchesCount = 0; int matchesCount = 0;
@ -376,12 +364,7 @@ public class ClippingOp {
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength); System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength); System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);
final GATKSAMRecord hardClippedRead; final GATKSAMRecord hardClippedRead = (GATKSAMRecord) read.clone();
try {
hardClippedRead = (GATKSAMRecord) read.clone();
} catch (CloneNotSupportedException e) {
throw new ReviewedGATKException("Where did the clone go?");
}
hardClippedRead.resetSoftStartAndEnd(); // reset the cached soft start and end because they may have changed now that the read was hard clipped. No need to calculate them now. They'll be lazily calculated on the next call to getSoftStart()/End() hardClippedRead.resetSoftStartAndEnd(); // reset the cached soft start and end because they may have changed now that the read was hard clipped. No need to calculate them now. They'll be lazily calculated on the next call to getSoftStart()/End()
hardClippedRead.setBaseQualities(newQuals); hardClippedRead.setBaseQualities(newQuals);

View File

@ -41,11 +41,7 @@ import java.util.List;
public class DupUtils { public class DupUtils {
private static GATKSAMRecord tmpCopyRead(GATKSAMRecord read) { private static GATKSAMRecord tmpCopyRead(GATKSAMRecord read) {
try { return (GATKSAMRecord)read.clone();
return (GATKSAMRecord)read.clone();
} catch ( CloneNotSupportedException e ) {
throw new ReviewedGATKException("Unexpected Clone failure!");
}
} }
public static GATKSAMRecord combineDuplicates(GenomeLocParser genomeLocParser,List<GATKSAMRecord> duplicates, int maxQScore) { public static GATKSAMRecord combineDuplicates(GenomeLocParser genomeLocParser,List<GATKSAMRecord> duplicates, int maxQScore) {

View File

@ -28,9 +28,12 @@ package org.broadinstitute.gatk.utils.genotyper;
import com.google.java.contract.Ensures; import com.google.java.contract.Ensures;
import org.broadinstitute.gatk.engine.downsampling.AlleleBiasedDownsamplingUtils; import org.broadinstitute.gatk.engine.downsampling.AlleleBiasedDownsamplingUtils;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.pileup.PileupElement; import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Allele;
@ -227,7 +230,6 @@ public class PerReadAlleleLikelihoodMap {
double haplotypeLikelihood = 0.0; double haplotypeLikelihood = 0.0;
for( final Map.Entry<GATKSAMRecord, Map<Allele,Double>> entry : likelihoodReadMap.entrySet() ) { for( final Map.Entry<GATKSAMRecord, Map<Allele,Double>> entry : likelihoodReadMap.entrySet() ) {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2) // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
final GATKSAMRecord read = entry.getKey();
final double likelihood_iii = entry.getValue().get(iii_allele); final double likelihood_iii = entry.getValue().get(iii_allele);
final double likelihood_jjj = entry.getValue().get(jjj_allele); final double likelihood_jjj = entry.getValue().get(jjj_allele);
haplotypeLikelihood += MathUtils.approximateLog10SumLog10(likelihood_iii, likelihood_jjj) + MathUtils.LOG_ONE_HALF; haplotypeLikelihood += MathUtils.approximateLog10SumLog10(likelihood_iii, likelihood_jjj) + MathUtils.LOG_ONE_HALF;
@ -381,4 +383,27 @@ public class PerReadAlleleLikelihoodMap {
public Set<Allele> getAllelesSet() { public Set<Allele> getAllelesSet() {
return Collections.unmodifiableSet(allelesSet); return Collections.unmodifiableSet(allelesSet);
} }
/**
* Loop over all of the reads in this likelihood map and realign them to its most likely haplotype
* @param haplotypes the collection of haplotypes
* @param paddedReferenceLoc the active region
*/
public void realignReadsToMostLikelyHaplotype(final Collection<Haplotype> haplotypes, final GenomeLoc paddedReferenceLoc) {
// we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently
final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<>(haplotypes.size());
for ( final Haplotype haplotype : haplotypes )
alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype);
final Map<GATKSAMRecord, Map<Allele, Double>> newLikelihoodReadMap = new LinkedHashMap<>(likelihoodReadMap.size());
for( final Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : likelihoodReadMap.entrySet() ) {
final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue());
final GATKSAMRecord alignedToRef = AlignmentUtils.createReadAlignedToRef(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart(), bestAllele.isInformative());
newLikelihoodReadMap.put(alignedToRef, entry.getValue());
}
likelihoodReadMap.clear();
likelihoodReadMap.putAll(newLikelihoodReadMap);
}
} }

View File

@ -31,19 +31,21 @@ import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement; import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator; import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecord;
import org.apache.log4j.Logger; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.utils.BaseUtils; import org.broadinstitute.gatk.utils.BaseUtils;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.pileup.PileupElement; import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.recalibration.EventType; import org.broadinstitute.gatk.utils.recalibration.EventType;
import org.broadinstitute.gatk.utils.smithwaterman.SWPairwiseAlignment;
import java.util.*; import java.util.*;
public final class AlignmentUtils { public final class AlignmentUtils {
private final static Logger logger = Logger.getLogger(AlignmentUtils.class);
private final static EnumSet<CigarOperator> ALIGNED_TO_GENOME_OPERATORS = EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X); private final static EnumSet<CigarOperator> ALIGNED_TO_GENOME_OPERATORS = EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X);
private final static EnumSet<CigarOperator> ALIGNED_TO_GENOME_PLUS_SOFTCLIPS = EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.S); private final static EnumSet<CigarOperator> ALIGNED_TO_GENOME_PLUS_SOFTCLIPS = EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.S);
public final static String HAPLOTYPE_TAG = "HC";
// cannot be instantiated // cannot be instantiated
private AlignmentUtils() { } private AlignmentUtils() { }
@ -65,6 +67,65 @@ public final class AlignmentUtils {
return first == CigarOperator.D || first == CigarOperator.I || last == CigarOperator.D || last == CigarOperator.I; return first == CigarOperator.D || first == CigarOperator.I || last == CigarOperator.D || last == CigarOperator.I;
} }
/**
* Aligns reads the haplotype, and then projects this alignment of read -> hap onto the reference
* via the alignment of haplotype (via its getCigar) method.
*
* @param originalRead the read we want to write aligned to the reference genome
* @param haplotype the haplotype that the read should be aligned to, before aligning to the reference
* @param referenceStart the start of the reference that haplotype is aligned to. Provides global coordinate frame.
* @param isInformative true if the read is differentially informative for one of the haplotypes
* @return a GATKSAMRecord aligned to reference, or null if no meaningful alignment is possible
*/
public static GATKSAMRecord createReadAlignedToRef(final GATKSAMRecord originalRead,
final Haplotype haplotype,
final int referenceStart,
final boolean isInformative) {
if ( originalRead == null ) throw new IllegalArgumentException("originalRead cannot be null");
if ( haplotype == null ) throw new IllegalArgumentException("haplotype cannot be null");
if ( haplotype.getCigar() == null ) throw new IllegalArgumentException("Haplotype cigar not set " + haplotype);
if ( referenceStart < 1 ) throw new IllegalArgumentException("reference start much be >= 1 but got " + referenceStart);
// compute the smith-waterman alignment of read -> haplotype
final SWPairwiseAlignment swPairwiseAlignment = new SWPairwiseAlignment(haplotype.getBases(), originalRead.getReadBases(), CigarUtils.NEW_SW_PARAMETERS);
if ( swPairwiseAlignment.getAlignmentStart2wrt1() == -1 )
// sw can fail (reasons not clear) so if it happens just don't realign the read
return originalRead;
final Cigar swCigar = consolidateCigar(swPairwiseAlignment.getCigar());
// since we're modifying the read we need to clone it
final GATKSAMRecord read = (GATKSAMRecord)originalRead.clone();
// only informative reads are given the haplotype tag to enhance visualization
if ( isInformative )
read.setAttribute(HAPLOTYPE_TAG, haplotype.hashCode());
// compute here the read starts w.r.t. the reference from the SW result and the hap -> ref cigar
final Cigar extendedHaplotypeCigar = haplotype.getConsolidatedPaddedCigar(1000);
final int readStartOnHaplotype = calcFirstBaseMatchingReferenceInCigar(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1());
final int readStartOnReference = referenceStart + haplotype.getAlignmentStartHapwrtRef() + readStartOnHaplotype;
read.setAlignmentStart(readStartOnReference);
read.resetSoftStartAndEnd();
// compute the read -> ref alignment by mapping read -> hap -> ref from the
// SW of read -> hap mapped through the given by hap -> ref
final Cigar haplotypeToRef = trimCigarByBases(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1(), extendedHaplotypeCigar.getReadLength() - 1);
final Cigar readToRefCigarRaw = applyCigarToCigar(swCigar, haplotypeToRef);
final Cigar readToRefCigarClean = cleanUpCigar(readToRefCigarRaw);
final Cigar readToRefCigar = leftAlignIndel(readToRefCigarClean, haplotype.getBases(),
originalRead.getReadBases(), swPairwiseAlignment.getAlignmentStart2wrt1(), 0, true);
read.setCigar(readToRefCigar);
if ( readToRefCigar.getReadLength() != read.getReadLength() )
throw new IllegalStateException("Cigar " + readToRefCigar + " with read length " + readToRefCigar.getReadLength()
+ " != read length " + read.getReadLength() + " for read " + read.format() + "\nhapToRef " + haplotypeToRef + " length " + haplotypeToRef.getReadLength() + "/" + haplotypeToRef.getReferenceLength()
+ "\nreadToHap " + swCigar + " length " + swCigar.getReadLength() + "/" + swCigar.getReferenceLength());
return read;
}
/** /**
* Get the byte[] from bases that cover the reference interval refStart -> refEnd given the * Get the byte[] from bases that cover the reference interval refStart -> refEnd given the

View File

@ -49,7 +49,7 @@ import java.util.*;
* Changing these values in any way will invalidate the cached value. However, we do not monitor those setter * Changing these values in any way will invalidate the cached value. However, we do not monitor those setter
* functions, so modifying a GATKSAMRecord in any way may result in stale cached values. * functions, so modifying a GATKSAMRecord in any way may result in stale cached values.
*/ */
public class GATKSAMRecord extends BAMRecord { public class GATKSAMRecord extends BAMRecord implements Cloneable {
// Base Quality Score Recalibrator specific attribute tags // Base Quality Score Recalibrator specific attribute tags
public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; // base qualities for insertions public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; // base qualities for insertions
public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; // base qualities for deletions public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; // base qualities for deletions
@ -593,17 +593,20 @@ public class GATKSAMRecord extends BAMRecord {
* This should be safe because callers should never modify a mutable value returned by any of the get() methods anyway. * This should be safe because callers should never modify a mutable value returned by any of the get() methods anyway.
* *
* @return a shallow copy of the GATKSAMRecord * @return a shallow copy of the GATKSAMRecord
* @throws CloneNotSupportedException
*/ */
@Override @Override
public Object clone() throws CloneNotSupportedException { public Object clone() {
final GATKSAMRecord clone = (GATKSAMRecord) super.clone(); try {
if (temporaryAttributes != null) { final GATKSAMRecord clone = (GATKSAMRecord) super.clone();
clone.temporaryAttributes = new HashMap<>(); if (temporaryAttributes != null) {
for (Object attribute : temporaryAttributes.keySet()) clone.temporaryAttributes = new HashMap<>();
clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute)); for (Object attribute : temporaryAttributes.keySet())
clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute));
}
return clone;
} catch (final CloneNotSupportedException e) {
throw new RuntimeException( e );
} }
return clone;
} }
/** /**

View File

@ -28,6 +28,7 @@ package org.broadinstitute.gatk.utils.sam;
import htsjdk.samtools.*; import htsjdk.samtools.*;
import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.gatk.utils.Utils; import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.pileup.PileupElement; import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
@ -172,7 +173,6 @@ public class AlignmentUtilsUnitTest {
Assert.assertEquals(AlignmentUtils.calcNumDifferentBases(cigar, ref.getBytes(), read.getBytes()), expectedDifferences); Assert.assertEquals(AlignmentUtils.calcNumDifferentBases(cigar, ref.getBytes(), read.getBytes()), expectedDifferences);
} }
@DataProvider(name = "NumAlignedBasesCountingSoftClips") @DataProvider(name = "NumAlignedBasesCountingSoftClips")
public Object[][] makeNumAlignedBasesCountingSoftClips() { public Object[][] makeNumAlignedBasesCountingSoftClips() {
List<Object[]> tests = new ArrayList<Object[]>(); List<Object[]> tests = new ArrayList<Object[]>();