Reads are now realigned to the most likely haplotype before being used by the annotations.

-- AD,DP will now correspond directly to the reads that were used to construct the PLs
-- RankSumTests, etc. will use the bases from the realigned reads instead of the original alignments
-- There is now no additional runtime cost to realign the reads when using bamout or GVCF mode
-- bamout mode no longer sets the mapping quality to zero for uninformative reads, instead the read will not be given an HC tag
This commit is contained in:
Ryan Poplin 2014-06-11 15:45:45 -04:00
parent dc93507023
commit 0127799cba
21 changed files with 197 additions and 257 deletions

View File

@ -692,7 +692,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
private ReadLikelihoodCalculationEngine createLikelihoodCalculationEngine() {
switch (likelihoodEngineImplementation) {
case PairHMM:
return new PairHMMLikelihoodCalculationEngine( (byte)gcpHMM, SCAC.DEBUG, pairHMM, log10GlobalReadMismappingRate, noFpga, pcrErrorModel );
return new PairHMMLikelihoodCalculationEngine( (byte)gcpHMM, pairHMM, log10GlobalReadMismappingRate, noFpga, pcrErrorModel );
case GraphBased:
return new GraphBasedLikelihoodCalculationEngine( (byte)gcpHMM,log10GlobalReadMismappingRate,heterogeneousKmerSizeResultion,SCAC.DEBUG,debugGraphTransformations);
case Random:
@ -839,8 +839,12 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
final Map<String,List<GATKSAMRecord>> reads = splitReadsBySample( regionForGenotyping.getReads() );
// Calculate the likelihoods: CPU intensive part.
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap =
likelihoodCalculationEngine.computeReadLikelihoods(assemblyResult,reads);
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = likelihoodCalculationEngine.computeReadLikelihoods(assemblyResult,reads);
// Realign all the reads to the most likely haplotype for use by the annotations
for( final Map.Entry<String, PerReadAlleleLikelihoodMap> entry : stratifiedReadMap.entrySet() ) {
entry.getValue().realignReadsToMostLikelyHaplotype(haplotypes, assemblyResult.getPaddedReferenceLoc());
}
// Note: we used to subset down at this point to only the "best" haplotypes in all samples for genotyping, but there
// was a bad interaction between that selection and the marginalization that happens over each event when computing

View File

@ -254,12 +254,12 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
if (emitReferenceConfidence) addMiscellaneousAllele(alleleReadMap);
final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC );
final VariantContext call = calculateGenotypes(null,null,null,null,new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel, false,null);
final VariantContext call = calculateGenotypes(null, null, null, null, new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel, false, null);
if( call != null ) {
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( configuration.USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap :
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, emptyDownSamplingMap ) );
if (emitReferenceConfidence) addMiscellaneousAllele(alleleReadMap_annotations);
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call );
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = addFilteredReadList(genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call, true);
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call);
@ -285,7 +285,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
* Add the <NON_REF> allele
* @param stratifiedReadMap target per-read-allele-likelihood-map.
*/
public static Map<String, PerReadAlleleLikelihoodMap> addMiscellaneousAllele(final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) {
public static Map<String, PerReadAlleleLikelihoodMap> addMiscellaneousAllele(final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) {
final Allele miscellanoeusAllele = GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE;
for (Map.Entry<String, PerReadAlleleLikelihoodMap> perSample : stratifiedReadMap.entrySet()) {
for (Map.Entry<GATKSAMRecord, Map<Allele, Double>> perRead : perSample.getValue().getLikelihoodReadMap().entrySet()) {
@ -430,19 +430,20 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
return genotypes;
}
private static Map<String, PerReadAlleleLikelihoodMap> filterToOnlyOverlappingReads( final GenomeLocParser parser,
final Map<String, PerReadAlleleLikelihoodMap> perSampleReadMap,
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
final VariantContext call ) {
private static Map<String, PerReadAlleleLikelihoodMap> addFilteredReadList(final GenomeLocParser parser,
final Map<String, PerReadAlleleLikelihoodMap> perSampleReadMap,
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
final VariantContext call,
final boolean requireOverlap) {
final Map<String, PerReadAlleleLikelihoodMap> returnMap = new LinkedHashMap<>();
final GenomeLoc callLoc = parser.createGenomeLoc(call);
final GenomeLoc callLoc = ( requireOverlap ? parser.createGenomeLoc(call) : null );
for( final Map.Entry<String, PerReadAlleleLikelihoodMap> sample : perSampleReadMap.entrySet() ) {
final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
for( final Map.Entry<GATKSAMRecord,Map<Allele,Double>> mapEntry : sample.getValue().getLikelihoodReadMap().entrySet() ) {
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
if( callLoc.overlapsP(parser.createGenomeLoc(mapEntry.getKey())) ) { // BUGBUG: This uses alignment start and stop, NOT soft start and soft end...
if( !requireOverlap || callLoc.overlapsP(parser.createGenomeLocUnclipped(mapEntry.getKey())) ) {
for( final Map.Entry<Allele,Double> alleleDoubleEntry : mapEntry.getValue().entrySet() ) {
likelihoodMap.add(mapEntry.getKey(), alleleDoubleEntry.getKey(), alleleDoubleEntry.getValue());
}
@ -452,7 +453,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
// add all filtered reads to the NO_CALL list because they weren't given any likelihoods
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
if( !requireOverlap || callLoc.overlapsP(parser.createGenomeLocUnclipped(read)) ) {
for( final Allele allele : call.getAlleles() ) {
likelihoodMap.add(read, allele, 0.0);
}

View File

@ -74,7 +74,6 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
private final byte constantGCP;
private final double log10globalReadMismappingRate;
private final boolean DEBUG;
private final PairHMM.HMM_IMPLEMENTATION hmmType;
private final boolean noFpga;
@ -141,7 +140,6 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
* Create a new PairHMMLikelihoodCalculationEngine using provided parameters and hmm to do its calculations
*
* @param constantGCP the gap continuation penalty to use with the PairHMM
* @param debug should we emit debugging information during the calculation?
* @param hmmType the type of the HMM to use
* @param log10globalReadMismappingRate the global mismapping probability, in log10(prob) units. A value of
* -3 means that the chance that a read doesn't actually belong at this
@ -153,10 +151,9 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
* assigned a likelihood of -13.
* @param noFpga disable FPGA acceleration
*/
public PairHMMLikelihoodCalculationEngine( final byte constantGCP, final boolean debug, final PairHMM.HMM_IMPLEMENTATION hmmType, final double log10globalReadMismappingRate, final boolean noFpga, final PCR_ERROR_MODEL pcrErrorModel ) {
public PairHMMLikelihoodCalculationEngine( final byte constantGCP, final PairHMM.HMM_IMPLEMENTATION hmmType, final double log10globalReadMismappingRate, final boolean noFpga, final PCR_ERROR_MODEL pcrErrorModel ) {
this.hmmType = hmmType;
this.constantGCP = constantGCP;
this.DEBUG = debug;
this.log10globalReadMismappingRate = log10globalReadMismappingRate;
this.noFpga = noFpga;
this.pcrErrorModel = pcrErrorModel;
@ -374,21 +371,6 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
}
public Map<String, PerReadAlleleLikelihoodMap> computeReadLikelihoods( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList ) {
// Add likelihoods for each sample's reads to our stratifiedReadMap
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = new LinkedHashMap<>();
for( final Map.Entry<String, List<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) {
// evaluate the likelihood of the reads given those haplotypes
final PerReadAlleleLikelihoodMap map = computeReadLikelihoods(haplotypes, sampleEntry.getValue());
map.filterPoorlyModelledReads(EXPECTED_ERROR_RATE_PER_BASE);
stratifiedReadMap.put(sampleEntry.getKey(), map);
}
return stratifiedReadMap;
}
private PerReadAlleleLikelihoodMap computeReadLikelihoods( final List<Haplotype> haplotypes, final List<GATKSAMRecord> reads) {
// Modify the read qualities by applying the PCR error model and capping the minimum base,insertion,deletion qualities

View File

@ -55,15 +55,12 @@ import org.broadinstitute.gatk.utils.QualityUtils;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.haplotypeBAMWriter.HaplotypeBAMWriter;
import org.broadinstitute.gatk.utils.haplotypeBAMWriter.ReadDestination;
import org.broadinstitute.gatk.utils.locusiterator.LocusIteratorByState;
import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.sam.ReadUtils;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFHeaderLine;
@ -89,7 +86,6 @@ public class ReferenceConfidenceModel {
private final GenomeLocParser genomeLocParser;
private final Set<String> samples;
private final SAMFileHeader header; // TODO -- really shouldn't depend on this
private final int indelInformativeDepthIndelSize;
private final static boolean WRITE_DEBUGGING_BAM = false;
@ -117,7 +113,6 @@ public class ReferenceConfidenceModel {
this.genomeLocParser = genomeLocParser;
this.samples = samples;
this.header = header;
this.indelInformativeDepthIndelSize = indelInformativeDepthIndelSize;
if ( WRITE_DEBUGGING_BAM ) {
@ -326,24 +321,13 @@ public class ReferenceConfidenceModel {
if ( stratifiedReadMap == null ) throw new IllegalArgumentException("stratifiedReadMap cannot be null");
if ( stratifiedReadMap.size() != 1 ) throw new IllegalArgumentException("stratifiedReadMap must contain exactly one sample but it contained " + stratifiedReadMap.size());
List<GATKSAMRecord> realignedReads;
if( calledHaplotypes.size() == 1 ) { // only contains ref haplotype so an optimization is to just trust the alignments to the reference haplotype as provided by the aligner
realignedReads = activeRegion.getReads();
} else {
final ReadDestination.ToList realignedReadsDest = new ReadDestination.ToList(header, "FOO");
final HaplotypeBAMWriter writer = HaplotypeBAMWriter.create(HaplotypeBAMWriter.Type.CALLED_HAPLOTYPES, realignedReadsDest);
writer.setWriteHaplotypesAsWell(false); // don't write out reads for the haplotypes, as we only want the realigned reads themselves
writer.setOnlyRealignInformativeReads(true);
writer.writeReadsAlignedToHaplotypes(calledHaplotypes, paddedReferenceLoc, stratifiedReadMap);
realignedReads = ReadUtils.sortReadsByCoordinate(realignedReadsDest.getReads());
}
final List<GATKSAMRecord> reads = activeRegion.getReads();
if ( debuggingWriter != null )
for ( final GATKSAMRecord read : realignedReads )
for ( final GATKSAMRecord read : reads )
debuggingWriter.addAlignment(read);
final LocusIteratorByState libs = new LocusIteratorByState(realignedReads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING,
final LocusIteratorByState libs = new LocusIteratorByState(reads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING,
true, genomeLocParser, samples, false);
final List<ReadBackedPileup> pileups = new LinkedList<>();

View File

@ -81,16 +81,9 @@ class AllHaplotypeBAMWriter extends HaplotypeBAMWriter {
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) {
writeHaplotypesAsReads(haplotypes, new HashSet<>(bestHaplotypes), paddedReferenceLoc);
// we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently
final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<>(haplotypes.size());
for ( final Haplotype haplotype : haplotypes )
alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype);
// next, output the interesting reads for each sample aligned against the appropriate haplotype
for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) {
for ( final Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) {
final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue());
writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart(), bestAllele.isInformative());
for( final GATKSAMRecord read : readAlleleLikelihoodMap.getLikelihoodReadMap().keySet() ) {
writeReadAgainstHaplotype(read);
}
}
}

View File

@ -88,22 +88,9 @@ class CalledHaplotypeBAMWriter extends HaplotypeBAMWriter {
writeHaplotypesAsReads(calledHaplotypes, calledHaplotypes, paddedReferenceLoc);
// we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently
final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<>(haplotypes.size());
for ( final Haplotype haplotype : calledHaplotypes ) {
alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype);
}
// the set of all alleles that were actually called
final Set<Allele> allelesOfCalledHaplotypes = alleleToHaplotypeMap.keySet();
// next, output the interesting reads for each sample aligned against one of the called haplotypes
for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) {
for ( final Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) {
final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue(), allelesOfCalledHaplotypes);
final Haplotype haplotype = alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele());
if (haplotype == null) continue;
writeReadAgainstHaplotype(entry.getKey(), haplotype, paddedReferenceLoc.getStart(), bestAllele.isInformative());
for ( final GATKSAMRecord read : readAlleleLikelihoodMap.getLikelihoodReadMap().keySet() ) {
writeReadAgainstHaplotype(read);
}
}
}

View File

@ -78,7 +78,6 @@ public abstract class HaplotypeBAMWriter {
private long uniqueNameCounter = 1;
protected final static String READ_GROUP_ID = "ArtificialHaplotype";
protected final static String HAPLOTYPE_TAG = "HC";
private final ReadDestination output;
private boolean writeHaplotypesAsWell = true;
@ -173,108 +172,10 @@ public abstract class HaplotypeBAMWriter {
/**
* Write out read aligned to haplotype to the BAM file
*
* Aligns reads to the haplotype, and then projects this alignment of read -> hap onto the reference
* via the alignment of haplotype (via its getCigar) method.
*
* @param originalRead the read we want to write aligned to the reference genome
* @param haplotype the haplotype that the read should be aligned to, before aligning to the reference
* @param referenceStart the start of the reference that haplotype is aligned to. Provides global coordinate frame.
* @param isInformative true if the read is differentially informative for one of the haplotypes
*/
protected void writeReadAgainstHaplotype(final GATKSAMRecord originalRead,
final Haplotype haplotype,
final int referenceStart,
final boolean isInformative) {
if( onlyRealignInformativeReads && !isInformative ) {
if( originalRead != null ) {
output.add(originalRead);
}
} else if (haplotype == null) {
output.add(originalRead);
return;
} else {
final GATKSAMRecord alignedToRef = createReadAlignedToRef(originalRead, haplotype, referenceStart, isInformative);
if ( alignedToRef != null ) {
output.add(alignedToRef);
} else {
output.add(originalRead);
}
}
}
/**
* Aligns reads to the haplotype, and then projects this alignment of read -> hap onto the reference
* via the alignment of haplotype (via its getCigar) method.
*
* @param originalRead the read we want to write aligned to the reference genome
* @param haplotype the haplotype that the read should be aligned to, before aligning to the reference
* @param referenceStart the start of the reference that haplotype is aligned to. Provides global coordinate frame.
* @param isInformative true if the read is differentially informative for one of the haplotypes
* @return a GATKSAMRecord aligned to reference, or null if no meaningful alignment is possible
*/
protected GATKSAMRecord createReadAlignedToRef(final GATKSAMRecord originalRead,
final Haplotype haplotype,
final int referenceStart,
final boolean isInformative) {
if ( originalRead == null ) throw new IllegalArgumentException("originalRead cannot be null");
if ( haplotype == null ) throw new IllegalArgumentException("haplotype cannot be null");
if ( haplotype.getCigar() == null ) throw new IllegalArgumentException("Haplotype cigar not set " + haplotype);
if ( referenceStart < 1 ) throw new IllegalArgumentException("reference start much be >= 1 but got " + referenceStart);
try {
// compute the smith-waterman alignment of read -> haplotype
final SWPairwiseAlignment swPairwiseAlignment = new SWPairwiseAlignment(haplotype.getBases(), originalRead.getReadBases(), CigarUtils.NEW_SW_PARAMETERS);
//swPairwiseAlignment.printAlignment(haplotype.getBases(), originalRead.getReadBases());
if ( swPairwiseAlignment.getAlignmentStart2wrt1() == -1 )
// sw can fail (reasons not clear) so if it happens just don't write the read
return null;
final Cigar swCigar = AlignmentUtils.consolidateCigar(swPairwiseAlignment.getCigar());
// since we're modifying the read we need to clone it
final GATKSAMRecord read = (GATKSAMRecord)originalRead.clone();
addHaplotypeTag(read, haplotype);
// uninformative reads are set to zero mapping quality to enhance visualization
if ( !isInformative )
read.setMappingQuality(0);
// compute here the read starts w.r.t. the reference from the SW result and the hap -> ref cigar
final Cigar extendedHaplotypeCigar = haplotype.getConsolidatedPaddedCigar(1000);
final int readStartOnHaplotype = AlignmentUtils.calcFirstBaseMatchingReferenceInCigar(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1());
final int readStartOnReference = referenceStart + haplotype.getAlignmentStartHapwrtRef() + readStartOnHaplotype;
read.setAlignmentStart(readStartOnReference);
// compute the read -> ref alignment by mapping read -> hap -> ref from the
// SW of read -> hap mapped through the alignment given by hap -> ref
final Cigar haplotypeToRef = AlignmentUtils.trimCigarByBases(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1(), extendedHaplotypeCigar.getReadLength() - 1);
final Cigar readToRefCigarRaw = AlignmentUtils.applyCigarToCigar(swCigar, haplotypeToRef);
final Cigar readToRefCigarClean = AlignmentUtils.cleanUpCigar(readToRefCigarRaw);
final Cigar readToRefCigar = AlignmentUtils.leftAlignIndel(readToRefCigarClean, haplotype.getBases(),
originalRead.getReadBases(), swPairwiseAlignment.getAlignmentStart2wrt1(), 0, true);
read.setCigar(readToRefCigar);
if ( readToRefCigar.getReadLength() != read.getReadLength() )
throw new IllegalStateException("Cigar " + readToRefCigar + " with read length " + readToRefCigar.getReadLength()
+ " != read length " + read.getReadLength() + " for read " + read.format() + "\nhapToRef " + haplotypeToRef + " length " + haplotypeToRef.getReadLength() + "/" + haplotypeToRef.getReferenceLength()
+ "\nreadToHap " + swCigar + " length " + swCigar.getReadLength() + "/" + swCigar.getReferenceLength());
return read;
} catch ( CloneNotSupportedException e ) {
throw new IllegalStateException("GATKSAMRecords should support clone but this one does not " + originalRead);
}
}
/**
* Add a haplotype tag to the read based on haplotype
*
* @param read the read to add the tag to
* @param haplotype the haplotype that gives rises to read
*/
private void addHaplotypeTag(final GATKSAMRecord read, final Haplotype haplotype) {
// add a tag to the read that indicates which haplotype it best aligned to. It's a uniquish integer
read.setAttribute(HAPLOTYPE_TAG, haplotype.hashCode());
protected void writeReadAgainstHaplotype(final GATKSAMRecord originalRead) {
output.add(originalRead);
}
/**
@ -309,7 +210,7 @@ public abstract class HaplotypeBAMWriter {
record.setCigar(AlignmentUtils.consolidateCigar(haplotype.getCigar()));
record.setMappingQuality(isAmongBestHaplotypes ? 60 : 0);
record.setReadName("HC" + uniqueNameCounter++);
addHaplotypeTag(record, haplotype);
record.setAttribute(AlignmentUtils.HAPLOTYPE_TAG, haplotype.hashCode());
record.setReadUnmappedFlag(false);
record.setReferenceIndex(paddedRefLoc.getContigIndex());
record.setAttribute(SAMTag.RG.toString(), READ_GROUP_ID);

View File

@ -119,7 +119,7 @@ public class HCLikelihoodCalculationEnginesBenchmark extends SimpleBenchmark {
@SuppressWarnings("unused")
public void timeLoglessPairHMM(final int reps) {
for (int i = 0; i < reps; i++) {
final PairHMMLikelihoodCalculationEngine engine = new PairHMMLikelihoodCalculationEngine((byte) 10, false,
final PairHMMLikelihoodCalculationEngine engine = new PairHMMLikelihoodCalculationEngine((byte) 10,
PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, -3, true, PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.NONE);
engine.computeReadLikelihoods(dataSet.assemblyResultSet(), Collections.singletonMap("anonymous", dataSet.readList()));
}

View File

@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleComplex1() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "d1626d5fff419b8a782de954224e881f");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "bb761c6cd0893e7e96c56bf1d8494588");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -88,13 +88,13 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"724a05b7df716647014f29c0fe86e071");
"fb1042487efb03896f482c08d7eb2671");
}
@Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"437ce80af30631de077fd617b13cd2e4");
"9a6e314d7a8fe1c4a9ff5048544cb670");
}
private void HCTestComplexConsensusMode(String bam, String args, String md5) {
@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
"45975c205e1202d19b235312588dd733");
"1e4af2f8fc97e0b07e625f142374742f");
}
}

View File

@ -67,12 +67,12 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "bb6c326616790076de5111a7a3f6feb5"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "06d37e116871d8840d3de17a51049363"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "53517b967a60d800a9d9d1d5a1bc54c9"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "080cf92bfcb7db4273e2d723045daee5"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "a4c0a28a451069faaac937e45c701043"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "0bc25b42dc73eeadb23a35227684172c"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "c95a777e3aac50b9f29302919a00ba4e"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "05860d40923e8b074576c787037c5768"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "f3abc3050b2708fc78c903fb70ac253e"});
return tests.toArray(new Object[][]{});
}
@ -137,7 +137,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
public void testWrongGVCFNonVariantRecordOrderBugFix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("324eb46738a364cd7dc5fa0b62491a5e"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e286b5eb72e3f75e7b08e5729cdfe019"));
spec.disableShadowBCF();
executeTest("testMissingGVCFIndexingStrategyException", spec);
}

View File

@ -84,27 +84,27 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "9e90f4ed1a04ce159ad3e560e62f5d6b");
HCTest(CEUTRIO_BAM, "", "030b45c91879eb41f30abbd10ed88fa9");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "96f299a5cf411900b8eda3845c3ce465");
HCTest(NA12878_BAM, "", "6632475b1cefce9a0c5727bc919501e5");
}
@Test
public void testHaplotypeCallerMinBaseQuality() {
HCTest(NA12878_BAM, "-mbq 15", "6509cfd0554ecbb92a1b303fbcc0fcca");
HCTest(NA12878_BAM, "-mbq 15", "31fe63ea44407b6aa40261319a414488");
}
@Test
public void testHaplotypeCallerGraphBasedSingleSample() {
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "f87bb723bfb9b8f604db1d53624d24e3");
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "5a49b5b98247070e8de637a706b02db9");
}
@Test
public void testHaplotypeCallerGraphBasedMultiSample() {
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "8d1edb0ea25c55a8c825b4e1470e219f");
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "d530933588810bb4cf04a9783d187433");
}
@Test(enabled = false) // can't annotate the rsID's yet
@ -115,7 +115,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"d734a90b07dd0d6b051b7c0961787f98");
"844d1daa4a8835f4c7e10d0d8bbfe510");
}
@Test
@ -131,7 +131,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "d3fc49d3d3c8b6439548133e03faa540");
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "ec77ac891cac9461b2eb011270166dc1");
}
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
@ -168,7 +168,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerNearbySmallIntervals() {
HCTestNearbySmallIntervals(NA12878_BAM, "", "a415bc76231a04dc38412ff38aa0dc49");
HCTestNearbySmallIntervals(NA12878_BAM, "", "59e0f10f8822e2a5e7d2fabb71dbea3f");
}
// This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
@ -227,7 +227,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("32af503f1d58b1f34d896ea2ddb891aa"));
Arrays.asList("d357f6fe3f1350f886b0df5a207e9f6f"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
@ -236,7 +236,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132
+ " -L " + hg19Intervals + " -isr INTERSECTION", 1,
Arrays.asList("e39c73bbaf22b4751755d9f5bb2a8d3d"));
Arrays.asList("e64e06ae90a8eab52cabe3687992da73"));
executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec);
}
@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGSGraphBased() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("39519c4f07d229f266430a0b0cfdf1db"));
Arrays.asList("4e43cf47d32210ee6a6880af3bbfcc52"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
@ -253,7 +253,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132
+ " -L " + hg19Intervals + " -isr INTERSECTION", 1,
Arrays.asList("c14d7f23dedea7e7ec99a90843320c1a"));
Arrays.asList("83bf354f0238a362d4b6ef8d14679999"));
executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec);
}
@ -276,7 +276,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestAggressivePcrIndelModelWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
Arrays.asList("d1e1de9f1d33c8c3f96520d674c76cda"));
Arrays.asList("9297d4f5a9eaab253bea1ec19a8581c2"));
executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec);
}
@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestConservativePcrIndelModelWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
Arrays.asList("10363ebae7edb0e81c2e668f7dbfba15"));
Arrays.asList("de2b002e0311c8fac7d053ecf8cb41f8"));
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
}
@ -304,7 +304,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16",
b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list",
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4eb63e603e7a99388bb421591350de8c"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("5c1323d441ab083dbb569831959a9250"));
spec.disableShadowBCF();
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
}
@ -313,7 +313,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testMissingKeyAlternativeHaplotypesBugFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list");
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("6c70b7a52803f0bcfc53a14b894834df"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("d8c6197a7060c16c849e15460a4287f2"));
spec.disableShadowBCF();
executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec);
}
@ -323,7 +323,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
hg19RefereneWithChrPrefixInChromosomeNames, privateTestDir + "bad-likelihoods.bam", privateTestDir + "bad-likelihoods.interval_list",
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("0626e4c7543acc0cbb06c34435a2b730"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e36179dcdfb6f2a4269d08543479e97c"));
spec.disableShadowBCF();
executeTest("testBadLikelihoodsDueToBadHaplotypeSelectionFix", spec);
}
@ -346,9 +346,9 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
// but please make sure that both outputs get the same variant,
// alleles all with DBSNP ids
// We test here that change in active region size does not have an effect in placement of indels.
final WalkerTestSpec shortSpec = new WalkerTestSpec(commandLineShortInterval + " -o %s",Arrays.asList("f5de88bb1a1911eb5e6540a59f1517e2"));
final WalkerTestSpec shortSpec = new WalkerTestSpec(commandLineShortInterval + " -o %s",Arrays.asList("8512638ddb84eec5f58eaa1be6f58401"));
executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::shortInterval",shortSpec);
final WalkerTestSpec longSpec = new WalkerTestSpec(commandLineLongInterval + " -o %s",Arrays.asList("fb957604f506fe9a62138943bd82543e"));
final WalkerTestSpec longSpec = new WalkerTestSpec(commandLineLongInterval + " -o %s",Arrays.asList("9645d154812ae4db5357e7e5628936ab"));
executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::longInterval",longSpec);
}

View File

@ -132,7 +132,7 @@ public class PairHMMLikelihoodCalculationEngineUnitTest extends BaseTest {
@Test(dataProvider = "PcrErrorModelTestProvider", enabled = true)
public void createPcrErrorModelTest(final String repeat, final int repeatLength) {
final PairHMMLikelihoodCalculationEngine engine = new PairHMMLikelihoodCalculationEngine((byte)0, false,
final PairHMMLikelihoodCalculationEngine engine = new PairHMMLikelihoodCalculationEngine((byte)0,
PairHMM.HMM_IMPLEMENTATION.ORIGINAL, 0.0, true,
PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.CONSERVATIVE);

View File

@ -91,7 +91,7 @@ public class ReadThreadingLikelihoodCalculationEngineUnitTest extends ActiveRegi
//final PairHMMLikelihoodCalculationEngine fullPairHMM = new PairHMMLikelihoodCalculationEngine((byte)10, false,
// PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, -3);
final PairHMMLikelihoodCalculationEngine fullPairHMM = new PairHMMLikelihoodCalculationEngine((byte)10, false,
final PairHMMLikelihoodCalculationEngine fullPairHMM = new PairHMMLikelihoodCalculationEngine((byte)10,
PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, Double.NEGATIVE_INFINITY,true, PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.NONE);
// When using likelihoods it should be around 0.05 since

View File

@ -166,17 +166,16 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest {
}
@Test(dataProvider = "ReadAlignedToRefData", enabled = true)
public void testReadAlignedToRef(final GATKSAMRecord read, final Haplotype haplotype, final int refStart, final int expectedReadStart, final String expectedReadCigar) throws Exception {
final HaplotypeBAMWriter writer = new CalledHaplotypeBAMWriter(new MockDestination());
final GATKSAMRecord originalReadCopy = (GATKSAMRecord)read.clone();
if ( expectedReadCigar == null ) {
Assert.assertNull(writer.createReadAlignedToRef(read, haplotype, refStart, true));
Assert.assertNull(AlignmentUtils.createReadAlignedToRef(read, haplotype, refStart, true));
} else {
final Cigar expectedCigar = TextCigarCodec.getSingleton().decode(expectedReadCigar);
final GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, refStart, true);
final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, refStart, true);
Assert.assertEquals(alignedRead.getReadName(), originalReadCopy.getReadName());
Assert.assertEquals(alignedRead.getAlignmentStart(), expectedReadStart);
@ -286,7 +285,7 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest {
@Test(dataProvider = "ComplexReadAlignedToRef", enabled = !DEBUG)
public void testReadAlignedToRefComplexAlignment(final int testIndex, final GATKSAMRecord read, final String reference, final Haplotype haplotype, final int expectedMaxMismatches) throws Exception {
final HaplotypeBAMWriter writer = new CalledHaplotypeBAMWriter(new MockDestination());
final GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, 1, true);
final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, 1, true);
if ( alignedRead != null ) {
final int mismatches = AlignmentUtils.getMismatchCount(alignedRead, reference.getBytes(), alignedRead.getAlignmentStart() - 1).numMismatches;
Assert.assertTrue(mismatches <= expectedMaxMismatches,

View File

@ -430,12 +430,12 @@ public final class GenomeLocParser {
// --------------------------------------------------------------------------------------------------------------
/**
* create a genome loc, given a read. If the read is unmapped, *and* yet the read has a contig and start position,
* then a GenomeLoc is returned for contig:start-start, otherwise and UNMAPPED GenomeLoc is returned.
* Create a genome loc, given a read. If the read is unmapped, *and* yet the read has a contig and start position,
* then a GenomeLoc is returned for contig:start-start, otherwise an UNMAPPED GenomeLoc is returned.
*
* @param read
* @param read the read from which to create a genome loc
*
* @return
* @return the GenomeLoc that was created
*/
@Requires("read != null")
@Ensures("result != null")
@ -445,11 +445,32 @@ public final class GenomeLocParser {
return GenomeLoc.UNMAPPED;
else {
// Use Math.max to ensure that end >= start (Picard assigns the end to reads that are entirely within an insertion as start-1)
int end = read.getReadUnmappedFlag() ? read.getAlignmentStart() : Math.max(read.getAlignmentEnd(), read.getAlignmentStart());
final int end = read.getReadUnmappedFlag() ? read.getAlignmentStart() : Math.max(read.getAlignmentEnd(), read.getAlignmentStart());
return createGenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), end, false);
}
}
/**
 * Create a genome loc from a read's unclipped alignment. If the read is unmapped, *and* yet
 * the read has a contig and start position, then a GenomeLoc is returned for contig:start-start,
 * otherwise an UNMAPPED GenomeLoc is returned.
 *
 * @param read the read from which to create a genome loc
 *
 * @return the GenomeLoc that was created
 */
@Requires("read != null")
@Ensures("result != null")
public GenomeLoc createGenomeLocUnclipped(final SAMRecord read) {
    final boolean isUnmapped = read.getReadUnmappedFlag();
    if ( isUnmapped && read.getReferenceIndex() == -1 ) {
        // the read is unmapped and has no placement anywhere on the genome
        return GenomeLoc.UNMAPPED;
    }
    // Math.max guards against end < start (Picard assigns end = start-1 to reads lying entirely within an insertion)
    final int stop = isUnmapped ? read.getUnclippedEnd() : Math.max(read.getUnclippedEnd(), read.getUnclippedStart());
    return createGenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getUnclippedStart(), stop, false);
}
/**
* Creates a GenomeLoc from a Tribble feature
* @param feature

View File

@ -67,12 +67,7 @@ public class ClippingOp {
* @param originalRead the read to be clipped
*/
public GATKSAMRecord apply(ClippingRepresentation algorithm, GATKSAMRecord originalRead) {
GATKSAMRecord read;
try {
read = (GATKSAMRecord) originalRead.clone();
} catch (CloneNotSupportedException e) {
throw new ReviewedGATKException("Where did the clone go?");
}
GATKSAMRecord read = (GATKSAMRecord) originalRead.clone();
byte[] quals = read.getBaseQualities();
byte[] bases = read.getReadBases();
byte[] newBases = new byte[bases.length];
@ -169,14 +164,7 @@ public class ClippingOp {
}
private GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) {
GATKSAMRecord unclipped;
// shallow copy of the read bases and quals should be fine here because they are immutable in the original read
try {
unclipped = (GATKSAMRecord) read.clone();
} catch (CloneNotSupportedException e) {
throw new ReviewedGATKException("Where did the clone go?");
}
GATKSAMRecord unclipped = (GATKSAMRecord) read.clone();
Cigar unclippedCigar = new Cigar();
int matchesCount = 0;
@ -376,12 +364,7 @@ public class ClippingOp {
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);
final GATKSAMRecord hardClippedRead;
try {
hardClippedRead = (GATKSAMRecord) read.clone();
} catch (CloneNotSupportedException e) {
throw new ReviewedGATKException("Where did the clone go?");
}
final GATKSAMRecord hardClippedRead = (GATKSAMRecord) read.clone();
hardClippedRead.resetSoftStartAndEnd(); // reset the cached soft start and end because they may have changed now that the read was hard clipped. No need to calculate them now. They'll be lazily calculated on the next call to getSoftStart()/End()
hardClippedRead.setBaseQualities(newQuals);

View File

@ -41,11 +41,7 @@ import java.util.List;
public class DupUtils {
private static GATKSAMRecord tmpCopyRead(GATKSAMRecord read) {
try {
return (GATKSAMRecord)read.clone();
} catch ( CloneNotSupportedException e ) {
throw new ReviewedGATKException("Unexpected Clone failure!");
}
return (GATKSAMRecord)read.clone();
}
public static GATKSAMRecord combineDuplicates(GenomeLocParser genomeLocParser,List<GATKSAMRecord> duplicates, int maxQScore) {

View File

@ -28,9 +28,12 @@ package org.broadinstitute.gatk.utils.genotyper;
import com.google.java.contract.Ensures;
import org.broadinstitute.gatk.engine.downsampling.AlleleBiasedDownsamplingUtils;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.Allele;
@ -227,7 +230,6 @@ public class PerReadAlleleLikelihoodMap {
double haplotypeLikelihood = 0.0;
for( final Map.Entry<GATKSAMRecord, Map<Allele,Double>> entry : likelihoodReadMap.entrySet() ) {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
final GATKSAMRecord read = entry.getKey();
final double likelihood_iii = entry.getValue().get(iii_allele);
final double likelihood_jjj = entry.getValue().get(jjj_allele);
haplotypeLikelihood += MathUtils.approximateLog10SumLog10(likelihood_iii, likelihood_jjj) + MathUtils.LOG_ONE_HALF;
@ -381,4 +383,27 @@ public class PerReadAlleleLikelihoodMap {
public Set<Allele> getAllelesSet() {
return Collections.unmodifiableSet(allelesSet);
}
/**
 * Realign every read in this likelihood map to the haplotype it most likely arose from.
 *
 * For each read we look up its best-supported allele, map that allele back to its haplotype,
 * and replace the read key with a version realigned to the reference through that haplotype.
 * The per-allele likelihood values themselves are left untouched.
 *
 * @param haplotypes the collection of candidate haplotypes
 * @param paddedReferenceLoc the active region; its start anchors the realignment coordinates
 */
public void realignReadsToMostLikelyHaplotype(final Collection<Haplotype> haplotypes, final GenomeLoc paddedReferenceLoc) {
    // we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently
    final Map<Allele, Haplotype> haplotypesByAllele = new HashMap<>(haplotypes.size());
    for ( final Haplotype hap : haplotypes )
        haplotypesByAllele.put(Allele.create(hap.getBases()), hap);

    final Map<GATKSAMRecord, Map<Allele, Double>> realignedReadMap = new LinkedHashMap<>(likelihoodReadMap.size());
    for ( final Map.Entry<GATKSAMRecord, Map<Allele, Double>> readEntry : likelihoodReadMap.entrySet() ) {
        final MostLikelyAllele best = getMostLikelyAllele(readEntry.getValue());
        final Haplotype bestHaplotype = haplotypesByAllele.get(best.getMostLikelyAllele());
        final GATKSAMRecord realignedRead = AlignmentUtils.createReadAlignedToRef(readEntry.getKey(), bestHaplotype, paddedReferenceLoc.getStart(), best.isInformative());
        realignedReadMap.put(realignedRead, readEntry.getValue());
    }

    likelihoodReadMap.clear();
    likelihoodReadMap.putAll(realignedReadMap);
}
}

View File

@ -31,19 +31,21 @@ import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.utils.BaseUtils;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.recalibration.EventType;
import org.broadinstitute.gatk.utils.smithwaterman.SWPairwiseAlignment;
import java.util.*;
public final class AlignmentUtils {
private final static Logger logger = Logger.getLogger(AlignmentUtils.class);
private final static EnumSet<CigarOperator> ALIGNED_TO_GENOME_OPERATORS = EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X);
private final static EnumSet<CigarOperator> ALIGNED_TO_GENOME_PLUS_SOFTCLIPS = EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.S);
public final static String HAPLOTYPE_TAG = "HC";
// cannot be instantiated
private AlignmentUtils() { }
@ -65,6 +67,65 @@ public final class AlignmentUtils {
return first == CigarOperator.D || first == CigarOperator.I || last == CigarOperator.D || last == CigarOperator.I;
}
/**
 * Aligns the read to the haplotype, and then projects this alignment of read -> hap onto the reference
 * via the alignment of haplotype (via its getCigar() method).
 *
 * @param originalRead the read we want to write aligned to the reference genome
 * @param haplotype the haplotype that the read should be aligned to, before aligning to the reference
 * @param referenceStart the start of the reference that haplotype is aligned to.  Provides global coordinate frame.
 * @param isInformative true if the read is differentially informative for one of the haplotypes
 * @return a GATKSAMRecord aligned to reference, the original read if the Smith-Waterman alignment fails,
 *         or null if no meaningful alignment is possible
 * @throws IllegalArgumentException if originalRead or haplotype is null, the haplotype cigar is unset,
 *         or referenceStart < 1
 */
public static GATKSAMRecord createReadAlignedToRef(final GATKSAMRecord originalRead,
                                                   final Haplotype haplotype,
                                                   final int referenceStart,
                                                   final boolean isInformative) {
    if ( originalRead == null ) throw new IllegalArgumentException("originalRead cannot be null");
    if ( haplotype == null ) throw new IllegalArgumentException("haplotype cannot be null");
    if ( haplotype.getCigar() == null ) throw new IllegalArgumentException("Haplotype cigar not set " + haplotype);
    if ( referenceStart < 1 ) throw new IllegalArgumentException("reference start must be >= 1 but got " + referenceStart);

    // compute the smith-waterman alignment of read -> haplotype
    final SWPairwiseAlignment swPairwiseAlignment = new SWPairwiseAlignment(haplotype.getBases(), originalRead.getReadBases(), CigarUtils.NEW_SW_PARAMETERS);
    if ( swPairwiseAlignment.getAlignmentStart2wrt1() == -1 )
        // sw can fail (reasons not clear) so if it happens just don't realign the read
        return originalRead;
    final Cigar swCigar = consolidateCigar(swPairwiseAlignment.getCigar());

    // since we're modifying the read we need to clone it
    final GATKSAMRecord read = (GATKSAMRecord)originalRead.clone();

    // only informative reads are given the haplotype tag to enhance visualization
    if ( isInformative )
        read.setAttribute(HAPLOTYPE_TAG, haplotype.hashCode());

    // compute here the read starts w.r.t. the reference from the SW result and the hap -> ref cigar
    final Cigar extendedHaplotypeCigar = haplotype.getConsolidatedPaddedCigar(1000);
    final int readStartOnHaplotype = calcFirstBaseMatchingReferenceInCigar(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1());
    final int readStartOnReference = referenceStart + haplotype.getAlignmentStartHapwrtRef() + readStartOnHaplotype;
    read.setAlignmentStart(readStartOnReference);
    // the cached soft start/end are stale after moving the alignment start
    read.resetSoftStartAndEnd();

    // compute the read -> ref alignment by mapping read -> hap -> ref from the
    // SW of read -> hap mapped through the given by hap -> ref
    final Cigar haplotypeToRef = trimCigarByBases(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1(), extendedHaplotypeCigar.getReadLength() - 1);
    final Cigar readToRefCigarRaw = applyCigarToCigar(swCigar, haplotypeToRef);
    final Cigar readToRefCigarClean = cleanUpCigar(readToRefCigarRaw);
    final Cigar readToRefCigar = leftAlignIndel(readToRefCigarClean, haplotype.getBases(),
            originalRead.getReadBases(), swPairwiseAlignment.getAlignmentStart2wrt1(), 0, true);

    read.setCigar(readToRefCigar);

    // sanity check: the projected cigar must consume exactly the read's bases
    if ( readToRefCigar.getReadLength() != read.getReadLength() )
        throw new IllegalStateException("Cigar " + readToRefCigar + " with read length " + readToRefCigar.getReadLength()
                + " != read length " + read.getReadLength() + " for read " + read.format() + "\nhapToRef " + haplotypeToRef + " length " + haplotypeToRef.getReadLength() + "/" + haplotypeToRef.getReferenceLength()
                + "\nreadToHap " + swCigar + " length " + swCigar.getReadLength() + "/" + swCigar.getReferenceLength());

    return read;
}
/**
* Get the byte[] from bases that cover the reference interval refStart -> refEnd given the

View File

@ -49,7 +49,7 @@ import java.util.*;
* Changing these values in any way will invalidate the cached value. However, we do not monitor those setter
* functions, so modifying a GATKSAMRecord in any way may result in stale cached values.
*/
public class GATKSAMRecord extends BAMRecord {
public class GATKSAMRecord extends BAMRecord implements Cloneable {
// Base Quality Score Recalibrator specific attribute tags
public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; // base qualities for insertions
public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; // base qualities for deletions
@ -593,17 +593,20 @@ public class GATKSAMRecord extends BAMRecord {
* This should be safe because callers should never modify a mutable value returned by any of the get() methods anyway.
*
* @return a shallow copy of the GATKSAMRecord
* @throws CloneNotSupportedException
*/
@Override
public Object clone() throws CloneNotSupportedException {
final GATKSAMRecord clone = (GATKSAMRecord) super.clone();
if (temporaryAttributes != null) {
clone.temporaryAttributes = new HashMap<>();
for (Object attribute : temporaryAttributes.keySet())
clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute));
public Object clone() {
try {
final GATKSAMRecord clone = (GATKSAMRecord) super.clone();
if (temporaryAttributes != null) {
clone.temporaryAttributes = new HashMap<>();
for (Object attribute : temporaryAttributes.keySet())
clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute));
}
return clone;
} catch (final CloneNotSupportedException e) {
throw new RuntimeException( e );
}
return clone;
}
/**

View File

@ -28,6 +28,7 @@ package org.broadinstitute.gatk.utils.sam;
import htsjdk.samtools.*;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
@ -172,7 +173,6 @@ public class AlignmentUtilsUnitTest {
Assert.assertEquals(AlignmentUtils.calcNumDifferentBases(cigar, ref.getBytes(), read.getBytes()), expectedDifferences);
}
@DataProvider(name = "NumAlignedBasesCountingSoftClips")
public Object[][] makeNumAlignedBasesCountingSoftClips() {
List<Object[]> tests = new ArrayList<Object[]>();