HC bam writer now sets the read to MQ0 if it isn't informative

-- Makes visualization of read evidence easier in IGV.
This commit is contained in:
Ryan Poplin 2013-06-13 09:59:16 -04:00
parent 17d3ccb03b
commit d5f0848bd5
4 changed files with 21 additions and 13 deletions

View File

@ -80,18 +80,18 @@ class AllHaplotypeBAMWriter extends HaplotypeBAMWriter {
final List<Haplotype> bestHaplotypes,
final Set<Haplotype> calledHaplotypes,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) {
writeHaplotypesAsReads(haplotypes, new HashSet<Haplotype>(bestHaplotypes), paddedReferenceLoc);
writeHaplotypesAsReads(haplotypes, new HashSet<>(bestHaplotypes), paddedReferenceLoc);
// we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently
final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<Allele, Haplotype>(haplotypes.size());
final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<>(haplotypes.size());
for ( final Haplotype haplotype : haplotypes )
alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype);
// next, output the interesting reads for each sample aligned against the appropriate haplotype
for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) {
for ( Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) {
for ( final Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) {
final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue());
writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart());
writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart(), bestAllele.isInformative());
}
}
}

View File

@ -87,7 +87,7 @@ class CalledHaplotypeBAMWriter extends HaplotypeBAMWriter {
writeHaplotypesAsReads(calledHaplotypes, calledHaplotypes, paddedReferenceLoc);
// we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently
final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<Allele, Haplotype>(haplotypes.size());
final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<>(haplotypes.size());
for ( final Haplotype haplotype : calledHaplotypes ) {
alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype);
}
@ -97,10 +97,10 @@ class CalledHaplotypeBAMWriter extends HaplotypeBAMWriter {
// next, output the interesting reads for each sample aligned against one of the called haplotypes
for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) {
for ( Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) {
for ( final Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) {
if ( entry.getKey().getMappingQuality() > 0 ) {
final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue(), allelesOfCalledHaplotypes);
writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart());
writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart(), bestAllele.isInformative());
}
}
}

View File

@ -185,11 +185,13 @@ public abstract class HaplotypeBAMWriter {
* @param originalRead the read we want to write aligned to the reference genome
* @param haplotype the haplotype that the read should be aligned to, before aligning to the reference
* @param referenceStart the start of the reference that haplotype is aligned to. Provides global coordinate frame.
* @param isInformative true if the read is differentially informative for one of the haplotypes
*/
protected void writeReadAgainstHaplotype(final GATKSAMRecord originalRead,
final Haplotype haplotype,
final int referenceStart) {
final GATKSAMRecord alignedToRef = createReadAlignedToRef(originalRead, haplotype, referenceStart);
final int referenceStart,
final boolean isInformative) {
final GATKSAMRecord alignedToRef = createReadAlignedToRef(originalRead, haplotype, referenceStart, isInformative);
if ( alignedToRef != null )
bamWriter.addAlignment(alignedToRef);
}
@ -201,11 +203,13 @@ public abstract class HaplotypeBAMWriter {
* @param originalRead the read we want to write aligned to the reference genome
* @param haplotype the haplotype that the read should be aligned to, before aligning to the reference
* @param referenceStart the start of the reference that haplotype is aligned to. Provides global coordinate frame.
* @param isInformative true if the read is differentially informative for one of the haplotypes
* @return a GATKSAMRecord aligned to reference, or null if no meaningful alignment is possible
*/
protected GATKSAMRecord createReadAlignedToRef(final GATKSAMRecord originalRead,
final Haplotype haplotype,
final int referenceStart) {
final int referenceStart,
final boolean isInformative) {
if ( originalRead == null ) throw new IllegalArgumentException("originalRead cannot be null");
if ( haplotype == null ) throw new IllegalArgumentException("haplotype cannot be null");
if ( haplotype.getCigar() == null ) throw new IllegalArgumentException("Haplotype cigar not set " + haplotype);
@ -225,6 +229,10 @@ public abstract class HaplotypeBAMWriter {
addHaplotypeTag(read, haplotype);
// uninformative reads are set to zero mapping quality to enhance visualization
if ( !isInformative )
read.setMappingQuality(0);
// compute here the read starts w.r.t. the reference from the SW result and the hap -> ref cigar
final Cigar extendedHaplotypeCigar = haplotype.getConsolidatedPaddedCigar(1000);
final int readStartOnHaplotype = AlignmentUtils.calcFirstBaseMatchingReferenceInCigar(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1());

View File

@ -177,10 +177,10 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest {
final GATKSAMRecord originalReadCopy = (GATKSAMRecord)read.clone();
if ( expectedReadCigar == null ) {
Assert.assertNull(writer.createReadAlignedToRef(read, haplotype, refStart));
Assert.assertNull(writer.createReadAlignedToRef(read, haplotype, refStart, true));
} else {
final Cigar expectedCigar = TextCigarCodec.getSingleton().decode(expectedReadCigar);
final GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, refStart);
final GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, refStart, true);
Assert.assertEquals(alignedRead.getReadName(), originalReadCopy.getReadName());
Assert.assertEquals(alignedRead.getAlignmentStart(), expectedReadStart);
@ -290,7 +290,7 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest {
@Test(dataProvider = "ComplexReadAlignedToRef", enabled = !DEBUG)
public void testReadAlignedToRefComplexAlignment(final int testIndex, final GATKSAMRecord read, final String reference, final Haplotype haplotype, final int expectedMaxMismatches) throws Exception {
final HaplotypeBAMWriter writer = new CalledHaplotypeBAMWriter(new MockBAMWriter());
final GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, 1);
final GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, 1, true);
if ( alignedRead != null ) {
final int mismatches = AlignmentUtils.getMismatchCount(alignedRead, reference.getBytes(), alignedRead.getAlignmentStart() - 1).numMismatches;
Assert.assertTrue(mismatches <= expectedMaxMismatches,