HC bam writer now sets the read to MQ0 if it isn't informative

-- Makes visualization of read evidence easier in IGV.
2013-06-13 09:59:16 -04:00 · 2013-06-13 09:59:16 -04:00 · d5f0848bd5
parent 17d3ccb03b
commit d5f0848bd5
4 changed files with 21 additions and 13 deletions
--- a/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/AllHaplotypeBAMWriter.java
+++ b/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/AllHaplotypeBAMWriter.java
@ -80,18 +80,18 @@ class AllHaplotypeBAMWriter extends HaplotypeBAMWriter {
                                              final List<Haplotype> bestHaplotypes,
                                              final Set<Haplotype> calledHaplotypes,
                                              final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) {
-        writeHaplotypesAsReads(haplotypes, new HashSet<Haplotype>(bestHaplotypes), paddedReferenceLoc);
+        writeHaplotypesAsReads(haplotypes, new HashSet<>(bestHaplotypes), paddedReferenceLoc);

        // we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently
-        final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<Allele, Haplotype>(haplotypes.size());
+        final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<>(haplotypes.size());
        for ( final Haplotype haplotype : haplotypes )
            alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype);

        // next, output the interesting reads for each sample aligned against the appropriate haplotype
        for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) {
-            for ( Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) {
+            for ( final Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) {
                final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue());
-                writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart());
+                writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart(), bestAllele.isInformative());
            }
        }
    }
--- a/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/CalledHaplotypeBAMWriter.java
+++ b/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/CalledHaplotypeBAMWriter.java
@ -87,7 +87,7 @@ class CalledHaplotypeBAMWriter extends HaplotypeBAMWriter {
        writeHaplotypesAsReads(calledHaplotypes, calledHaplotypes, paddedReferenceLoc);

        // we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently
-        final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<Allele, Haplotype>(haplotypes.size());
+        final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<>(haplotypes.size());
        for ( final Haplotype haplotype : calledHaplotypes ) {
            alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype);
        }
@ -97,10 +97,10 @@ class CalledHaplotypeBAMWriter extends HaplotypeBAMWriter {

        // next, output the interesting reads for each sample aligned against one of the called haplotypes
        for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) {
-            for ( Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) {
+            for ( final Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) {
                if ( entry.getKey().getMappingQuality() > 0 ) {
                    final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue(), allelesOfCalledHaplotypes);
-                    writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart());
+                    writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), paddedReferenceLoc.getStart(), bestAllele.isInformative());
                }
            }
        }
--- a/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/HaplotypeBAMWriter.java
+++ b/protected/java/src/org/broadinstitute/sting/utils/haplotypeBAMWriter/HaplotypeBAMWriter.java
@ -185,11 +185,13 @@ public abstract class HaplotypeBAMWriter {
     * @param originalRead the read we want to write aligned to the reference genome
     * @param haplotype the haplotype that the read should be aligned to, before aligning to the reference
     * @param referenceStart the start of the reference that haplotype is aligned to.  Provides global coordinate frame.
+     * @param isInformative true if the read is differentially informative for one of the haplotypes
     */
    protected void writeReadAgainstHaplotype(final GATKSAMRecord originalRead,
                                             final Haplotype haplotype,
-                                             final int referenceStart) {
-        final GATKSAMRecord alignedToRef = createReadAlignedToRef(originalRead, haplotype, referenceStart);
+                                             final int referenceStart,
+                                             final boolean isInformative) {
+        final GATKSAMRecord alignedToRef = createReadAlignedToRef(originalRead, haplotype, referenceStart, isInformative);
        if ( alignedToRef != null )
            bamWriter.addAlignment(alignedToRef);
    }
@ -201,11 +203,13 @@ public abstract class HaplotypeBAMWriter {
     * @param originalRead the read we want to write aligned to the reference genome
     * @param haplotype the haplotype that the read should be aligned to, before aligning to the reference
     * @param referenceStart the start of the reference that haplotype is aligned to.  Provides global coordinate frame.
+     * @param isInformative true if the read is differentially informative for one of the haplotypes
     * @return a GATKSAMRecord aligned to reference, or null if no meaningful alignment is possible
     */
    protected GATKSAMRecord createReadAlignedToRef(final GATKSAMRecord originalRead,
                                                   final Haplotype haplotype,
-                                                   final int referenceStart) {
+                                                   final int referenceStart,
+                                                   final boolean isInformative) {
        if ( originalRead == null ) throw new IllegalArgumentException("originalRead cannot be null");
        if ( haplotype == null ) throw new IllegalArgumentException("haplotype cannot be null");
        if ( haplotype.getCigar() == null ) throw new IllegalArgumentException("Haplotype cigar not set " + haplotype);
@ -225,6 +229,10 @@ public abstract class HaplotypeBAMWriter {

            addHaplotypeTag(read, haplotype);

+            // uninformative reads are set to zero mapping quality to enhance visualization
+            if ( !isInformative )
+                read.setMappingQuality(0);
+
            // compute here the read starts w.r.t. the reference from the SW result and the hap -> ref cigar
            final Cigar extendedHaplotypeCigar = haplotype.getConsolidatedPaddedCigar(1000);
            final int readStartOnHaplotype = AlignmentUtils.calcFirstBaseMatchingReferenceInCigar(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1());
--- a/protected/java/test/org/broadinstitute/sting/utils/haplotypeBAMWriter/HaplotypeBAMWriterUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/utils/haplotypeBAMWriter/HaplotypeBAMWriterUnitTest.java
@ -177,10 +177,10 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest {
        final GATKSAMRecord originalReadCopy = (GATKSAMRecord)read.clone();

        if ( expectedReadCigar == null ) {
-            Assert.assertNull(writer.createReadAlignedToRef(read, haplotype, refStart));
+            Assert.assertNull(writer.createReadAlignedToRef(read, haplotype, refStart, true));
        } else {
            final Cigar expectedCigar = TextCigarCodec.getSingleton().decode(expectedReadCigar);
-            final GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, refStart);
+            final GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, refStart, true);

            Assert.assertEquals(alignedRead.getReadName(), originalReadCopy.getReadName());
            Assert.assertEquals(alignedRead.getAlignmentStart(), expectedReadStart);
@ -290,7 +290,7 @@ public class HaplotypeBAMWriterUnitTest extends BaseTest {
    @Test(dataProvider = "ComplexReadAlignedToRef", enabled = !DEBUG)
    public void testReadAlignedToRefComplexAlignment(final int testIndex, final GATKSAMRecord read, final String reference, final Haplotype haplotype, final int expectedMaxMismatches) throws Exception {
        final HaplotypeBAMWriter writer = new CalledHaplotypeBAMWriter(new MockBAMWriter());
-        final GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, 1);
+        final GATKSAMRecord alignedRead = writer.createReadAlignedToRef(read, haplotype, 1, true);
        if ( alignedRead != null ) {
            final int mismatches = AlignmentUtils.getMismatchCount(alignedRead, reference.getBytes(), alignedRead.getAlignmentStart() - 1).numMismatches;
            Assert.assertTrue(mismatches <= expectedMaxMismatches,