diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 14c785678..39fdcb707 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -47,6 +47,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import cern.jet.math.Arithmetic; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -74,6 +75,8 @@ import java.util.*; * calculated for certain complex indel cases or for multi-allelic sites. */ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { + private final static Logger logger = Logger.getLogger(FisherStrand.class); + private static final String FS = "FS"; private static final double MIN_PVALUE = 1E-320; private static final int MIN_QUAL_FOR_FILTERED_TEST = 17; @@ -95,6 +98,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat else if (stratifiedPerReadAlleleLikelihoodMap != null) { // either SNP with no alignment context, or indels: per-read likelihood map needed final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc); +// logger.info("VC " + vc); +// printTable(table, 0.0); return pValueForBestTable(table, null); } else @@ -131,9 +136,6 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat private Map annotationForOneTable(final double pValue) { final Object value = String.format("%.3f", QualityUtils.phredScaleErrorRate(Math.max(pValue, MIN_PVALUE))); // prevent INFINITYs return Collections.singletonMap(FS, value); -// Map map = new HashMap(); -// map.put(FS, String.format("%.3f", QualityUtils.phredScaleErrorRate(pValue))); -// return map; } public List getKeyNames() { @@ -192,7 +194,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat private static void printTable(int[][] table, double pValue) { - System.out.printf("%d %d; %d %d : %f\n", table[0][0], table[0][1], table[1][0], table[1][1], pValue); + logger.info(String.format("%d %d; %d %d : %f", table[0][0], table[0][1], table[1][0], table[1][1], pValue)); } private static boolean rotateTable(int[][] table) { @@ -315,13 +317,21 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat final boolean matchesAlt = allele.equals(alt, true); if ( matchesRef || matchesAlt ) { + final int row = matchesRef ? 0 : 1; - final boolean isFW = !read.getReadNegativeStrandFlag(); - - int row = matchesRef ? 0 : 1; - int column = isFW ? 0 : 1; - - table[row][column] += representativeCount; + if ( read.isStrandless() ) { + // a strandless read counts as observations on both strand, at 50% weight, with a minimum of 1 + // (the 1 is to ensure that a strandless read always counts as an observation on both strands, even + // if the read is only seen once, because it's a merged read or other) + final int toAdd = Math.max(representativeCount / 2, 1); + table[row][0] += toAdd; + table[row][1] += toAdd; + } else { + // a normal read with an actual strand + final boolean isFW = !read.getReadNegativeStrandFlag(); + final int column = isFW ? 0 : 1; + table[row][column] += representativeCount; + } } } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index 6c063110e..11e023b9b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -567,7 +567,7 @@ public class SlidingWindow { ObjectArrayList result = new ObjectArrayList(); if (filteredDataConsensus == null) - filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand); + filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, isNegativeStrand); ListIterator headerElementIterator = header.listIterator(start); for (int index = start; index < end; index++) { @@ -583,7 +583,7 @@ public class SlidingWindow { if ( filteredDataConsensus.getRefStart() + filteredDataConsensus.size() != headerElement.getLocation() ) { result.add(finalizeFilteredDataConsensus()); - filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, headerElement.getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand); + filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, headerElement.getLocation(), hasIndelQualities, isNegativeStrand); } genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts(), headerElement.getRMS()); @@ -606,7 +606,7 @@ public class SlidingWindow { @Requires({"start >= 0 && (end >= start || end == 0)"}) private void addToRunningConsensus(LinkedList header, int start, int end, boolean isNegativeStrand) { if (runningConsensus == null) - runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand); + runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, isNegativeStrand); Iterator headerElementIterator = header.listIterator(start); for (int index = start; index < end; index++) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java index 72fd52ebe..451e50286 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java @@ -124,8 +124,7 @@ public class SyntheticRead { private final ObjectArrayList basesCountsQuals; - private double mappingQuality; // the average of the rms of the mapping qualities of all the reads that contributed to this consensus - private String readTag; + private double mappingQuality; // Information to produce a GATKSAMRecord private SAMFileHeader header; @@ -147,14 +146,12 @@ public class SyntheticRead { * @param contigIndex the read's contig index * @param readName the read's name * @param refStart the alignment start (reference based) - * @param readTag the reduce reads tag for the synthetic read */ - public SyntheticRead(SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, String readTag, boolean hasIndelQualities, boolean isNegativeRead) { + public SyntheticRead(SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, boolean hasIndelQualities, boolean isNegativeRead) { final int initialCapacity = 10000; basesCountsQuals = new ObjectArrayList(initialCapacity); mappingQuality = 0.0; - this.readTag = readTag; this.header = header; this.readGroupRecord = readGroupRecord; this.contig = contig; @@ -165,13 +162,12 @@ public class SyntheticRead { this.isNegativeStrand = isNegativeRead; } - public SyntheticRead(ObjectArrayList bases, ByteArrayList counts, ByteArrayList quals, ByteArrayList insertionQuals, ByteArrayList deletionQuals, double mappingQuality, String readTag, SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, boolean hasIndelQualities, boolean isNegativeRead) { + public SyntheticRead(ObjectArrayList bases, ByteArrayList counts, ByteArrayList quals, ByteArrayList insertionQuals, ByteArrayList deletionQuals, double mappingQuality, SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, boolean hasIndelQualities, boolean isNegativeRead) { basesCountsQuals = new ObjectArrayList(bases.size()); for (int i = 0; i < bases.size(); ++i) { basesCountsQuals.add(new SingleBaseInfo(bases.get(i).getOrdinalByte(), counts.get(i), quals.get(i), insertionQuals.get(i), deletionQuals.get(i))); } this.mappingQuality = mappingQuality; - this.readTag = readTag; this.header = header; this.readGroupRecord = readGroupRecord; this.contig = contig; @@ -228,7 +224,7 @@ public class SyntheticRead { read.setReadBases(convertReadBases()); read.setMappingQuality((int) Math.ceil(mappingQuality / basesCountsQuals.size())); read.setReadGroup(readGroupRecord); - read.setAttribute(readTag, convertBaseCounts()); + read.setReducedReadCounts(convertBaseCounts()); if (hasIndelQualities) { read.setBaseQualities(convertInsertionQualities(), EventType.BASE_INSERTION); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticReadUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticReadUnitTest.java index 1ed28dec2..570b797ca 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticReadUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticReadUnitTest.java @@ -77,7 +77,7 @@ public void testBaseCounts() { new TestRead(bases, quals, new byte[] {1, 127, 51, 126}, new byte [] {1, 126, 50, 125})}; for (TestRead testRead : testReads) { - SyntheticRead syntheticRead = new SyntheticRead(new ObjectArrayList(testRead.getBases()), new ByteArrayList(testRead.getCounts()), new ByteArrayList(testRead.getQuals()), new ByteArrayList(testRead.getInsQuals()), new ByteArrayList(testRead.getDelQuals()), artificialMappingQuality, GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, artificialSAMHeader, artificialGATKRG, artificialContig, artificialContigIndex, artificialReadName, artificialRefStart, false, false); + SyntheticRead syntheticRead = new SyntheticRead(new ObjectArrayList(testRead.getBases()), new ByteArrayList(testRead.getCounts()), new ByteArrayList(testRead.getQuals()), new ByteArrayList(testRead.getInsQuals()), new ByteArrayList(testRead.getDelQuals()), artificialMappingQuality, artificialSAMHeader, artificialGATKRG, artificialContig, artificialContigIndex, artificialReadName, artificialRefStart, false, false); Assert.assertEquals(syntheticRead.convertBaseCounts(), testRead.getExpectedCounts()); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 2e3e45247..fcf9168b3 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -63,7 +63,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "a2232995ca9bec143e664748845a0045"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "b83b53741edb07218045d6f25f20a18b"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index bf2ddea12..8ed589c63 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -68,12 +68,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "8f33e40686443b9a72de45d5a9da1861"); + HCTest(CEUTRIO_BAM, "", "4a2880f0753e6e813b9e0c35209b3708"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "8f2b047cdace0ef122d6ad162e7bc5b9"); + HCTest(NA12878_BAM, "", "588892934f2e81247bf32e457db88449"); } @Test(enabled = false) @@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "9d4be26a2c956ba4b7b4044820eab030"); + "fa1b92373c89d2238542a319ad25c257"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { @@ -112,7 +112,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("03557376242bdf78c5237703b762573b")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("9296f1af6cf1f1cc4b79494eb366e976")); executeTest("HCTestStructuralIndels: ", spec); } @@ -134,7 +134,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("adb08cb25e902cfe0129404a682b2169")); + Arrays.asList("cf0a1bfded656153578df6cf68aa68a2")); executeTest("HC calling on a ReducedRead BAM", spec); } @@ -142,7 +142,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("a43c595a617589388ff3d7e2ddc661e7")); + Arrays.asList("addceb63f5bfa9f11e15335d5bf641e9")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java index fa0187728..99f1d99c7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java @@ -263,6 +263,7 @@ public final class FragmentUtils { } final GATKSAMRecord returnRead = new GATKSAMRecord( firstRead.getHeader() ); + returnRead.setIsStrandless(true); returnRead.setAlignmentStart( firstRead.getSoftStart() ); returnRead.setReadBases( bases ); returnRead.setBaseQualities( quals ); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 01a8c1996..c5f9f606b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -74,6 +74,8 @@ public class GATKSAMRecord extends BAMRecord { private int softEnd = UNINITIALIZED; private Integer adapterBoundary = null; + private boolean isStrandlessRead = false; + // because some values can be null, we don't want to duplicate effort private boolean retrievedReadGroup = false; private boolean retrievedReduceReadCounts = false; @@ -141,6 +143,45 @@ public class GATKSAMRecord extends BAMRecord { return ArtificialSAMUtils.createArtificialRead(cigar); } + /////////////////////////////////////////////////////////////////////////////// + // *** support for reads without meaningful strand information ***// + /////////////////////////////////////////////////////////////////////////////// + + /** + * Does this read have a meaningful strandedness value? + * + * Some advanced types of reads, such as reads coming from merged fragments, + * don't have meaningful strandedness values, as they are composites of multiple + * other reads. Strandless reads need to be handled specially by code that cares about + * stranded information, such as FS. + * + * @return true if this read doesn't have meaningful strand information + */ + public boolean isStrandless() { + return isStrandlessRead; + } + + /** + * Set the strandless state of this read to isStrandless + * @param isStrandless true if this read doesn't have a meaningful strandedness value + */ + public void setIsStrandless(final boolean isStrandless) { + this.isStrandlessRead = isStrandless; + } + + @Override + public boolean getReadNegativeStrandFlag() { + return ! isStrandless() && super.getReadNegativeStrandFlag(); + } + + @Override + public void setReadNegativeStrandFlag(boolean flag) { + if ( isStrandless() ) + throw new IllegalStateException("Cannot set the strand of a strandless read"); + super.setReadNegativeStrandFlag(flag); + } + + /////////////////////////////////////////////////////////////////////////////// // *** The following methods are overloaded to cache the appropriate data ***// /////////////////////////////////////////////////////////////////////////////// @@ -313,6 +354,15 @@ public class GATKSAMRecord extends BAMRecord { return getReducedReadCounts() != null; } + /** + * Set the reduced read counts for this record to counts + * @param counts the count array + */ + public void setReducedReadCounts(final byte[] counts) { + retrievedReduceReadCounts = false; + setAttribute(REDUCED_READ_CONSENSUS_TAG, counts); + } + /** * The number of bases corresponding the i'th base of the reduced read. * diff --git a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java index 89d192f9e..4f49eb933 100644 --- a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java @@ -229,6 +229,7 @@ public class FragmentUtilsUnitTest extends BaseTest { if ( expectedMerged == null ) { Assert.assertNull(actual, "Expected reads not to merge, but got non-null result from merging"); } else { + Assert.assertTrue(actual.isStrandless(), "Merged reads should be strandless"); Assert.assertNotNull(actual, "Expected reads to merge, but got null result from merging"); // I really care about the bases, the quals, the CIGAR, and the read group tag Assert.assertEquals(actual.getCigarString(), expectedMerged.getCigarString()); diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java index baf4bfbb0..38840fab1 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java @@ -64,6 +64,7 @@ public class GATKSAMRecordUnitTest extends BaseTest { for (int i = 0; i < reducedRead.getReadLength(); i++) { Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i); } + Assert.assertEquals(reducedRead.isStrandless(), false, "Reduced reads don't have meaningful strandedness information"); } @Test @@ -103,7 +104,35 @@ public class GATKSAMRecordUnitTest extends BaseTest { read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, null); Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart()); Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd()); - } + @Test + public void testStrandlessReads() { + final byte [] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'}; + final byte [] quals = {20 , 20 , 20 , 20 , 20 , 20 , 20 , 20 }; + GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M"); + Assert.assertEquals(read.isStrandless(), false); + + read.setReadNegativeStrandFlag(false); + Assert.assertEquals(read.isStrandless(), false); + Assert.assertEquals(read.getReadNegativeStrandFlag(), false); + + read.setReadNegativeStrandFlag(true); + Assert.assertEquals(read.isStrandless(), false); + Assert.assertEquals(read.getReadNegativeStrandFlag(), true); + + read.setReadNegativeStrandFlag(true); + read.setIsStrandless(true); + Assert.assertEquals(read.isStrandless(), true); + Assert.assertEquals(read.getReadNegativeStrandFlag(), false, "negative strand flag should return false even through its set for a strandless read"); + } + + @Test(expectedExceptions = IllegalStateException.class) + public void testStrandlessReadsFailSetStrand() { + final byte [] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'}; + final byte [] quals = {20 , 20 , 20 , 20 , 20 , 20 , 20 , 20 }; + GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M"); + read.setIsStrandless(true); + read.setReadNegativeStrandFlag(true); + } }