diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 14c785678..39fdcb707 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -47,6 +47,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import cern.jet.math.Arithmetic; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -74,6 +75,8 @@ import java.util.*; * calculated for certain complex indel cases or for multi-allelic sites. */ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { + private final static Logger logger = Logger.getLogger(FisherStrand.class); + private static final String FS = "FS"; private static final double MIN_PVALUE = 1E-320; private static final int MIN_QUAL_FOR_FILTERED_TEST = 17; @@ -95,6 +98,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat else if (stratifiedPerReadAlleleLikelihoodMap != null) { // either SNP with no alignment context, or indels: per-read likelihood map needed final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc); +// logger.info("VC " + vc); +// printTable(table, 0.0); return pValueForBestTable(table, null); } else @@ -131,9 +136,6 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat private Map annotationForOneTable(final double pValue) { final Object value = String.format("%.3f", QualityUtils.phredScaleErrorRate(Math.max(pValue, MIN_PVALUE))); // prevent INFINITYs return Collections.singletonMap(FS, value); -// Map map = new HashMap(); -// map.put(FS, String.format("%.3f", QualityUtils.phredScaleErrorRate(pValue))); -// return map; } public List getKeyNames() { @@ -192,7 +194,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat private static void printTable(int[][] table, double pValue) { - System.out.printf("%d %d; %d %d : %f\n", table[0][0], table[0][1], table[1][0], table[1][1], pValue); + logger.info(String.format("%d %d; %d %d : %f", table[0][0], table[0][1], table[1][0], table[1][1], pValue)); } private static boolean rotateTable(int[][] table) { @@ -315,13 +317,21 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat final boolean matchesAlt = allele.equals(alt, true); if ( matchesRef || matchesAlt ) { + final int row = matchesRef ? 0 : 1; - final boolean isFW = !read.getReadNegativeStrandFlag(); - - int row = matchesRef ? 0 : 1; - int column = isFW ? 0 : 1; - - table[row][column] += representativeCount; + if ( read.isStrandless() ) { + // a strandless read counts as observations on both strand, at 50% weight, with a minimum of 1 + // (the 1 is to ensure that a strandless read always counts as an observation on both strands, even + // if the read is only seen once, because it's a merged read or other) + final int toAdd = Math.max(representativeCount / 2, 1); + table[row][0] += toAdd; + table[row][1] += toAdd; + } else { + // a normal read with an actual strand + final boolean isFW = !read.getReadNegativeStrandFlag(); + final int column = isFW ? 0 : 1; + table[row][column] += representativeCount; + } } } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index 6c063110e..11e023b9b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -567,7 +567,7 @@ public class SlidingWindow { ObjectArrayList result = new ObjectArrayList(); if (filteredDataConsensus == null) - filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand); + filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, isNegativeStrand); ListIterator headerElementIterator = header.listIterator(start); for (int index = start; index < end; index++) { @@ -583,7 +583,7 @@ public class SlidingWindow { if ( filteredDataConsensus.getRefStart() + filteredDataConsensus.size() != headerElement.getLocation() ) { result.add(finalizeFilteredDataConsensus()); - filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, headerElement.getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand); + filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, headerElement.getLocation(), hasIndelQualities, isNegativeStrand); } genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts(), headerElement.getRMS()); @@ -606,7 +606,7 @@ public class SlidingWindow { @Requires({"start >= 0 && (end >= start || end == 0)"}) private void addToRunningConsensus(LinkedList header, int start, int end, boolean isNegativeStrand) { if (runningConsensus == null) - runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand); + runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, isNegativeStrand); Iterator headerElementIterator = header.listIterator(start); for (int index = start; index < end; index++) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java index 72fd52ebe..451e50286 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java @@ -124,8 +124,7 @@ public class SyntheticRead { private final ObjectArrayList basesCountsQuals; - private double mappingQuality; // the average of the rms of the mapping qualities of all the reads that contributed to this consensus - private String readTag; + private double mappingQuality; // Information to produce a GATKSAMRecord private SAMFileHeader header; @@ -147,14 +146,12 @@ public class SyntheticRead { * @param contigIndex the read's contig index * @param readName the read's name * @param refStart the alignment start (reference based) - * @param readTag the reduce reads tag for the synthetic read */ - public SyntheticRead(SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, String readTag, boolean hasIndelQualities, boolean isNegativeRead) { + public SyntheticRead(SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, boolean hasIndelQualities, boolean isNegativeRead) { final int initialCapacity = 10000; basesCountsQuals = new ObjectArrayList(initialCapacity); mappingQuality = 0.0; - this.readTag = readTag; this.header = header; this.readGroupRecord = readGroupRecord; this.contig = contig; @@ -165,13 +162,12 @@ public class SyntheticRead { this.isNegativeStrand = isNegativeRead; } - public SyntheticRead(ObjectArrayList bases, ByteArrayList counts, ByteArrayList quals, ByteArrayList insertionQuals, ByteArrayList deletionQuals, double mappingQuality, String readTag, SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, boolean hasIndelQualities, boolean isNegativeRead) { + public SyntheticRead(ObjectArrayList bases, ByteArrayList counts, ByteArrayList quals, ByteArrayList insertionQuals, ByteArrayList deletionQuals, double mappingQuality, SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, boolean hasIndelQualities, boolean isNegativeRead) { basesCountsQuals = new ObjectArrayList(bases.size()); for (int i = 0; i < bases.size(); ++i) { basesCountsQuals.add(new SingleBaseInfo(bases.get(i).getOrdinalByte(), counts.get(i), quals.get(i), insertionQuals.get(i), deletionQuals.get(i))); } this.mappingQuality = mappingQuality; - this.readTag = readTag; this.header = header; this.readGroupRecord = readGroupRecord; this.contig = contig; @@ -228,7 +224,7 @@ public class SyntheticRead { read.setReadBases(convertReadBases()); read.setMappingQuality((int) Math.ceil(mappingQuality / basesCountsQuals.size())); read.setReadGroup(readGroupRecord); - read.setAttribute(readTag, convertBaseCounts()); + read.setReducedReadCounts(convertBaseCounts()); if (hasIndelQualities) { read.setBaseQualities(convertInsertionQualities(), EventType.BASE_INSERTION); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticReadUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticReadUnitTest.java index 1ed28dec2..570b797ca 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticReadUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticReadUnitTest.java @@ -77,7 +77,7 @@ public void testBaseCounts() { new TestRead(bases, quals, new byte[] {1, 127, 51, 126}, new byte [] {1, 126, 50, 125})}; for (TestRead testRead : testReads) { - SyntheticRead syntheticRead = new SyntheticRead(new ObjectArrayList(testRead.getBases()), new ByteArrayList(testRead.getCounts()), new ByteArrayList(testRead.getQuals()), new ByteArrayList(testRead.getInsQuals()), new ByteArrayList(testRead.getDelQuals()), artificialMappingQuality, GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, artificialSAMHeader, artificialGATKRG, artificialContig, artificialContigIndex, artificialReadName, artificialRefStart, false, false); + SyntheticRead syntheticRead = new SyntheticRead(new ObjectArrayList(testRead.getBases()), new ByteArrayList(testRead.getCounts()), new ByteArrayList(testRead.getQuals()), new ByteArrayList(testRead.getInsQuals()), new ByteArrayList(testRead.getDelQuals()), artificialMappingQuality, artificialSAMHeader, artificialGATKRG, artificialContig, artificialContigIndex, artificialReadName, artificialRefStart, false, false); Assert.assertEquals(syntheticRead.convertBaseCounts(), testRead.getExpectedCounts()); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 2e3e45247..fcf9168b3 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -63,7 +63,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "a2232995ca9bec143e664748845a0045"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "b83b53741edb07218045d6f25f20a18b"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index bf2ddea12..8ed589c63 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -68,12 +68,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "8f33e40686443b9a72de45d5a9da1861"); + HCTest(CEUTRIO_BAM, "", "4a2880f0753e6e813b9e0c35209b3708"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "8f2b047cdace0ef122d6ad162e7bc5b9"); + HCTest(NA12878_BAM, "", "588892934f2e81247bf32e457db88449"); } @Test(enabled = false) @@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "9d4be26a2c956ba4b7b4044820eab030"); + "fa1b92373c89d2238542a319ad25c257"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { @@ -112,7 +112,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("03557376242bdf78c5237703b762573b")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("9296f1af6cf1f1cc4b79494eb366e976")); executeTest("HCTestStructuralIndels: ", spec); } @@ -134,7 +134,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("adb08cb25e902cfe0129404a682b2169")); + Arrays.asList("cf0a1bfded656153578df6cf68aa68a2")); executeTest("HC calling on a ReducedRead BAM", spec); } @@ -142,7 +142,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("a43c595a617589388ff3d7e2ddc661e7")); + Arrays.asList("addceb63f5bfa9f11e15335d5bf641e9")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index 45a2fa58d..ff64133a7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -415,6 +415,24 @@ public class Utils { return C; } + /** + * Concatenates byte arrays + * @return a concat of all bytes in allBytes in order + */ + public static byte[] concat(final byte[] ... allBytes) { + int size = 0; + for ( final byte[] bytes : allBytes ) size += bytes.length; + + final byte[] c = new byte[size]; + int offset = 0; + for ( final byte[] bytes : allBytes ) { + System.arraycopy(bytes, 0, c, offset, bytes.length); + offset += bytes.length; + } + + return c; + } + /** * Appends String(s) B to array A. * @param A First array. diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java index 76ccede62..99f1d99c7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.utils.fragments; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; @@ -56,7 +58,8 @@ import java.util.*; * Date: 3/26/11 * Time: 10:09 PM */ -public class FragmentUtils { +public final class FragmentUtils { + protected final static byte MIN_QUAL_BAD_OVERLAP = 16; private FragmentUtils() {} // private constructor /** @@ -65,18 +68,28 @@ public class FragmentUtils { * Allows us to write a generic T -> Fragment algorithm that works with any object containing * a read. * - * @param + * @param The type of the object that contains a GATKSAMRecord */ public interface ReadGetter { + /** + * Get the GATKSAMRecord associated with object + * + * @param object the thing that contains the read + * @return a non-null GATKSAMRecord read + */ public GATKSAMRecord get(T object); } - /** Identify getter for SAMRecords themselves */ + /** + * Identify getter for SAMRecords themselves + */ private final static ReadGetter SamRecordGetter = new ReadGetter() { @Override public GATKSAMRecord get(final GATKSAMRecord object) { return object; } }; - /** Gets the SAMRecord in a PileupElement */ + /** + * Gets the SAMRecord in a PileupElement + */ private final static ReadGetter PileupElementGetter = new ReadGetter() { @Override public GATKSAMRecord get(final PileupElement object) { return object.getRead(); } }; @@ -87,13 +100,20 @@ public class FragmentUtils { * and returns a FragmentCollection that contains the T objects whose underlying reads either overlap (or * not) with their mate pairs. * - * @param readContainingObjects - * @param nElements - * @param getter + * @param readContainingObjects An iterator of objects that contain GATKSAMRecords + * @param nElements the number of elements to be provided by the iterator, which is usually known upfront and + * greatly improves the efficiency of the fragment calculation + * @param getter a helper function that takes an object of type T and returns is associated GATKSAMRecord * @param - * @return + * @return a fragment collection */ - private final static FragmentCollection create(Iterable readContainingObjects, int nElements, ReadGetter getter) { + @Requires({ + "readContainingObjects != null", + "nElements >= 0", + "getter != null" + }) + @Ensures("result != null") + private static FragmentCollection create(final Iterable readContainingObjects, final int nElements, final ReadGetter getter) { Collection singletons = null; Collection> overlapping = null; Map nameMap = null; @@ -145,30 +165,69 @@ public class FragmentUtils { return new FragmentCollection(singletons, overlapping); } - public final static FragmentCollection create(ReadBackedPileup rbp) { + /** + * Create a FragmentCollection containing PileupElements from the ReadBackedPileup rbp + * @param rbp a non-null read-backed pileup. The elements in this ReadBackedPileup must be ordered + * @return a non-null FragmentCollection + */ + @Ensures("result != null") + public static FragmentCollection create(final ReadBackedPileup rbp) { + if ( rbp == null ) throw new IllegalArgumentException("Pileup cannot be null"); return create(rbp, rbp.getNumberOfElements(), PileupElementGetter); } - public final static FragmentCollection create(List reads) { + /** + * Create a FragmentCollection containing GATKSAMRecords from a list of reads + * + * @param reads a non-null list of reads, ordered by their start location + * @return a non-null FragmentCollection + */ + @Ensures("result != null") + public static FragmentCollection create(final List reads) { + if ( reads == null ) throw new IllegalArgumentException("Pileup cannot be null"); return create(reads, reads.size(), SamRecordGetter); } - public final static List mergeOverlappingPairedFragments( final List overlappingPair ) { - final byte MIN_QUAL_BAD_OVERLAP = 16; + public static List mergeOverlappingPairedFragments( final List overlappingPair ) { if( overlappingPair.size() != 2 ) { throw new ReviewedStingException("Found overlapping pair with " + overlappingPair.size() + " reads, but expecting exactly 2."); } - GATKSAMRecord firstRead = overlappingPair.get(0); - GATKSAMRecord secondRead = overlappingPair.get(1); + final GATKSAMRecord firstRead = overlappingPair.get(0); + final GATKSAMRecord secondRead = overlappingPair.get(1); + + final GATKSAMRecord merged; + if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) { + merged = mergeOverlappingPairedFragments(secondRead, firstRead); + } else { + merged = mergeOverlappingPairedFragments(firstRead, secondRead); + } + + return merged == null ? overlappingPair : Collections.singletonList(merged); + } + + /** + * Merge two overlapping reads from the same fragment into a single super read, if possible + * + * firstRead and secondRead must be part of the same fragment (though this isn't checked). Looks + * at the bases and alignment, and tries its best to create a meaningful synthetic single super read + * that represents the entire sequenced fragment. + * + * Assumes that firstRead starts before secondRead (according to their soft clipped starts) + * + * @param firstRead the left most read + * @param firstRead the right most read + * + * @return a strandless merged read of first and second, or null if the algorithm cannot create a meaningful one + */ + public static GATKSAMRecord mergeOverlappingPairedFragments(final GATKSAMRecord firstRead, final GATKSAMRecord secondRead) { + if ( firstRead == null ) throw new IllegalArgumentException("firstRead cannot be null"); + if ( secondRead == null ) throw new IllegalArgumentException("secondRead cannot be null"); + if ( ! firstRead.getReadName().equals(secondRead.getReadName()) ) throw new IllegalArgumentException("attempting to merge two reads with different names " + firstRead + " and " + secondRead); if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) { - firstRead = overlappingPair.get(1); // swap them - secondRead = overlappingPair.get(0); - } - if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) { - return overlappingPair; // can't merge them, yet: AAAAAAAAAAA-BBBBBBBBBBB-AAAAAAAAAAAAAA, B is contained entirely inside A + return null; // can't merge them, yet: AAAAAAAAAAA-BBBBBBBBBBB-AAAAAAAAAAAAAA, B is contained entirely inside A } if( firstRead.getCigarString().contains("I") || firstRead.getCigarString().contains("D") || secondRead.getCigarString().contains("I") || secondRead.getCigarString().contains("D") ) { - return overlappingPair; // fragments contain indels so don't merge them + return null; // fragments contain indels so don't merge them } final Pair pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getSoftStart()); @@ -190,10 +249,10 @@ public class FragmentUtils { } for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) { if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) { - return overlappingPair; // high qual bases don't match exactly, probably indel in only one of the fragments, so don't merge them + return null; // high qual bases don't match exactly, probably indel in only one of the fragments, so don't merge them } if( firstReadQuals[iii] < MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] < MIN_QUAL_BAD_OVERLAP ) { - return overlappingPair; // both reads have low qual bases in the overlap region so don't merge them because don't know what is going on + return null; // both reads have low qual bases in the overlap region so don't merge them because don't know what is going on } bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] ); quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] ); @@ -204,6 +263,7 @@ public class FragmentUtils { } final GATKSAMRecord returnRead = new GATKSAMRecord( firstRead.getHeader() ); + returnRead.setIsStrandless(true); returnRead.setAlignmentStart( firstRead.getSoftStart() ); returnRead.setReadBases( bases ); returnRead.setBaseQualities( quals ); @@ -237,8 +297,6 @@ public class FragmentUtils { returnRead.setBaseQualities( deletionQuals, EventType.BASE_DELETION ); } - final ArrayList returnList = new ArrayList(); - returnList.add(returnRead); - return returnList; + return returnRead; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 01a8c1996..c5f9f606b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -74,6 +74,8 @@ public class GATKSAMRecord extends BAMRecord { private int softEnd = UNINITIALIZED; private Integer adapterBoundary = null; + private boolean isStrandlessRead = false; + // because some values can be null, we don't want to duplicate effort private boolean retrievedReadGroup = false; private boolean retrievedReduceReadCounts = false; @@ -141,6 +143,45 @@ public class GATKSAMRecord extends BAMRecord { return ArtificialSAMUtils.createArtificialRead(cigar); } + /////////////////////////////////////////////////////////////////////////////// + // *** support for reads without meaningful strand information ***// + /////////////////////////////////////////////////////////////////////////////// + + /** + * Does this read have a meaningful strandedness value? + * + * Some advanced types of reads, such as reads coming from merged fragments, + * don't have meaningful strandedness values, as they are composites of multiple + * other reads. Strandless reads need to be handled specially by code that cares about + * stranded information, such as FS. + * + * @return true if this read doesn't have meaningful strand information + */ + public boolean isStrandless() { + return isStrandlessRead; + } + + /** + * Set the strandless state of this read to isStrandless + * @param isStrandless true if this read doesn't have a meaningful strandedness value + */ + public void setIsStrandless(final boolean isStrandless) { + this.isStrandlessRead = isStrandless; + } + + @Override + public boolean getReadNegativeStrandFlag() { + return ! isStrandless() && super.getReadNegativeStrandFlag(); + } + + @Override + public void setReadNegativeStrandFlag(boolean flag) { + if ( isStrandless() ) + throw new IllegalStateException("Cannot set the strand of a strandless read"); + super.setReadNegativeStrandFlag(flag); + } + + /////////////////////////////////////////////////////////////////////////////// // *** The following methods are overloaded to cache the appropriate data ***// /////////////////////////////////////////////////////////////////////////////// @@ -313,6 +354,15 @@ public class GATKSAMRecord extends BAMRecord { return getReducedReadCounts() != null; } + /** + * Set the reduced read counts for this record to counts + * @param counts the count array + */ + public void setReducedReadCounts(final byte[] counts) { + retrievedReduceReadCounts = false; + setAttribute(REDUCED_READ_CONSENSUS_TAG, counts); + } + /** * The number of bases corresponding the i'th base of the reduced read. * diff --git a/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java index 705db6f85..154b000ce 100644 --- a/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java @@ -112,6 +112,19 @@ public class UtilsUnitTest extends BaseTest { Assert.assertTrue("one-1;two-2;three-1;four-2;five-1;six-2".equals(joined)); } + @Test + public void testConcat() { + final String s1 = "A"; + final String s2 = "CC"; + final String s3 = "TTT"; + final String s4 = "GGGG"; + Assert.assertEquals(new String(Utils.concat()), ""); + Assert.assertEquals(new String(Utils.concat(s1.getBytes())), s1); + Assert.assertEquals(new String(Utils.concat(s1.getBytes(), s2.getBytes())), s1 + s2); + Assert.assertEquals(new String(Utils.concat(s1.getBytes(), s2.getBytes(), s3.getBytes())), s1 + s2 + s3); + Assert.assertEquals(new String(Utils.concat(s1.getBytes(), s2.getBytes(), s3.getBytes(), s4.getBytes())), s1 + s2 + s3 + s4); + } + @Test public void testEscapeExpressions() { String[] expected, actual; diff --git a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java index 15d69c400..4f49eb933 100644 --- a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java @@ -27,23 +27,30 @@ package org.broadinstitute.sting.utils.fragments; import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; import org.testng.annotations.BeforeTest; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; /** * Test routines for read-backed pileup. */ public class FragmentUtilsUnitTest extends BaseTest { private static SAMFileHeader header; + private static GATKSAMReadGroupRecord rgForMerged; + private final static boolean DEBUG = false; private class FragmentUtilsTest extends TestDataProvider { List statesForPileup = new ArrayList(); @@ -119,7 +126,7 @@ public class FragmentUtilsUnitTest extends BaseTest { return FragmentUtilsTest.getTests(FragmentUtilsTest.class); } - @Test(enabled = true, dataProvider = "fragmentUtilsTest") + @Test(enabled = !DEBUG, dataProvider = "fragmentUtilsTest") public void testAsPileup(FragmentUtilsTest test) { for ( TestState testState : test.statesForPileup ) { ReadBackedPileup rbp = testState.pileup; @@ -129,7 +136,7 @@ public class FragmentUtilsUnitTest extends BaseTest { } } - @Test(enabled = true, dataProvider = "fragmentUtilsTest") + @Test(enabled = !DEBUG, dataProvider = "fragmentUtilsTest") public void testAsListOfReadsFromPileup(FragmentUtilsTest test) { for ( TestState testState : test.statesForPileup ) { FragmentCollection fp = FragmentUtils.create(testState.pileup.getReads()); @@ -138,7 +145,7 @@ public class FragmentUtilsUnitTest extends BaseTest { } } - @Test(enabled = true, dataProvider = "fragmentUtilsTest") + @Test(enabled = !DEBUG, dataProvider = "fragmentUtilsTest") public void testAsListOfReads(FragmentUtilsTest test) { for ( TestState testState : test.statesForReads ) { FragmentCollection fp = FragmentUtils.create(testState.rawReads); @@ -147,7 +154,7 @@ public class FragmentUtilsUnitTest extends BaseTest { } } - @Test(enabled = true, expectedExceptions = IllegalArgumentException.class) + @Test(enabled = !DEBUG, expectedExceptions = IllegalArgumentException.class) public void testOutOfOrder() { final List pair = ArtificialSAMUtils.createPair(header, "readpair", 100, 1, 50, true, true); final GATKSAMRecord left = pair.get(0); @@ -161,5 +168,76 @@ public class FragmentUtilsUnitTest extends BaseTest { @BeforeTest public void setup() { header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000); + rgForMerged = new GATKSAMReadGroupRecord("RG1"); + } + + @DataProvider(name = "MergeFragmentsTest") + public Object[][] createMergeFragmentsTest() throws Exception { + List tests = new ArrayList(); + + final String leftFlank = "CCC"; + final String rightFlank = "AAA"; + final String allOverlappingBases = "ACGTACGTGGAACCTTAG"; + for ( int overlapSize = 1; overlapSize < allOverlappingBases.length(); overlapSize++ ) { + final String overlappingBases = allOverlappingBases.substring(0, overlapSize); + final byte[] overlappingBaseQuals = new byte[overlapSize]; + for ( int i = 0; i < overlapSize; i++ ) overlappingBaseQuals[i] = (byte)(i + 30); + final GATKSAMRecord read1 = makeOverlappingRead(leftFlank, 20, overlappingBases, overlappingBaseQuals, "", 30, 1); + final GATKSAMRecord read2 = makeOverlappingRead("", 20, overlappingBases, overlappingBaseQuals, rightFlank, 30, leftFlank.length() + 1); + final GATKSAMRecord merged = makeOverlappingRead(leftFlank, 20, overlappingBases, overlappingBaseQuals, rightFlank, 30, 1); + tests.add(new Object[]{"equalQuals", read1, read2, merged}); + + // test that the merged read base quality is the + tests.add(new Object[]{"lowQualLeft", modifyBaseQualities(read1, leftFlank.length(), overlapSize), read2, merged}); + tests.add(new Object[]{"lowQualRight", read1, modifyBaseQualities(read2, 0, overlapSize), merged}); + } + + return tests.toArray(new Object[][]{}); + } + + private GATKSAMRecord modifyBaseQualities(final GATKSAMRecord read, final int startOffset, final int length) throws Exception { + final GATKSAMRecord readWithLowQuals = (GATKSAMRecord)read.clone(); + final byte[] withLowQuals = Arrays.copyOf(read.getBaseQualities(), read.getBaseQualities().length); + for ( int i = startOffset; i < startOffset + length; i++ ) + withLowQuals[i] = (byte)(read.getBaseQualities()[i] + (i % 2 == 0 ? -1 : 0)); + readWithLowQuals.setBaseQualities(withLowQuals); + return readWithLowQuals; + } + + private GATKSAMRecord makeOverlappingRead(final String leftFlank, final int leftQual, final String overlapBases, + final byte[] overlapQuals, final String rightFlank, final int rightQual, + final int alignmentStart) { + final String bases = leftFlank + overlapBases + rightFlank; + final int readLength = bases.length(); + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, alignmentStart, readLength); + final byte[] leftQuals = Utils.dupBytes((byte) leftQual, leftFlank.length()); + final byte[] rightQuals = Utils.dupBytes((byte) rightQual, rightFlank.length()); + final byte[] quals = Utils.concat(leftQuals, overlapQuals, rightQuals); + read.setCigarString(readLength + "M"); + read.setReadBases(bases.getBytes()); + for ( final EventType type : EventType.values() ) + read.setBaseQualities(quals, type); + read.setReadGroup(rgForMerged); + read.setMappingQuality(60); + return read; + } + + @Test(enabled = true, dataProvider = "MergeFragmentsTest") + public void testMergingTwoReads(final String name, final GATKSAMRecord read1, GATKSAMRecord read2, final GATKSAMRecord expectedMerged) { + final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2); + + if ( expectedMerged == null ) { + Assert.assertNull(actual, "Expected reads not to merge, but got non-null result from merging"); + } else { + Assert.assertTrue(actual.isStrandless(), "Merged reads should be strandless"); + Assert.assertNotNull(actual, "Expected reads to merge, but got null result from merging"); + // I really care about the bases, the quals, the CIGAR, and the read group tag + Assert.assertEquals(actual.getCigarString(), expectedMerged.getCigarString()); + Assert.assertEquals(actual.getReadBases(), expectedMerged.getReadBases()); + Assert.assertEquals(actual.getReadGroup(), expectedMerged.getReadGroup()); + Assert.assertEquals(actual.getMappingQuality(), expectedMerged.getMappingQuality()); + for ( final EventType type : EventType.values() ) + Assert.assertEquals(actual.getBaseQualities(type), expectedMerged.getBaseQualities(type), "Failed base qualities for event type " + type); + } } } diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java index baf4bfbb0..38840fab1 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java @@ -64,6 +64,7 @@ public class GATKSAMRecordUnitTest extends BaseTest { for (int i = 0; i < reducedRead.getReadLength(); i++) { Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i); } + Assert.assertEquals(reducedRead.isStrandless(), false, "Reduced reads don't have meaningful strandedness information"); } @Test @@ -103,7 +104,35 @@ public class GATKSAMRecordUnitTest extends BaseTest { read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, null); Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart()); Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd()); - } + @Test + public void testStrandlessReads() { + final byte [] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'}; + final byte [] quals = {20 , 20 , 20 , 20 , 20 , 20 , 20 , 20 }; + GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M"); + Assert.assertEquals(read.isStrandless(), false); + + read.setReadNegativeStrandFlag(false); + Assert.assertEquals(read.isStrandless(), false); + Assert.assertEquals(read.getReadNegativeStrandFlag(), false); + + read.setReadNegativeStrandFlag(true); + Assert.assertEquals(read.isStrandless(), false); + Assert.assertEquals(read.getReadNegativeStrandFlag(), true); + + read.setReadNegativeStrandFlag(true); + read.setIsStrandless(true); + Assert.assertEquals(read.isStrandless(), true); + Assert.assertEquals(read.getReadNegativeStrandFlag(), false, "negative strand flag should return false even through its set for a strandless read"); + } + + @Test(expectedExceptions = IllegalStateException.class) + public void testStrandlessReadsFailSetStrand() { + final byte [] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'}; + final byte [] quals = {20 , 20 , 20 , 20 , 20 , 20 , 20 , 20 }; + GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M"); + read.setIsStrandless(true); + read.setReadNegativeStrandFlag(true); + } }