diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java index 253fdca48..2257adf6a 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java @@ -99,10 +99,6 @@ public class ConsensusAlleleCounter { Map contexts, AlignmentContextUtils.ReadOrientation contextType) { final Map consensusIndelStrings = countConsensusAlleles(ref, contexts, contextType); -// logger.info("Alleles at " + ref.getLocus()); -// for ( Map.Entry elt : consensusIndelStrings.entrySet() ) { -// logger.info(" " + elt.getValue() + " => " + elt.getKey()); -// } return consensusCountsToAlleles(ref, consensusIndelStrings); } @@ -138,14 +134,9 @@ public class ConsensusAlleleCounter { final int nReadsOverall = indelPileup.getNumberOfElements(); if ( nIndelReads == 0 || (nIndelReads / (1.0 * nReadsOverall)) < minFractionInOneSample ) { -// if ( nIndelReads > 0 ) -// logger.info("Skipping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall); continue; -// } else { -// logger.info("### Keeping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall); } - for (PileupElement p : indelPileup) { final GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead()); if (read == null) @@ -154,17 +145,10 @@ public class ConsensusAlleleCounter { continue; } -/* if (DEBUG && p.isIndel()) { - System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n", - read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(), - p.getEventLength(),p.getType().toString(), p.getEventBases()); - } - */ - String indelString = p.getEventBases(); - if ( p.isBeforeInsertion() ) { - // edge case: ignore a deletion immediately preceding an insertion as p.getEventBases() returns null [EB] - if ( indelString == null ) + final String insertionBases = p.getBasesOfImmediatelyFollowingInsertion(); + // edge case: ignore a deletion immediately preceding an insertion as p.getBasesOfImmediatelyFollowingInsertion() returns null [EB] + if ( insertionBases == null ) continue; boolean foundKey = false; @@ -182,20 +166,20 @@ public class ConsensusAlleleCounter { String s = cList.get(k).getFirst(); int cnt = cList.get(k).getSecond(); // case 1: current insertion is prefix of indel in hash map - if (s.startsWith(indelString)) { + if (s.startsWith(insertionBases)) { cList.set(k,new Pair(s,cnt+1)); foundKey = true; } - else if (indelString.startsWith(s)) { + else if (insertionBases.startsWith(s)) { // case 2: indel stored in hash table is prefix of current insertion // In this case, new bases are new key. foundKey = true; - cList.set(k,new Pair(indelString,cnt+1)); + cList.set(k,new Pair(insertionBases,cnt+1)); } } if (!foundKey) // none of the above: event bases not supported by previous table, so add new key - cList.add(new Pair(indelString,1)); + cList.add(new Pair(insertionBases,1)); } else if (read.getAlignmentStart() == loc.getStart()+1) { @@ -203,28 +187,28 @@ public class ConsensusAlleleCounter { for (int k=0; k < cList.size(); k++) { String s = cList.get(k).getFirst(); int cnt = cList.get(k).getSecond(); - if (s.endsWith(indelString)) { + if (s.endsWith(insertionBases)) { // case 1: current insertion (indelString) is suffix of indel in hash map (s) cList.set(k,new Pair(s,cnt+1)); foundKey = true; } - else if (indelString.endsWith(s)) { + else if (insertionBases.endsWith(s)) { // case 2: indel stored in hash table is prefix of current insertion // In this case, new bases are new key. foundKey = true; - cList.set(k,new Pair(indelString,cnt+1)); + cList.set(k,new Pair(insertionBases,cnt+1)); } } if (!foundKey) // none of the above: event bases not supported by previous table, so add new key - cList.add(new Pair(indelString,1)); + cList.add(new Pair(insertionBases,1)); } else { // normal case: insertion somewhere in the middle of a read: add count to arrayList - int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0; - cList.add(new Pair(indelString,cnt+1)); + int cnt = consensusIndelStrings.containsKey(insertionBases)? consensusIndelStrings.get(insertionBases):0; + cList.add(new Pair(insertionBases,cnt+1)); } // copy back arrayList into hashMap @@ -235,10 +219,9 @@ public class ConsensusAlleleCounter { } else if ( p.isBeforeDeletionStart() ) { - indelString = String.format("D%d",p.getEventLength()); - int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0; - consensusIndelStrings.put(indelString,cnt+1); - + final String deletionString = String.format("D%d",p.getLengthOfImmediatelyFollowingIndel()); + int cnt = consensusIndelStrings.containsKey(deletionString)? consensusIndelStrings.get(deletionString):0; + consensusIndelStrings.put(deletionString,cnt+1); } } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java index 12af7839a..1b004d889 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java @@ -214,7 +214,7 @@ public class ErrorModel { if (DEBUG) System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n", pileupElement.getBase(), pileupElement.isBeforeDeletionStart(), - pileupElement.isBeforeInsertion(),pileupElement.getEventBases(),pileupElement.getEventLength(), allele.toString(), refAllele.toString()); + pileupElement.isBeforeInsertion(),pileupElement.getBasesOfImmediatelyFollowingInsertion(),pileupElement.getLengthOfImmediatelyFollowingIndel(), allele.toString(), refAllele.toString()); //pileupElement. // if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch @@ -238,11 +238,11 @@ public class ErrorModel { // for non-ref alleles, byte[] alleleBases = allele.getBases(); int eventLength = alleleBases.length - refAllele.getBases().length; - if (eventLength < 0 && pileupElement.isBeforeDeletionStart() && pileupElement.getEventLength() == -eventLength) + if (eventLength < 0 && pileupElement.isBeforeDeletionStart() && pileupElement.getLengthOfImmediatelyFollowingIndel() == -eventLength) return true; if (eventLength > 0 && pileupElement.isBeforeInsertion() && - Arrays.equals(pileupElement.getEventBases().getBytes(),Arrays.copyOfRange(alleleBases,1,alleleBases.length))) // allele contains ref byte, but pileupElement's event bases doesn't + Arrays.equals(pileupElement.getBasesOfImmediatelyFollowingInsertion().getBytes(),Arrays.copyOfRange(alleleBases,1,alleleBases.length))) // allele contains ref byte, but pileupElement's event bases doesn't return true; return false; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java index 7bbe470f8..c957bb9db 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java @@ -210,7 +210,7 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype // count number of elements in pileup for (PileupElement elt : pileup) { if (VERBOSE) - System.out.format("base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d\n",elt.getBase(), elt.isBeforeDeletionStart(),elt.isBeforeInsertion(),elt.getEventBases(),elt.getEventLength()); + System.out.format("base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d\n",elt.getBase(), elt.isBeforeDeletionStart(),elt.isBeforeInsertion(),elt.getBasesOfImmediatelyFollowingInsertion(),elt.getLengthOfImmediatelyFollowingIndel()); int idx =0; for (Allele allele : alleles) { int cnt = numSeenBases.get(idx); diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 0f3bc4fd9..d94fd1214 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import java.util.Arrays; import java.util.EnumSet; import java.util.LinkedList; import java.util.List; @@ -157,21 +158,67 @@ public class PileupElement implements Comparable { } /** - * @return length of the event (number of inserted or deleted bases + * Get the length of an immediately following insertion or deletion event, or 0 if no such event exists + * + * Only returns a positive value when this pileup element is immediately before an indel. Being + * immediately before a deletion means that this pileup element isn't an deletion, and that the + * next genomic alignment for this read is a deletion. For the insertion case, this means + * that an insertion cigar occurs immediately after this element, between this one and the + * next genomic position. + * + * Note this function may be expensive, so multiple uses should be cached by the caller + * + * @return length of the event (number of inserted or deleted bases), or 0 */ - @Deprecated - public int getEventLength() { - // TODO -- compute on the fly, provide meaningful function - return -1; + @Ensures("result >= 0") + public int getLengthOfImmediatelyFollowingIndel() { + final CigarElement element = getNextIndelCigarElement(); + return element == null ? 0 : element.getLength(); } /** + * Helpful function to get the immediately following cigar element, for an insertion or deletion + * + * if this state precedes a deletion (i.e., next position on genome) or insertion (immediately between + * this and the next position) returns the CigarElement corresponding to this event. Otherwise returns + * null. + * + * @return a CigarElement, or null if the next alignment state ins't an insertion or deletion. + */ + private CigarElement getNextIndelCigarElement() { + if ( isBeforeDeletionStart() ) { + final CigarElement element = getNextOnGenomeCigarElement(); + if ( element == null || element.getOperator() != CigarOperator.D ) + throw new IllegalStateException("Immediately before deletion but the next cigar element isn't a deletion " + element); + return element; + } else if ( isBeforeInsertion() ) { + final CigarElement element = getBetweenNextPosition().get(0); + if ( element.getOperator() != CigarOperator.I ) + throw new IllegalStateException("Immediately before insertion but the next cigar element isn't an insertion " + element); + return element; + } else { + return null; + } + } + + /** + * Get the bases for an insertion that immediately follows this alignment state, or null if none exists + * + * @see #getLengthOfImmediatelyFollowingIndel() for details on the meaning of immediately. + * + * If the immediately following state isn't an insertion, returns null + * * @return actual sequence of inserted bases, or a null if the event is a deletion or if there is no event in the associated read. */ - @Deprecated - public String getEventBases() { - // TODO -- compute on the fly, provide meaningful function - return null; + @Ensures("result == null || result.length() == getLengthOfImmediatelyFollowingIndel()") + public String getBasesOfImmediatelyFollowingInsertion() { + final CigarElement element = getNextIndelCigarElement(); + if ( element != null && element.getOperator() == CigarOperator.I ) { + final int getFrom = offset + 1; + final byte[] bases = Arrays.copyOfRange(read.getReadBases(), getFrom, getFrom + element.getLength()); + return new String(bases); + } else + return null; } public int getMappingQual() { diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java index 0994968a1..ec817b65c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.utils.locusiterator; +import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.ReadProperties; @@ -32,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod; import org.broadinstitute.sting.utils.NGSPlatform; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -90,7 +92,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { } } - @Test(enabled = false) + @Test(enabled = true) public void testIndelsInRegularPileup() { final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; final byte[] indelBases = new byte[] {'A','A','A','A','C','T','A','A','A','A','A','A'}; @@ -125,8 +127,8 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { for (PileupElement p : pileup) { if (p.isBeforeInsertion()) { foundIndel = true; - Assert.assertEquals(p.getEventLength(), 2, "Wrong event length"); - Assert.assertEquals(p.getEventBases(), "CT", "Inserted bases are incorrect"); + Assert.assertEquals(p.getLengthOfImmediatelyFollowingIndel(), 2, "Wrong event length"); + Assert.assertEquals(p.getBasesOfImmediatelyFollowingInsertion(), "CT", "Inserted bases are incorrect"); break; } } @@ -240,7 +242,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { // PileupElement pe = p.iterator().next(); // Assert.assertTrue(pe.isBeforeInsertion()); // Assert.assertFalse(pe.isAfterInsertion()); -// Assert.assertEquals(pe.getEventBases(), "A"); +// Assert.assertEquals(pe.getBasesOfImmediatelyFollowingInsertion(), "A"); } SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,secondLocus,10); @@ -261,10 +263,72 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { // PileupElement pe = p.iterator().next(); // Assert.assertTrue(pe.isBeforeInsertion()); // Assert.assertFalse(pe.isAfterInsertion()); -// Assert.assertEquals(pe.getEventBases(), "AAAAAAAAAA"); +// Assert.assertEquals(pe.getBasesOfImmediatelyFollowingInsertion(), "AAAAAAAAAA"); } } + + ///////////////////////////////////////////// + // get event length and bases calculations // + ///////////////////////////////////////////// + + @DataProvider(name = "IndelLengthAndBasesTest") + public Object[][] makeIndelLengthAndBasesTest() { + final String EVENT_BASES = "ACGTACGTACGT"; + final List tests = new LinkedList(); + + for ( int eventSize = 1; eventSize < 10; eventSize++ ) { + for ( final CigarOperator indel : Arrays.asList(CigarOperator.D, CigarOperator.I) ) { + final String cigar = String.format("2M%d%s1M", eventSize, indel.toString()); + final String eventBases = indel == CigarOperator.D ? "" : EVENT_BASES.substring(0, eventSize); + final int readLength = 3 + eventBases.length(); + + GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, readLength); + read.setReadBases(("TT" + eventBases + "A").getBytes()); + final byte[] quals = new byte[readLength]; + for ( int i = 0; i < readLength; i++ ) + quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE); + read.setBaseQualities(quals); + read.setCigarString(cigar); + + tests.add(new Object[]{read, indel, eventSize, eventBases.equals("") ? null : eventBases}); + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "IndelLengthAndBasesTest") + public void testIndelLengthAndBasesTest(GATKSAMRecord read, final CigarOperator op, final int eventSize, final String eventBases) { + // create the iterator by state with the fake reads and fake records + li = makeLTBS(Arrays.asList((SAMRecord)read), createTestReadProperties()); + + Assert.assertTrue(li.hasNext()); + + final PileupElement firstMatch = getFirstPileupElement(li.next()); + + Assert.assertEquals(firstMatch.getLengthOfImmediatelyFollowingIndel(), 0, "Length != 0 for site not adjacent to indel"); + Assert.assertEquals(firstMatch.getBasesOfImmediatelyFollowingInsertion(), null, "Getbases of following event should be null at non-adajenct event"); + + Assert.assertTrue(li.hasNext()); + + final PileupElement pe = getFirstPileupElement(li.next()); + + if ( op == CigarOperator.D ) + Assert.assertTrue(pe.isBeforeDeletionStart()); + else + Assert.assertTrue(pe.isBeforeInsertion()); + + Assert.assertEquals(pe.getLengthOfImmediatelyFollowingIndel(), eventSize, "Length of event failed"); + Assert.assertEquals(pe.getBasesOfImmediatelyFollowingInsertion(), eventBases, "Getbases of following event failed"); + } + + private PileupElement getFirstPileupElement(final AlignmentContext context) { + final ReadBackedPileup p = context.getBasePileup(); + Assert.assertEquals(p.getNumberOfElements(), 1); + return p.iterator().next(); + } + //////////////////////////////////////////// // comprehensive LIBS/PileupElement tests // //////////////////////////////////////////// @@ -274,32 +338,18 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { final List tests = new LinkedList(); // tests.add(new Object[]{new LIBSTest("1X2D2P2X", 1)}); -// return tests.toArray(new Object[][]{}); - -// tests.add(new Object[]{new LIBSTest("1I", 1)}); -// tests.add(new Object[]{new LIBSTest("10I", 10)}); -// tests.add(new Object[]{new LIBSTest("2M2I2M", 6)}); -// tests.add(new Object[]{new LIBSTest("2M2I", 4)}); -// //TODO -- uncomment these when LIBS is fixed -// //{new LIBSTest("2I2M", 4, Arrays.asList(2,3), Arrays.asList(IS_AFTER_INSERTION_FLAG,0))}, -// //{new LIBSTest("1I1M1D1M", 3, Arrays.asList(0,1), Arrays.asList(IS_AFTER_INSERTION_FLAG | IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG))}, -// //{new LIBSTest("1S1I1M", 3, Arrays.asList(2), Arrays.asList(IS_AFTER_INSERTION_FLAG))}, -// //{new LIBSTest("1M2D2M", 3)}, -// tests.add(new Object[]{new LIBSTest("1S1M", 2)}); -// tests.add(new Object[]{new LIBSTest("1M1S", 2)}); -// tests.add(new Object[]{new LIBSTest("1S1M1I", 3)}); - // return tests.toArray(new Object[][]{}); return createLIBSTests( Arrays.asList(1, 2), Arrays.asList(1, 2, 3, 4)); + // return createLIBSTests( // Arrays.asList(2), // Arrays.asList(3)); } - @Test(dataProvider = "LIBSTest") + @Test(enabled = false, dataProvider = "LIBSTest") public void testLIBS(LIBSTest params) { // create the iterator by state with the fake reads and fake records final GATKSAMRecord read = params.makeRead(); diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/old/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/old/LocusIteratorByStateUnitTest.java index 5864d2c8c..9fd2cdfeb 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/old/LocusIteratorByStateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/old/LocusIteratorByStateUnitTest.java @@ -107,8 +107,8 @@ // for (PileupElement p : pileup) { // if (p.isBeforeInsertion()) { // foundIndel = true; -// Assert.assertEquals(p.getEventLength(), 2, "Wrong event length"); -// Assert.assertEquals(p.getEventBases(), "CT", "Inserted bases are incorrect"); +// Assert.assertEquals(p.getLengthOfImmediatelyFollowingIndel(), 2, "Wrong event length"); +// Assert.assertEquals(p.getBasesOfImmediatelyFollowingInsertion(), "CT", "Inserted bases are incorrect"); // break; // } // } @@ -222,7 +222,7 @@ // PileupElement pe = p.iterator().next(); // Assert.assertTrue(pe.isBeforeInsertion()); // Assert.assertFalse(pe.isAfterInsertion()); -// Assert.assertEquals(pe.getEventBases(), "A"); +// Assert.assertEquals(pe.getBasesOfImmediatelyFollowingInsertion(), "A"); // } // // SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,secondLocus,10); @@ -242,7 +242,7 @@ // PileupElement pe = p.iterator().next(); // Assert.assertTrue(pe.isBeforeInsertion()); // Assert.assertFalse(pe.isAfterInsertion()); -// Assert.assertEquals(pe.getEventBases(), "AAAAAAAAAA"); +// Assert.assertEquals(pe.getBasesOfImmediatelyFollowingInsertion(), "AAAAAAAAAA"); // } // } //