From 15a410b24b3ed42d6d05698da33f3824cf6f14cf Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 22 Sep 2011 13:15:41 -0400 Subject: [PATCH 1/9] Updating md5 for fixed file --- .../sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java index b4a8498e1..1b2a6e82e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java @@ -33,7 +33,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(b36KGReference, "symbolic_alleles_2.vcf"), 1, - Arrays.asList("6645babc8c7d46be0da223477c7b1291")); + Arrays.asList("3008d6f5044bc14801e5c58d985dec72")); executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec); } } From 9c1728416cf6773e510fc8c8843055189e1b03a4 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 22 Sep 2011 13:16:42 -0400 Subject: [PATCH 2/9] Revert "Updating md5 for fixed file" because this was fixed properly in unstable (but will break SnpEff if put into Stable). This reverts commit 6b4182c6ab3e214da4c73bc6f3687ac6d1c0b72c. --- .../sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java index 1b2a6e82e..b4a8498e1 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java @@ -33,7 +33,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(b36KGReference, "symbolic_alleles_2.vcf"), 1, - Arrays.asList("3008d6f5044bc14801e5c58d985dec72")); + Arrays.asList("6645babc8c7d46be0da223477c7b1291")); executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec); } } From 4e9020c9f7bd6fc429090213ad06c9a6b46ecc70 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 22 Sep 2011 13:28:25 -0400 Subject: [PATCH 3/9] Fixed alignment start for hard clipping insertions --- .../sting/utils/clipreads/ClippingOp.java | 38 ++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 951ab265c..0c2def1c6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -432,25 +432,37 @@ public class ClippingOp { } private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) { - int shift = 0; + int newShift = 0; + int oldShift = 0; + int deletionShift = 0; - // Rewind to previous start (by counting everything that was already clipped in this read) - for (CigarElement cigarElement : oldCigar.getCigarElements()) { - if (!cigarElement.getOperator().consumesReferenceBases()) - shift -= cigarElement.getLength(); - else - break; - } - - // Advance to new start (by counting everything new that has been clipped ) for (CigarElement cigarElement : newCigar.getCigarElements()) { - if (!cigarElement.getOperator().consumesReferenceBases()) - shift += cigarElement.getLength(); + if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) + newShift += cigarElement.getLength(); else break; } - return shift; + for (CigarElement cigarElement : oldCigar.getCigarElements()) { + if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP ) + oldShift += Math.min(cigarElement.getLength(), newShift - oldShift); + else + break; + } + + int basesClipped = 0; + for (CigarElement cigarElement : oldCigar.getCigarElements()) { + if (basesClipped > newShift) // are we beyond the clipped region? + break; + + else if (cigarElement.getOperator() == CigarOperator.DELETION) // if this is a deletion, we have to adjust the starting shift + deletionShift += cigarElement.getLength(); + + else if (cigarElement.getOperator() != CigarOperator.INSERTION) // if it's not an insertion or deletion, than it counts as hard clipped base. + basesClipped += cigarElement.getLength(); + } + + return newShift - oldShift + deletionShift; } private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) { From 80d7300de4acaa172e256e91f02a962d6479c6e0 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 22 Sep 2011 13:28:42 -0400 Subject: [PATCH 4/9] Unit test was passing in FORMAT as one of the sample names. There used to be a hack in the VCFHeader to check for this and remove it and I couldn't figure out why, but now I know. Hack was removed and now the unit test passes in only the sample names as per the contract. --- .../sting/utils/genotype/vcf/VCFWriterUnitTest.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java index a8e6593b1..35c6a4993 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java @@ -105,7 +105,6 @@ public class VCFWriterUnitTest extends BaseTest { public static VCFHeader createFakeHeader(Set metaData, Set additionalColumns) { metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString())); metaData.add(new VCFHeaderLine("two", "2")); - additionalColumns.add("FORMAT"); additionalColumns.add("extra1"); additionalColumns.add("extra2"); return new VCFHeader(metaData, additionalColumns); @@ -159,6 +158,6 @@ public class VCFWriterUnitTest extends BaseTest { Assert.assertTrue(additionalColumns.contains(key)); index++; } - Assert.assertEquals(index+1, additionalColumns.size() /* for the header field we don't see */); + Assert.assertEquals(index, additionalColumns.size()); } } From 1acf7945c544a923c5024b06a8322d351c889584 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 22 Sep 2011 14:51:14 -0400 Subject: [PATCH 5/9] Fixed hard clipped cigar and alignment start * Hard clipped Cigar now includes all insertions that were hard clipped and not the deletions. * The alignment start is now recalculated according to the new hard clipped cigar representation --- .../sting/utils/clipreads/ClippingOp.java | 6 +++--- .../sting/utils/clipreads/ReadClipper.java | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 0c2def1c6..47ce8165c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -458,7 +458,7 @@ public class ClippingOp { else if (cigarElement.getOperator() == CigarOperator.DELETION) // if this is a deletion, we have to adjust the starting shift deletionShift += cigarElement.getLength(); - else if (cigarElement.getOperator() != CigarOperator.INSERTION) // if it's not an insertion or deletion, than it counts as hard clipped base. + else basesClipped += cigarElement.getLength(); } @@ -474,8 +474,8 @@ public class ClippingOp { return -clippedLength; } - if (cigarElement.getOperator() == CigarOperator.DELETION) - return cigarElement.getLength(); +// if (cigarElement.getOperator() == CigarOperator.DELETION) +// return cigarElement.getLength(); return 0; } diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 9b9ce4fdf..e2ecbe46b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -71,21 +71,21 @@ public class ReadClipper { private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart); - int stop = (refStop < 0) ? read.getReadLength() - 1: ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop); + int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop); - if (start < 0 || stop > read.getReadLength() - 1 + numDeletions(read)) + if (start < 0 || stop > read.getReadLength() - 1) throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); - // TODO add check in the Hardclip function - if ( start > stop ) - stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read)); - + if ( start > stop ) { +// stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read)); + throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!"); + } //This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into //an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read - stop -= numDeletions(read); - if ( start > stop ) - start -= numDeletions(read); +// stop -= numDeletions(read); +// if ( start > stop ) +// start -= numDeletions(read); //System.out.println("Clipping start/stop: " + start + "/" + stop); From 39b54211d079dc2c70a2a9b4a38b9e08880b24df Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 22 Sep 2011 15:46:55 -0400 Subject: [PATCH 7/9] Fixed hard clipping soft clipped bases after hard clips if soft clipped bases were after a hard clipped section of the read, the hard clip was clipping the left soft clip tail as if it were a right tail. Mayhem. --- .../org/broadinstitute/sting/utils/clipreads/ReadClipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index e2ecbe46b..d0e7f8371 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -145,7 +145,7 @@ public class ReadClipper { cutLeft = readIndex + cigarElement.getLength() - 1; } } - else + else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP) rightTail = true; if (cigarElement.getOperator().consumesReadBases())