From 5b6d279a2e6ebe1c20dab124561307da890408a8 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Wed, 29 Jun 2011 10:21:27 -0400 Subject: [PATCH 01/15] Two bug fixes: a) Modified the way clipped bases are dealt with in ReadPosRankSumTest when annotating indels. Cigar string cannot be trusted because BWA can clip good high quality bases and some sites get incorrect ReadPos annotations if BWA systematically clips at an indel breakpoint. b) PL header needs to specify "." as length. Otherwise we fail VCF validation if multiallelic sites are present. --- .../walkers/annotator/ReadPosRankSumTest.java | 85 ++++++++++++++++++- .../indels/PairHMMIndelErrorModel.java | 8 +- .../sting/utils/codecs/vcf/VCFUtils.java | 2 +- 3 files changed, 86 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index 2a17cd436..727904a3b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -4,9 +4,11 @@ import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import net.sf.samtools.*; import java.util.Arrays; import java.util.HashMap; @@ -67,20 +69,95 @@ public class ReadPosRankSumTest extends RankSumTest { altLikelihood = like; } } - int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p.getOffset(), 0, 0); - final int numAlignedBases = AlignmentUtils.getNumAlignedBases(p.getRead()); + + int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset()); + final int numAlignedBases = getNumAlignedBases(p.getRead()); + + int rp = readPos; if( readPos > numAlignedBases / 2 ) { readPos = numAlignedBases - ( readPos + 1 ); } + //if (DEBUG) System.out.format("R:%s start:%d C:%s offset:%d rp:%d readPos:%d alignedB:%d\n",p.getRead().getReadName(),p.getRead().getAlignmentStart(),p.getRead().getCigarString(),p.getOffset(), rp, readPos, numAlignedBases); - if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH)) + + // if event is beyond span of read just return and don't consider this element. This can happen, for example, with reads + // where soft clipping still left strings of low quality bases but these are later removed by indel-specific clipping. + // if (readPos < -1) + // return; + if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH)) { refQuals.add((double)readPos); - else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH)) + //if (DEBUG) System.out.format("REF like: %4.1f, pos: %d\n",refLikelihood,readPos); + } + else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH)) { altQuals.add((double)readPos); + //if (DEBUG) System.out.format("ALT like: %4.1f, pos: %d\n",refLikelihood,readPos); + + } } } } + int getNumClippedBasesAtStart(SAMRecord read) { + // compute total number of clipped bases (soft or hard clipped) + // check for hard clips (never consider these bases): + final Cigar c = read.getCigar(); + final CigarElement first = c.getCigarElement(0); + + int numStartClippedBases = 0; + if (first.getOperator() == CigarOperator.H) { + numStartClippedBases = first.getLength(); + } + byte[] unclippedReadBases = read.getReadBases(); + byte[] unclippedReadQuals = read.getBaseQualities(); + + // Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative, + // and may leave a string of Q2 bases still hanging off the reads. + for (int i=numStartClippedBases; i < unclippedReadBases.length; i++) { + if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD) + numStartClippedBases++; + else + break; + + } + + return numStartClippedBases; + } + + int getNumAlignedBases(SAMRecord read) { + return read.getReadLength() - getNumClippedBasesAtStart(read) - getNumClippedBasesAtEnd(read); + } + + int getNumClippedBasesAtEnd(SAMRecord read) { + // compute total number of clipped bases (soft or hard clipped) + // check for hard clips (never consider these bases): + final Cigar c = read.getCigar(); + CigarElement last = c.getCigarElement(c.numCigarElements()-1); + + int numEndClippedBases = 0; + if (last.getOperator() == CigarOperator.H) { + numEndClippedBases = last.getLength(); + } + byte[] unclippedReadBases = read.getReadBases(); + byte[] unclippedReadQuals = read.getBaseQualities(); + + // Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative, + // and may leave a string of Q2 bases still hanging off the reads. + for (int i=unclippedReadBases.length-numEndClippedBases-1; i >= 0; i-- ){ + if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD) + numEndClippedBases++; + else + break; + } + + + return numEndClippedBases; + } + + int getOffsetFromClippedReadStart(SAMRecord read, int offset) { + + + return offset - getNumClippedBasesAtStart(read); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index e7b9cfa68..ab7ae4184 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -32,9 +32,9 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; /*import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Covariate; -import org.broadinstitute.sting.walkers.IndelCountCovariates.RecalDataManager; -import org.broadinstitute.sting.walkers.IndelCountCovariates.RecalDatum; -import org.broadinstitute.sting.walkers.IndelCountCovariates.RecalibrationArgumentCollection; +import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDataManager; +import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDatum; +import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalibrationArgumentCollection; */import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.classloader.PluginManager; @@ -57,7 +57,7 @@ import java.util.regex.Pattern; public class PairHMMIndelErrorModel { - private final int BASE_QUAL_THRESHOLD = 10; + public static final int BASE_QUAL_THRESHOLD = 20; private static final int MATCH_OFFSET = 0; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index e919d5c25..ecede068e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -190,7 +190,7 @@ public class VCFUtils { result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality")); result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)")); - result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic")); + result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, -1, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; if site is not biallelic, number of likelihoods if n*(n+1)/2")); return result; } From f736a1d61b859f5c439cfcb856faa3a88011ae82 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Wed, 29 Jun 2011 13:37:15 -0400 Subject: [PATCH 02/15] Updated md5's from previous checkin --- .../UnifiedGenotyperIntegrationTest.java | 48 +++++++++---------- .../BatchMergeIntegrationTest.java | 2 +- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 51f2b5786..20fa7719f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("a604a64252a8538b7d13f52bd068f797")); + Arrays.asList("258e1954e6ae55c89abc6a716e19cbe0")); executeTest("test MultiSample Pilot1", spec); } @@ -54,12 +54,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("5844eda3596732a16c8559f5bfbe1723")); + Arrays.asList("edeb1db288a24baff59575ceedd94243")); executeTest("test MultiSample Pilot2 with alleles passed in", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("db4664a1785c4efb4cd9057478aa846f")); + Arrays.asList("581990130d90071b084024f4cd7caf91")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -67,7 +67,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("36c70ec27a25f88fe2364bba2f961843")); + Arrays.asList("d120db27d694a6da32367cc4fb5770fa")); executeTest("test SingleSample Pilot2", spec); } @@ -77,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "212eab2024903997625ba98009063226"; + private final static String COMPRESSED_OUTPUT_MD5 = "75e5c430ed39f79f24e375037a388dc4"; @Test public void testCompressedOutput() { @@ -107,7 +107,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String md5 = "f83a33a1ecc350cae0c002e4a43a7861"; + String md5 = "a29615dd37222a11b8dadd341b53e43c"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -138,9 +138,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testCallingParameters() { HashMap e = new HashMap(); - e.put( "--min_base_quality_score 26", "d10d0be159d80e22b9c81970ee098daf" ); - e.put( "--min_mapping_quality_score 26", "f76099c403b60b6045a0ae7d9f589dc4" ); - e.put( "--p_nonref_model GRID_SEARCH", "cda395fdf7352e07537610f52a6d0cdc" ); + e.put( "--min_base_quality_score 26", "93e6269e38db9bc1732555e9969e3648" ); + e.put( "--min_mapping_quality_score 26", "64be99183c100caed4aa5f8bad64c7e9" ); + e.put( "--p_nonref_model GRID_SEARCH", "0592fe33f705ad8e2f13619fcf157805" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -153,9 +153,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOutputParameter() { HashMap e = new HashMap(); - e.put( "-sites_only", "9b85d9c10d634315d20aefa565dbab60" ); - e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "245abbb39de43e89f63918a6771c0c14" ); - e.put( "--output_mode EMIT_ALL_SITES", "fb7a59b318ecdb46fd96024be7e41e0e" ); + e.put( "-sites_only", "1483e637dc0279935a7f90d136d147bb" ); + e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "adcd91bc7dae8020df8caf1a30060e98" ); + e.put( "--output_mode EMIT_ALL_SITES", "b708acc2fa40f336bcd2d0c70091e07e" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -169,12 +169,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("f76099c403b60b6045a0ae7d9f589dc4")); + Arrays.asList("64be99183c100caed4aa5f8bad64c7e9")); executeTest("test confidence 1", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("879e5ab09bd0d37e0300dd34ec09db81")); + Arrays.asList("e76ca54232d02f0d92730e1affeb804e")); executeTest("test confidence 2", spec2); } @@ -186,8 +186,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "c7123f7b84b402f4959db950326afc13" ); - e.put( 1.0 / 1850, "75e6043a68265ab6deb284bb753801f9" ); + e.put( 0.01, "18d37f7f107853b5e32c757b4e143205" ); + e.put( 1.0 / 1850, "2bcb90ce2f7542bf590f7612018fae8e" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("3f45b2af75123e48b89fa1759c444ec0")); + Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d")); executeTest(String.format("test multiple technologies"), spec); } @@ -230,7 +230,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("cede928592575e617f1323866348c256")); + Arrays.asList("0919ab7e513c377610e23a67d33608fa")); executeTest(String.format("test calling with BAQ"), spec); } @@ -244,7 +244,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq OFF", 1, - Arrays.asList("3f45b2af75123e48b89fa1759c444ec0")); + Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d")); executeTest(String.format("test calling with BAQ OFF"), spec); } @@ -263,7 +263,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("7fe14d81f12d5d57e3a522b2a4f07fc6")); + Arrays.asList("cb37348c41b8181be829912730f747e1")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -278,7 +278,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("a7da8acce1957334619f3dfeac3d1379")); + Arrays.asList("ca5b6a5fb53ae401b146cc3044f454f2")); executeTest(String.format("test indel caller in SLX witn low min allele count"), spec); } @@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("234b6c336890cc6d9a495bc40f09d126")); + Arrays.asList("ca4343a4ab6d3cce94ce61d7d1910f81")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -301,14 +301,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("9e342e3b73ae4887620195417e1af44a")); + Arrays.asList("3f555b53e9dd14cf7cdf96c24e322364")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("f265726403ca3f28518cb4290b7bee84")); + Arrays.asList("1b9764b783acf7822edc58e6822eef5b")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java index 11eb615d2..7e1d86105 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java @@ -40,7 +40,7 @@ public class BatchMergeIntegrationTest extends WalkerTest { + " -B:alleles,VCF " + alleles + " -I " + bam, 1, - Arrays.asList("b7839064dc4979400af4792460d9884b")); + Arrays.asList("f4ed8f4ef2cba96823c06e90e9d0de35")); executeTest("testBatchMerge UG genotype given alleles:" + new File(bam).getName() + " with " + new File(alleles).getName(), spec); } } \ No newline at end of file From 8586c86bc4d9c02c315100d2278f56e082134875 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 29 Jun 2011 13:56:37 -0400 Subject: [PATCH 03/15] My commit from last week to fix the old dbsnp rod conversion only worked for locus traversals. Updated now to work for all traversals. --- .../sting/gatk/refdata/VariantContextAdaptors.java | 4 ++-- .../variantcontext/VariantContextIntegrationTest.java | 8 ++++---- .../walkers/indels/IndelRealignerIntegrationTest.java | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 7d7a9d32c..c7c0468e7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -127,9 +127,9 @@ public class VariantContextAdaptors { Map attributes = new HashMap(); attributes.put(VariantContext.ID_KEY, dbsnp.getRsID()); if ( DbSNPHelper.isDeletion(dbsnp) ) { - int index = ref.getLocus().getStart() - ref.getWindow().getStart() - 1; + int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; if ( index < 0 ) - throw new ReviewedStingException("DbSNP conversion requested using a reference context with no window; we will fail to convert deletions"); + return null; // we weren't given enough reference context to create the VariantContext attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index])); } Collection genotypes = null; diff --git a/public/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java index 5506b2d27..a72cf0713 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java @@ -19,12 +19,12 @@ public class VariantContextIntegrationTest extends WalkerTest { static HashMap expectations = new HashMap(); static { - expectations.put("-L 1:1-10000 --printPerLocus", "493bf9bcde93c08c8c46f72c8e98cf2f"); - expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "bd9e062b23c2c48fef8c299dc1d294d5"); + expectations.put("-L 1:1-10000 --printPerLocus", "e4ee2eaa3114888e918a1c82df7a027a"); + expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "5b5635e4877d82e8a27d70dac24bda2f"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "ceced3f270b4fe407ee83bc9028becde"); expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "9a9b9e283553c28bf58de1cafa38fe92"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "2097e32988d603d3b353b50218c86d3b"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "91c6a9489256d9ce77c8fedf7221a961"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "033bd952fca048fe1a4f6422b57ab2ed"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5e40980c02797f90821317874426a87a"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "e5a00766f8c1ff9cf92310bafdec3126"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc"); @@ -58,7 +58,7 @@ public class VariantContextIntegrationTest extends WalkerTest { // this really just tests that we are seeing the same number of objects over all of chr1 WalkerTestSpec spec = new WalkerTestSpec( root + " -L 1" + " -o %s", 1, // just one output file - Arrays.asList("d2a3f2fe329a0a64145cfd19fde45b99")); + Arrays.asList("529f936aa6c303658b23caf4e527782f")); executeTest("testLargeScaleConversion", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 0c841d746..2676f7067 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -52,7 +52,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { WalkerTestSpec spec2 = new WalkerTestSpec( baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -D " + GATKDataLocation + "dbsnp_129_b36.rod", 1, - Arrays.asList("78850024ac9ff3ba51b6f097c7041c1d")); + Arrays.asList("05a114623c126b0398fbc1703437461e")); executeTest("realigner known indels only from dbsnp", spec2); } From 9a22d78e4860d1f3aec718e175ef6acec5be1c9e Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Wed, 29 Jun 2011 14:48:10 -0400 Subject: [PATCH 05/15] Fix up gsalib target to work with new public/private directory structure. --- build.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.xml b/build.xml index 3173cb82d..cf8623bed 100644 --- a/build.xml +++ b/build.xml @@ -978,7 +978,7 @@ - + From 33c67a139cd395ef4374f8eed4dcebfb3634b134 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 29 Jun 2011 14:56:02 -0400 Subject: [PATCH 06/15] Wrong package; this should have been moved when VC got moved in from Tribble --- .../variantcontext/VariantContextIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename public/java/test/org/broadinstitute/sting/{gatk/contexts => utils}/variantcontext/VariantContextIntegrationTest.java (98%) diff --git a/public/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java similarity index 98% rename from public/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java rename to public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index a72cf0713..5d42f8d0c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -1,6 +1,6 @@ -package org.broadinstitute.sting.gatk.contexts.variantcontext; +package org.broadinstitute.sting.utils.variantcontext; import org.broadinstitute.sting.WalkerTest; import org.testng.annotations.Test; From 1f19afe1d9e8724ea998ac62c481e2ebdc4af7a5 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 29 Jun 2011 15:54:09 -0400 Subject: [PATCH 08/15] Fixed bug in the IndelRealigner: now that variants are correctly typed in VariantContext, it is possible that a variant can be an indel but neither an insertion or a deletion; added a isComplexIndel() method and now we check for such an event in the realigner (we don't use them to generate alternate consenses). Also, added a isMNP() method while I was there so that it would be consistent with other variant types. --- .../sting/gatk/walkers/indels/IndelRealigner.java | 10 +++++----- .../gatk/walkers/phasing/AnnotateMNPsWalker.java | 2 +- .../gatk/walkers/sequenom/PickSequenomProbes.java | 2 +- .../gatk/walkers/variantutils/ValidateVariants.java | 4 ++-- .../sting/utils/variantcontext/VariantContext.java | 11 +++++++++++ 5 files changed, 20 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 28b88f9f2..33de08bb6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -798,11 +798,11 @@ public class IndelRealigner extends ReadWalker { private void generateAlternateConsensesFromKnownIndels(final Set altConsensesToPopulate, final int leftmostIndex, final byte[] reference) { for ( VariantContext knownIndel : knownIndelsToTry ) { - if ( knownIndel == null || !knownIndel.isIndel() ) + if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() ) continue; byte[] indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length()); int start = knownIndel.getStart() - leftmostIndex + 1; - Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel.isDeletion()); + Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel); if ( c != null ) altConsensesToPopulate.add(c); } @@ -988,7 +988,7 @@ public class IndelRealigner extends ReadWalker { } // create a Consensus from just the indel string that falls on the reference - private Consensus createAlternateConsensus(final int indexOnRef, final byte[] reference, final byte[] indelStr, final boolean isDeletion) { + private Consensus createAlternateConsensus(final int indexOnRef, final byte[] reference, final byte[] indelStr, final VariantContext indel) { if ( indexOnRef < 0 || indexOnRef >= reference.length ) return null; @@ -1002,11 +1002,11 @@ public class IndelRealigner extends ReadWalker { if ( indexOnRef > 0 ) cigar.add(new CigarElement(indexOnRef, CigarOperator.M)); - if ( isDeletion ) { + if ( indel.isDeletion() ) { refIdx += indelStr.length; cigar.add(new CigarElement(indelStr.length, CigarOperator.D)); } - else { + else if ( indel.isInsertion() ) { for ( byte b : indelStr ) sb.append((char)b); cigar.add(new CigarElement(indelStr.length, CigarOperator.I)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java index 7989f52db..81d9b4ddb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java @@ -161,7 +161,7 @@ public class AnnotateMNPsWalker extends RodWalker { boolean atStartOfVc = curLocus.getStart() == vcLoc.getStart(); boolean atEndOfVc = curLocus.getStart() == vcLoc.getStop(); - if (vc.getType() == VariantContext.Type.MNP) { + if (vc.isMNP()) { logger.debug("Observed MNP at " + vcLoc); if (isChrM(vc)) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java index f350e7531..fde233b5d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java @@ -209,7 +209,7 @@ public class PickSequenomProbes extends RodWalker { String assay_sequence; if ( vc.isSNP() ) assay_sequence = leading_bases + "[" + (char)ref.getBase() + "/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases; - else if ( vc.getType() == VariantContext.Type.MNP ) + else if ( vc.isMNP() ) assay_sequence = leading_bases + "[" + new String(vc.getReference().getBases()) + "/" + new String(vc.getAlternateAllele(0).getBases())+"]"+trailing_bases.substring(vc.getReference().length()-1); else if ( vc.isInsertion() ) assay_sequence = leading_bases + (char)ref.getBase() + "[-/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index ec7395f85..1bd73414c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -113,7 +113,7 @@ public class ValidateVariants extends RodWalker { observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING); } // deletions - else if ( vc.isDeletion() || vc.isMixed() || vc.getType() == VariantContext.Type.MNP ) { + else if ( vc.isDeletion() || vc.isMixed() || vc.isMNP() ) { // we can't validate arbitrarily long deletions if ( reportedRefAllele.length() > 100 ) { logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart())); @@ -123,7 +123,7 @@ public class ValidateVariants extends RodWalker { // deletions are associated with the (position of) the last (preceding) non-deleted base; // hence to get actually deleted bases we need offset = 1 int offset = 1 ; - if ( vc.getType() == VariantContext.Type.MNP ) offset = 0; // if it's an MNP, the reported position IS the first modified base + if ( vc.isMNP() ) offset = 0; // if it's an MNP, the reported position IS the first modified base byte[] refBytes = ref.getBases(); byte[] trueRef = new byte[reportedRefAllele.length()]; for (int i = 0; i < reportedRefAllele.length(); i++) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 87466e21b..92262f1bf 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -566,10 +566,21 @@ public class VariantContext implements Feature { // to enable tribble intergrati return getType() == Type.INDEL && getAlternateAllele(0).isNull(); } + /** + * @return true if the alleles indicate neither a simple deletion nor a simple insertion + */ + public boolean isComplexIndel() { + return isIndel() && !isDeletion() && !isInsertion(); + } + public boolean isSymbolic() { return getType() == Type.SYMBOLIC; } + public boolean isMNP() { + return getType() == Type.MNP; + } + /** * convenience method for indels * From 70ba851478a1805509d306a8514c192b57b38e6b Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 29 Jun 2011 15:59:10 -0400 Subject: [PATCH 09/15] Might as well check for the illegal state and throw an exception --- .../sting/gatk/walkers/indels/IndelRealigner.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 33de08bb6..3cfa58c02 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -1010,6 +1010,8 @@ public class IndelRealigner extends ReadWalker { for ( byte b : indelStr ) sb.append((char)b); cigar.add(new CigarElement(indelStr.length, CigarOperator.I)); + } else { + throw new ReviewedStingException("Creating an alternate consensus from a complex indel is not allowed"); } if ( reference.length - refIdx > 0 ) From 1085df8b7b3f23e853444074bf9d202c9a54be1a Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 29 Jun 2011 16:05:32 -0400 Subject: [PATCH 11/15] Making the BQSR pipeline publicly available and supported. this is for all the Pacbio validation that is going on right no in the cancer group. They are all using this script, and I'm happy to support it. --- .../core/RecalibrateBaseQualities.scala | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100755 public/scala/qscript/core/RecalibrateBaseQualities.scala diff --git a/public/scala/qscript/core/RecalibrateBaseQualities.scala b/public/scala/qscript/core/RecalibrateBaseQualities.scala new file mode 100755 index 000000000..ba13d267a --- /dev/null +++ b/public/scala/qscript/core/RecalibrateBaseQualities.scala @@ -0,0 +1,91 @@ +package core + +import org.broadinstitute.sting.queue.QScript +import org.broadinstitute.sting.queue.extensions.gatk._ +import net.sf.samtools.SAMFileReader + +/** + * Created by IntelliJ IDEA. + * User: carneiro + * Date: 4/20/11 + * Time: 16:29 PM + */ + + +class RecalibrateBaseQualities extends QScript { + + @Input(doc="path to GenomeAnalysisTK.jar", shortName="gatk", required=true) + var GATKjar: File = _ + + @Input(doc="input BAM file - or list of BAM files", shortName="i", required=true) + var input: File = _ + + @Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=false) + var R: String = new File("/humgen/gsa-scr1/carneiro/stable/R") + + @Input(doc="Reference fasta file", shortName="R", required=false) + var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta") + + @Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false) + var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf") + + val queueLogDir: String = ".qlog/" + var nContigs: Int = 0 + + def getNumberOfContigs(bamFile: File): Int = { + val samReader = new SAMFileReader(new File(bamFile)) + return samReader.getFileHeader.getSequenceDictionary.getSequences.size() + } + + def script = { + + nContigs = getNumberOfContigs(input) + + val recalFile1: File = new File("recal1.csv") + val recalFile2: File = new File("recal2.csv") + val recalBam: File = swapExt(input, ".bam", "recal.bam") + val path1: String = "before" + val path2: String = "after" + + add(cov(input, recalFile1), + recal(input, recalFile1, recalBam), + cov(recalBam, recalFile2), + analyzeCovariates(recalFile1, path1), + analyzeCovariates(recalFile2, path2)) + } + + trait CommandLineGATKArgs extends CommandLineGATK { + this.jarFile = GATKjar + this.reference_sequence = reference + this.memoryLimit = 4 + this.isIntermediate = true + } + + case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs { + this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) + this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") + this.input_file :+= inBam + this.recal_file = outRecalFile + this.analysisName = queueLogDir + outRecalFile + ".covariates" + this.jobName = queueLogDir + outRecalFile + ".covariates" + this.scatterCount = nContigs + } + + case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs { + this.input_file :+= inBam + this.recal_file = inRecalFile + this.out = outBam + this.isIntermediate = false + this.analysisName = queueLogDir + outBam + ".recalibration" + this.jobName = queueLogDir + outBam + ".recalibration" + this.scatterCount = nContigs + } + + case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates { + this.resources = R + this.recal_file = inRecalFile + this.output_dir = outPath.toString + this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates" + this.jobName = queueLogDir + inRecalFile + ".analyze_covariates" + } +} \ No newline at end of file From f18fffd6254a9973f9f95b521278b6444d7fb80e Mon Sep 17 00:00:00 2001 From: David Roazen Date: Wed, 29 Jun 2011 17:36:47 -0400 Subject: [PATCH 12/15] Fixing broken paths to the testdata directory throughout the codebase. --- .../test/org/broadinstitute/sting/BaseTest.java | 2 +- .../sting/gatk/GenomeAnalysisEngineUnitTest.java | 14 +++++++------- .../sting/utils/bed/BedParserUnitTest.java | 2 +- .../sting/utils/codecs/hapmap/HapMapUnitTest.java | 2 +- .../utils/codecs/vcf/IndexFactoryUnitTest.java | 4 ++-- .../sting/utils/text/ListFileUtilsUnitTest.java | 12 ++++++------ .../GenomeLocProcessingTrackerUnitTest.java | 2 +- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index 07f46c1c3..61bb8b34b 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -83,7 +83,7 @@ public abstract class BaseTest { */ public static final String MD5_FILE_DB_SUBDIR = "integrationtests"; - public static final String testDir = "testdata/"; + public static final String testDir = "public/testdata/"; /** before the class starts up */ static { diff --git a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java index a17c162f5..1e4625bf0 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java @@ -60,8 +60,8 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine(); Collection samFiles = new ArrayList(); - samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags())); - samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags())); + samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags())); + samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags())); testEngine.setSAMFileIDs(samFiles); testEngine.checkForDuplicateSamFiles(); @@ -72,10 +72,10 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine(); Collection samFiles = new ArrayList(); - samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags())); - samFiles.add(new SAMReaderID(new File("testdata/exampleNORG.bam"), new Tags())); - samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags())); - samFiles.add(new SAMReaderID(new File("testdata/exampleNORG.bam"), new Tags())); + samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags())); + samFiles.add(new SAMReaderID(new File("public/testdata/exampleNORG.bam"), new Tags())); + samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags())); + samFiles.add(new SAMReaderID(new File("public/testdata/exampleNORG.bam"), new Tags())); testEngine.setSAMFileIDs(samFiles); testEngine.checkForDuplicateSamFiles(); @@ -97,7 +97,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { GATKArgumentCollection argCollection = new GATKArgumentCollection(); testEngine.setArguments(argCollection); - File fastaFile = new File("testdata/exampleFASTA.fasta"); + File fastaFile = new File("public/testdata/exampleFASTA.fasta"); GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile)); testEngine.setGenomeLocParser(genomeLocParser); diff --git a/public/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java index e592c99b9..56bf66f53 100644 --- a/public/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java @@ -21,7 +21,7 @@ public class BedParserUnitTest extends BaseTest { private static IndexedFastaSequenceFile seq; private GenomeLocParser genomeLocParser; - private File bedFile = new File("testdata/sampleBedFile.bed"); + private File bedFile = new File("public/testdata/sampleBedFile.bed"); @BeforeClass public void beforeTests() { diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/hapmap/HapMapUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/hapmap/HapMapUnitTest.java index 2e618d5eb..e912f97e9 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/hapmap/HapMapUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/hapmap/HapMapUnitTest.java @@ -39,7 +39,7 @@ import java.io.IOException; */ public class HapMapUnitTest { // our sample hapmap file - private final static File hapMapFile = new File("testdata/genotypes_chr1_ASW_phase3.3_first500.hapmap"); + private final static File hapMapFile = new File("public/testdata/genotypes_chr1_ASW_phase3.3_first500.hapmap"); private final static String knownLine = "rs2185539 C/T chr1 556738 + ncbi_b36 bbs urn:lsid:bbs.hapmap.org:Protocol:Phase3.r3:1 urn:lsid:bbs.hapmap.org:Assay:Phase3.r3_r" + "s2185539:1 urn:lsid:dcc.hapmap.org:Panel:US_African-30-trios:4 QC+ CC TC TT CT CC CC CC CC CC CC CC CC CC"; /** diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java index 3929ff388..2f6b589f4 100755 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java @@ -17,8 +17,8 @@ import java.util.*; */ public class IndexFactoryUnitTest { - File inputFile = new File("testdata/HiSeq.10000.vcf"); - File outputFile = new File("testdata/onTheFlyOutputTest.vcf"); + File inputFile = new File("public/testdata/HiSeq.10000.vcf"); + File outputFile = new File("public/testdata/onTheFlyOutputTest.vcf"); File outputFileIndex = Tribble.indexFile(outputFile); /** diff --git a/public/java/test/org/broadinstitute/sting/utils/text/ListFileUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/text/ListFileUtilsUnitTest.java index f85a2f599..f0b1de6fe 100644 --- a/public/java/test/org/broadinstitute/sting/utils/text/ListFileUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/text/ListFileUtilsUnitTest.java @@ -49,12 +49,12 @@ public class ListFileUtilsUnitTest extends BaseTest { public void testIgnoreBlankLinesInBAMListFiles() throws Exception { File tempListFile = createTempListFile("testIgnoreBlankLines", "", - "testdata/exampleBAM.bam", + "public/testdata/exampleBAM.bam", " " ); List expectedBAMFileListAfterUnpacking = new ArrayList(); - expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags())); + expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags())); performBAMListFileUnpackingTest(tempListFile, expectedBAMFileListAfterUnpacking); } @@ -63,13 +63,13 @@ public class ListFileUtilsUnitTest extends BaseTest { public void testCommentSupportInBAMListFiles() throws Exception { File tempListFile = createTempListFile("testCommentSupport", "#", - "testdata/exampleBAM.bam", - "#testdata/foo.bam", - " # testdata/bar.bam" + "public/testdata/exampleBAM.bam", + "#public/testdata/foo.bam", + " # public/testdata/bar.bam" ); List expectedBAMFileListAfterUnpacking = new ArrayList(); - expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags())); + expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags())); performBAMListFileUnpackingTest(tempListFile, expectedBAMFileListAfterUnpacking); } diff --git a/public/java/test/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTrackerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTrackerUnitTest.java index df4e1660d..78ab916db 100644 --- a/public/java/test/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTrackerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTrackerUnitTest.java @@ -29,7 +29,7 @@ public class GenomeLocProcessingTrackerUnitTest extends BaseTest { IndexedFastaSequenceFile fasta = null; GenomeLocParser genomeLocParser = null; String chr1 = null; - private final static String FILE_ROOT = "testdata/GLPTFile"; + private final static String FILE_ROOT = "public/testdata/GLPTFile"; @BeforeTest public void before() {