diff --git a/build.xml b/build.xml index 3173cb82d..cf8623bed 100644 --- a/build.xml +++ b/build.xml @@ -978,7 +978,7 @@ - + diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 7d7a9d32c..c7c0468e7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -127,9 +127,9 @@ public class VariantContextAdaptors { Map attributes = new HashMap(); attributes.put(VariantContext.ID_KEY, dbsnp.getRsID()); if ( DbSNPHelper.isDeletion(dbsnp) ) { - int index = ref.getLocus().getStart() - ref.getWindow().getStart() - 1; + int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; if ( index < 0 ) - throw new ReviewedStingException("DbSNP conversion requested using a reference context with no window; we will fail to convert deletions"); + return null; // we weren't given enough reference context to create the VariantContext attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index])); } Collection genotypes = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index 2a17cd436..727904a3b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -4,9 +4,11 @@ import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import net.sf.samtools.*; import java.util.Arrays; import java.util.HashMap; @@ -67,20 +69,95 @@ public class ReadPosRankSumTest extends RankSumTest { altLikelihood = like; } } - int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p.getOffset(), 0, 0); - final int numAlignedBases = AlignmentUtils.getNumAlignedBases(p.getRead()); + + int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset()); + final int numAlignedBases = getNumAlignedBases(p.getRead()); + + int rp = readPos; if( readPos > numAlignedBases / 2 ) { readPos = numAlignedBases - ( readPos + 1 ); } + //if (DEBUG) System.out.format("R:%s start:%d C:%s offset:%d rp:%d readPos:%d alignedB:%d\n",p.getRead().getReadName(),p.getRead().getAlignmentStart(),p.getRead().getCigarString(),p.getOffset(), rp, readPos, numAlignedBases); - if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH)) + + // if event is beyond span of read just return and don't consider this element. This can happen, for example, with reads + // where soft clipping still left strings of low quality bases but these are later removed by indel-specific clipping. + // if (readPos < -1) + // return; + if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH)) { refQuals.add((double)readPos); - else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH)) + //if (DEBUG) System.out.format("REF like: %4.1f, pos: %d\n",refLikelihood,readPos); + } + else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH)) { altQuals.add((double)readPos); + //if (DEBUG) System.out.format("ALT like: %4.1f, pos: %d\n",refLikelihood,readPos); + + } } } } + int getNumClippedBasesAtStart(SAMRecord read) { + // compute total number of clipped bases (soft or hard clipped) + // check for hard clips (never consider these bases): + final Cigar c = read.getCigar(); + final CigarElement first = c.getCigarElement(0); + + int numStartClippedBases = 0; + if (first.getOperator() == CigarOperator.H) { + numStartClippedBases = first.getLength(); + } + byte[] unclippedReadBases = read.getReadBases(); + byte[] unclippedReadQuals = read.getBaseQualities(); + + // Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative, + // and may leave a string of Q2 bases still hanging off the reads. + for (int i=numStartClippedBases; i < unclippedReadBases.length; i++) { + if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD) + numStartClippedBases++; + else + break; + + } + + return numStartClippedBases; + } + + int getNumAlignedBases(SAMRecord read) { + return read.getReadLength() - getNumClippedBasesAtStart(read) - getNumClippedBasesAtEnd(read); + } + + int getNumClippedBasesAtEnd(SAMRecord read) { + // compute total number of clipped bases (soft or hard clipped) + // check for hard clips (never consider these bases): + final Cigar c = read.getCigar(); + CigarElement last = c.getCigarElement(c.numCigarElements()-1); + + int numEndClippedBases = 0; + if (last.getOperator() == CigarOperator.H) { + numEndClippedBases = last.getLength(); + } + byte[] unclippedReadBases = read.getReadBases(); + byte[] unclippedReadQuals = read.getBaseQualities(); + + // Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative, + // and may leave a string of Q2 bases still hanging off the reads. + for (int i=unclippedReadBases.length-numEndClippedBases-1; i >= 0; i-- ){ + if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD) + numEndClippedBases++; + else + break; + } + + + return numEndClippedBases; + } + + int getOffsetFromClippedReadStart(SAMRecord read, int offset) { + + + return offset - getNumClippedBasesAtStart(read); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 28b88f9f2..3cfa58c02 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -798,11 +798,11 @@ public class IndelRealigner extends ReadWalker { private void generateAlternateConsensesFromKnownIndels(final Set altConsensesToPopulate, final int leftmostIndex, final byte[] reference) { for ( VariantContext knownIndel : knownIndelsToTry ) { - if ( knownIndel == null || !knownIndel.isIndel() ) + if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() ) continue; byte[] indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length()); int start = knownIndel.getStart() - leftmostIndex + 1; - Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel.isDeletion()); + Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel); if ( c != null ) altConsensesToPopulate.add(c); } @@ -988,7 +988,7 @@ public class IndelRealigner extends ReadWalker { } // create a Consensus from just the indel string that falls on the reference - private Consensus createAlternateConsensus(final int indexOnRef, final byte[] reference, final byte[] indelStr, final boolean isDeletion) { + private Consensus createAlternateConsensus(final int indexOnRef, final byte[] reference, final byte[] indelStr, final VariantContext indel) { if ( indexOnRef < 0 || indexOnRef >= reference.length ) return null; @@ -1002,14 +1002,16 @@ public class IndelRealigner extends ReadWalker { if ( indexOnRef > 0 ) cigar.add(new CigarElement(indexOnRef, CigarOperator.M)); - if ( isDeletion ) { + if ( indel.isDeletion() ) { refIdx += indelStr.length; cigar.add(new CigarElement(indelStr.length, CigarOperator.D)); } - else { + else if ( indel.isInsertion() ) { for ( byte b : indelStr ) sb.append((char)b); cigar.add(new CigarElement(indelStr.length, CigarOperator.I)); + } else { + throw new ReviewedStingException("Creating an alternate consensus from a complex indel is not allowed"); } if ( reference.length - refIdx > 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 419ae694f..5c1f57bc7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -32,9 +32,9 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; /*import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Covariate; -import org.broadinstitute.sting.walkers.IndelCountCovariates.RecalDataManager; -import org.broadinstitute.sting.walkers.IndelCountCovariates.RecalDatum; -import org.broadinstitute.sting.walkers.IndelCountCovariates.RecalibrationArgumentCollection; +import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDataManager; +import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDatum; +import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalibrationArgumentCollection; */import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.classloader.PluginManager; @@ -57,7 +57,7 @@ import java.util.regex.Pattern; public class PairHMMIndelErrorModel { - private final int BASE_QUAL_THRESHOLD = 10; + public static final int BASE_QUAL_THRESHOLD = 20; private static final int MATCH_OFFSET = 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java index 7989f52db..81d9b4ddb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java @@ -161,7 +161,7 @@ public class AnnotateMNPsWalker extends RodWalker { boolean atStartOfVc = curLocus.getStart() == vcLoc.getStart(); boolean atEndOfVc = curLocus.getStart() == vcLoc.getStop(); - if (vc.getType() == VariantContext.Type.MNP) { + if (vc.isMNP()) { logger.debug("Observed MNP at " + vcLoc); if (isChrM(vc)) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java index f350e7531..fde233b5d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbes.java @@ -209,7 +209,7 @@ public class PickSequenomProbes extends RodWalker { String assay_sequence; if ( vc.isSNP() ) assay_sequence = leading_bases + "[" + (char)ref.getBase() + "/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases; - else if ( vc.getType() == VariantContext.Type.MNP ) + else if ( vc.isMNP() ) assay_sequence = leading_bases + "[" + new String(vc.getReference().getBases()) + "/" + new String(vc.getAlternateAllele(0).getBases())+"]"+trailing_bases.substring(vc.getReference().length()-1); else if ( vc.isInsertion() ) assay_sequence = leading_bases + (char)ref.getBase() + "[-/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index ec7395f85..1bd73414c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -113,7 +113,7 @@ public class ValidateVariants extends RodWalker { observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING); } // deletions - else if ( vc.isDeletion() || vc.isMixed() || vc.getType() == VariantContext.Type.MNP ) { + else if ( vc.isDeletion() || vc.isMixed() || vc.isMNP() ) { // we can't validate arbitrarily long deletions if ( reportedRefAllele.length() > 100 ) { logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart())); @@ -123,7 +123,7 @@ public class ValidateVariants extends RodWalker { // deletions are associated with the (position of) the last (preceding) non-deleted base; // hence to get actually deleted bases we need offset = 1 int offset = 1 ; - if ( vc.getType() == VariantContext.Type.MNP ) offset = 0; // if it's an MNP, the reported position IS the first modified base + if ( vc.isMNP() ) offset = 0; // if it's an MNP, the reported position IS the first modified base byte[] refBytes = ref.getBases(); byte[] trueRef = new byte[reportedRefAllele.length()]; for (int i = 0; i < reportedRefAllele.length(); i++) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index e919d5c25..ecede068e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -190,7 +190,7 @@ public class VCFUtils { result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality")); result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)")); - result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic")); + result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, -1, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; if site is not biallelic, number of likelihoods if n*(n+1)/2")); return result; } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 87466e21b..92262f1bf 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -566,10 +566,21 @@ public class VariantContext implements Feature { // to enable tribble intergrati return getType() == Type.INDEL && getAlternateAllele(0).isNull(); } + /** + * @return true if the alleles indicate neither a simple deletion nor a simple insertion + */ + public boolean isComplexIndel() { + return isIndel() && !isDeletion() && !isInsertion(); + } + public boolean isSymbolic() { return getType() == Type.SYMBOLIC; } + public boolean isMNP() { + return getType() == Type.MNP; + } + /** * convenience method for indels * diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index 07f46c1c3..61bb8b34b 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -83,7 +83,7 @@ public abstract class BaseTest { */ public static final String MD5_FILE_DB_SUBDIR = "integrationtests"; - public static final String testDir = "testdata/"; + public static final String testDir = "public/testdata/"; /** before the class starts up */ static { diff --git a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java index a17c162f5..1e4625bf0 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java @@ -60,8 +60,8 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine(); Collection samFiles = new ArrayList(); - samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags())); - samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags())); + samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags())); + samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags())); testEngine.setSAMFileIDs(samFiles); testEngine.checkForDuplicateSamFiles(); @@ -72,10 +72,10 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine(); Collection samFiles = new ArrayList(); - samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags())); - samFiles.add(new SAMReaderID(new File("testdata/exampleNORG.bam"), new Tags())); - samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags())); - samFiles.add(new SAMReaderID(new File("testdata/exampleNORG.bam"), new Tags())); + samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags())); + samFiles.add(new SAMReaderID(new File("public/testdata/exampleNORG.bam"), new Tags())); + samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags())); + samFiles.add(new SAMReaderID(new File("public/testdata/exampleNORG.bam"), new Tags())); testEngine.setSAMFileIDs(samFiles); testEngine.checkForDuplicateSamFiles(); @@ -97,7 +97,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { GATKArgumentCollection argCollection = new GATKArgumentCollection(); testEngine.setArguments(argCollection); - File fastaFile = new File("testdata/exampleFASTA.fasta"); + File fastaFile = new File("public/testdata/exampleFASTA.fasta"); GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile)); testEngine.setGenomeLocParser(genomeLocParser); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 51f2b5786..20fa7719f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("a604a64252a8538b7d13f52bd068f797")); + Arrays.asList("258e1954e6ae55c89abc6a716e19cbe0")); executeTest("test MultiSample Pilot1", spec); } @@ -54,12 +54,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("5844eda3596732a16c8559f5bfbe1723")); + Arrays.asList("edeb1db288a24baff59575ceedd94243")); executeTest("test MultiSample Pilot2 with alleles passed in", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("db4664a1785c4efb4cd9057478aa846f")); + Arrays.asList("581990130d90071b084024f4cd7caf91")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -67,7 +67,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("36c70ec27a25f88fe2364bba2f961843")); + Arrays.asList("d120db27d694a6da32367cc4fb5770fa")); executeTest("test SingleSample Pilot2", spec); } @@ -77,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "212eab2024903997625ba98009063226"; + private final static String COMPRESSED_OUTPUT_MD5 = "75e5c430ed39f79f24e375037a388dc4"; @Test public void testCompressedOutput() { @@ -107,7 +107,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String md5 = "f83a33a1ecc350cae0c002e4a43a7861"; + String md5 = "a29615dd37222a11b8dadd341b53e43c"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -138,9 +138,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testCallingParameters() { HashMap e = new HashMap(); - e.put( "--min_base_quality_score 26", "d10d0be159d80e22b9c81970ee098daf" ); - e.put( "--min_mapping_quality_score 26", "f76099c403b60b6045a0ae7d9f589dc4" ); - e.put( "--p_nonref_model GRID_SEARCH", "cda395fdf7352e07537610f52a6d0cdc" ); + e.put( "--min_base_quality_score 26", "93e6269e38db9bc1732555e9969e3648" ); + e.put( "--min_mapping_quality_score 26", "64be99183c100caed4aa5f8bad64c7e9" ); + e.put( "--p_nonref_model GRID_SEARCH", "0592fe33f705ad8e2f13619fcf157805" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -153,9 +153,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOutputParameter() { HashMap e = new HashMap(); - e.put( "-sites_only", "9b85d9c10d634315d20aefa565dbab60" ); - e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "245abbb39de43e89f63918a6771c0c14" ); - e.put( "--output_mode EMIT_ALL_SITES", "fb7a59b318ecdb46fd96024be7e41e0e" ); + e.put( "-sites_only", "1483e637dc0279935a7f90d136d147bb" ); + e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "adcd91bc7dae8020df8caf1a30060e98" ); + e.put( "--output_mode EMIT_ALL_SITES", "b708acc2fa40f336bcd2d0c70091e07e" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -169,12 +169,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("f76099c403b60b6045a0ae7d9f589dc4")); + Arrays.asList("64be99183c100caed4aa5f8bad64c7e9")); executeTest("test confidence 1", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("879e5ab09bd0d37e0300dd34ec09db81")); + Arrays.asList("e76ca54232d02f0d92730e1affeb804e")); executeTest("test confidence 2", spec2); } @@ -186,8 +186,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "c7123f7b84b402f4959db950326afc13" ); - e.put( 1.0 / 1850, "75e6043a68265ab6deb284bb753801f9" ); + e.put( 0.01, "18d37f7f107853b5e32c757b4e143205" ); + e.put( 1.0 / 1850, "2bcb90ce2f7542bf590f7612018fae8e" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("3f45b2af75123e48b89fa1759c444ec0")); + Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d")); executeTest(String.format("test multiple technologies"), spec); } @@ -230,7 +230,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("cede928592575e617f1323866348c256")); + Arrays.asList("0919ab7e513c377610e23a67d33608fa")); executeTest(String.format("test calling with BAQ"), spec); } @@ -244,7 +244,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq OFF", 1, - Arrays.asList("3f45b2af75123e48b89fa1759c444ec0")); + Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d")); executeTest(String.format("test calling with BAQ OFF"), spec); } @@ -263,7 +263,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("7fe14d81f12d5d57e3a522b2a4f07fc6")); + Arrays.asList("cb37348c41b8181be829912730f747e1")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -278,7 +278,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("a7da8acce1957334619f3dfeac3d1379")); + Arrays.asList("ca5b6a5fb53ae401b146cc3044f454f2")); executeTest(String.format("test indel caller in SLX witn low min allele count"), spec); } @@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("234b6c336890cc6d9a495bc40f09d126")); + Arrays.asList("ca4343a4ab6d3cce94ce61d7d1910f81")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -301,14 +301,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("9e342e3b73ae4887620195417e1af44a")); + Arrays.asList("3f555b53e9dd14cf7cdf96c24e322364")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("f265726403ca3f28518cb4290b7bee84")); + Arrays.asList("1b9764b783acf7822edc58e6822eef5b")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 0c841d746..2676f7067 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -52,7 +52,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { WalkerTestSpec spec2 = new WalkerTestSpec( baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -D " + GATKDataLocation + "dbsnp_129_b36.rod", 1, - Arrays.asList("78850024ac9ff3ba51b6f097c7041c1d")); + Arrays.asList("05a114623c126b0398fbc1703437461e")); executeTest("realigner known indels only from dbsnp", spec2); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java index 11eb615d2..7e1d86105 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/BatchMergeIntegrationTest.java @@ -40,7 +40,7 @@ public class BatchMergeIntegrationTest extends WalkerTest { + " -B:alleles,VCF " + alleles + " -I " + bam, 1, - Arrays.asList("b7839064dc4979400af4792460d9884b")); + Arrays.asList("f4ed8f4ef2cba96823c06e90e9d0de35")); executeTest("testBatchMerge UG genotype given alleles:" + new File(bam).getName() + " with " + new File(alleles).getName(), spec); } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java index e592c99b9..56bf66f53 100644 --- a/public/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java @@ -21,7 +21,7 @@ public class BedParserUnitTest extends BaseTest { private static IndexedFastaSequenceFile seq; private GenomeLocParser genomeLocParser; - private File bedFile = new File("testdata/sampleBedFile.bed"); + private File bedFile = new File("public/testdata/sampleBedFile.bed"); @BeforeClass public void beforeTests() { diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/hapmap/HapMapUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/hapmap/HapMapUnitTest.java index 2e618d5eb..e912f97e9 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/hapmap/HapMapUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/hapmap/HapMapUnitTest.java @@ -39,7 +39,7 @@ import java.io.IOException; */ public class HapMapUnitTest { // our sample hapmap file - private final static File hapMapFile = new File("testdata/genotypes_chr1_ASW_phase3.3_first500.hapmap"); + private final static File hapMapFile = new File("public/testdata/genotypes_chr1_ASW_phase3.3_first500.hapmap"); private final static String knownLine = "rs2185539 C/T chr1 556738 + ncbi_b36 bbs urn:lsid:bbs.hapmap.org:Protocol:Phase3.r3:1 urn:lsid:bbs.hapmap.org:Assay:Phase3.r3_r" + "s2185539:1 urn:lsid:dcc.hapmap.org:Panel:US_African-30-trios:4 QC+ CC TC TT CT CC CC CC CC CC CC CC CC CC"; /** diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java index 3929ff388..2f6b589f4 100755 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java @@ -17,8 +17,8 @@ import java.util.*; */ public class IndexFactoryUnitTest { - File inputFile = new File("testdata/HiSeq.10000.vcf"); - File outputFile = new File("testdata/onTheFlyOutputTest.vcf"); + File inputFile = new File("public/testdata/HiSeq.10000.vcf"); + File outputFile = new File("public/testdata/onTheFlyOutputTest.vcf"); File outputFileIndex = Tribble.indexFile(outputFile); /** diff --git a/public/java/test/org/broadinstitute/sting/utils/text/ListFileUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/text/ListFileUtilsUnitTest.java index f85a2f599..f0b1de6fe 100644 --- a/public/java/test/org/broadinstitute/sting/utils/text/ListFileUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/text/ListFileUtilsUnitTest.java @@ -49,12 +49,12 @@ public class ListFileUtilsUnitTest extends BaseTest { public void testIgnoreBlankLinesInBAMListFiles() throws Exception { File tempListFile = createTempListFile("testIgnoreBlankLines", "", - "testdata/exampleBAM.bam", + "public/testdata/exampleBAM.bam", " " ); List expectedBAMFileListAfterUnpacking = new ArrayList(); - expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags())); + expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags())); performBAMListFileUnpackingTest(tempListFile, expectedBAMFileListAfterUnpacking); } @@ -63,13 +63,13 @@ public class ListFileUtilsUnitTest extends BaseTest { public void testCommentSupportInBAMListFiles() throws Exception { File tempListFile = createTempListFile("testCommentSupport", "#", - "testdata/exampleBAM.bam", - "#testdata/foo.bam", - " # testdata/bar.bam" + "public/testdata/exampleBAM.bam", + "#public/testdata/foo.bam", + " # public/testdata/bar.bam" ); List expectedBAMFileListAfterUnpacking = new ArrayList(); - expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags())); + expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags())); performBAMListFileUnpackingTest(tempListFile, expectedBAMFileListAfterUnpacking); } diff --git a/public/java/test/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTrackerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTrackerUnitTest.java index df4e1660d..78ab916db 100644 --- a/public/java/test/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTrackerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/threading/GenomeLocProcessingTrackerUnitTest.java @@ -29,7 +29,7 @@ public class GenomeLocProcessingTrackerUnitTest extends BaseTest { IndexedFastaSequenceFile fasta = null; GenomeLocParser genomeLocParser = null; String chr1 = null; - private final static String FILE_ROOT = "testdata/GLPTFile"; + private final static String FILE_ROOT = "public/testdata/GLPTFile"; @BeforeTest public void before() { diff --git a/public/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java similarity index 90% rename from public/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java rename to public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index 5506b2d27..5d42f8d0c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -1,6 +1,6 @@ -package org.broadinstitute.sting.gatk.contexts.variantcontext; +package org.broadinstitute.sting.utils.variantcontext; import org.broadinstitute.sting.WalkerTest; import org.testng.annotations.Test; @@ -19,12 +19,12 @@ public class VariantContextIntegrationTest extends WalkerTest { static HashMap expectations = new HashMap(); static { - expectations.put("-L 1:1-10000 --printPerLocus", "493bf9bcde93c08c8c46f72c8e98cf2f"); - expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "bd9e062b23c2c48fef8c299dc1d294d5"); + expectations.put("-L 1:1-10000 --printPerLocus", "e4ee2eaa3114888e918a1c82df7a027a"); + expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "5b5635e4877d82e8a27d70dac24bda2f"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "ceced3f270b4fe407ee83bc9028becde"); expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "9a9b9e283553c28bf58de1cafa38fe92"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "2097e32988d603d3b353b50218c86d3b"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "91c6a9489256d9ce77c8fedf7221a961"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "033bd952fca048fe1a4f6422b57ab2ed"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5e40980c02797f90821317874426a87a"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "e5a00766f8c1ff9cf92310bafdec3126"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc"); @@ -58,7 +58,7 @@ public class VariantContextIntegrationTest extends WalkerTest { // this really just tests that we are seeing the same number of objects over all of chr1 WalkerTestSpec spec = new WalkerTestSpec( root + " -L 1" + " -o %s", 1, // just one output file - Arrays.asList("d2a3f2fe329a0a64145cfd19fde45b99")); + Arrays.asList("529f936aa6c303658b23caf4e527782f")); executeTest("testLargeScaleConversion", spec); } } diff --git a/public/scala/qscript/core/RecalibrateBaseQualities.scala b/public/scala/qscript/core/RecalibrateBaseQualities.scala new file mode 100755 index 000000000..ba13d267a --- /dev/null +++ b/public/scala/qscript/core/RecalibrateBaseQualities.scala @@ -0,0 +1,91 @@ +package core + +import org.broadinstitute.sting.queue.QScript +import org.broadinstitute.sting.queue.extensions.gatk._ +import net.sf.samtools.SAMFileReader + +/** + * Created by IntelliJ IDEA. + * User: carneiro + * Date: 4/20/11 + * Time: 16:29 PM + */ + + +class RecalibrateBaseQualities extends QScript { + + @Input(doc="path to GenomeAnalysisTK.jar", shortName="gatk", required=true) + var GATKjar: File = _ + + @Input(doc="input BAM file - or list of BAM files", shortName="i", required=true) + var input: File = _ + + @Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=false) + var R: String = new File("/humgen/gsa-scr1/carneiro/stable/R") + + @Input(doc="Reference fasta file", shortName="R", required=false) + var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta") + + @Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false) + var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf") + + val queueLogDir: String = ".qlog/" + var nContigs: Int = 0 + + def getNumberOfContigs(bamFile: File): Int = { + val samReader = new SAMFileReader(new File(bamFile)) + return samReader.getFileHeader.getSequenceDictionary.getSequences.size() + } + + def script = { + + nContigs = getNumberOfContigs(input) + + val recalFile1: File = new File("recal1.csv") + val recalFile2: File = new File("recal2.csv") + val recalBam: File = swapExt(input, ".bam", "recal.bam") + val path1: String = "before" + val path2: String = "after" + + add(cov(input, recalFile1), + recal(input, recalFile1, recalBam), + cov(recalBam, recalFile2), + analyzeCovariates(recalFile1, path1), + analyzeCovariates(recalFile2, path2)) + } + + trait CommandLineGATKArgs extends CommandLineGATK { + this.jarFile = GATKjar + this.reference_sequence = reference + this.memoryLimit = 4 + this.isIntermediate = true + } + + case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs { + this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) + this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") + this.input_file :+= inBam + this.recal_file = outRecalFile + this.analysisName = queueLogDir + outRecalFile + ".covariates" + this.jobName = queueLogDir + outRecalFile + ".covariates" + this.scatterCount = nContigs + } + + case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs { + this.input_file :+= inBam + this.recal_file = inRecalFile + this.out = outBam + this.isIntermediate = false + this.analysisName = queueLogDir + outBam + ".recalibration" + this.jobName = queueLogDir + outBam + ".recalibration" + this.scatterCount = nContigs + } + + case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates { + this.resources = R + this.recal_file = inRecalFile + this.output_dir = outPath.toString + this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates" + this.jobName = queueLogDir + inRecalFile + ".analyze_covariates" + } +} \ No newline at end of file