Merge branch 'master' of ssh://delangel@nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
331b47afbd
|
|
@ -978,7 +978,7 @@
|
|||
<!-- Build gsalib R module -->
|
||||
<target name="gsalib">
|
||||
<exec executable="R" failonerror="true">
|
||||
<arg line="R CMD INSTALL -l R/ R/src/gsalib/" />
|
||||
<arg line="R CMD INSTALL -l private/R/ private/R/src/gsalib/" />
|
||||
</exec>
|
||||
</target>
|
||||
</project>
|
||||
|
|
|
|||
|
|
@ -127,9 +127,9 @@ public class VariantContextAdaptors {
|
|||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put(VariantContext.ID_KEY, dbsnp.getRsID());
|
||||
if ( DbSNPHelper.isDeletion(dbsnp) ) {
|
||||
int index = ref.getLocus().getStart() - ref.getWindow().getStart() - 1;
|
||||
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
||||
if ( index < 0 )
|
||||
throw new ReviewedStingException("DbSNP conversion requested using a reference context with no window; we will fail to convert deletions");
|
||||
return null; // we weren't given enough reference context to create the VariantContext
|
||||
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index]));
|
||||
}
|
||||
Collection<Genotype> genotypes = null;
|
||||
|
|
|
|||
|
|
@ -4,9 +4,11 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import net.sf.samtools.*;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -67,20 +69,95 @@ public class ReadPosRankSumTest extends RankSumTest {
|
|||
altLikelihood = like;
|
||||
}
|
||||
}
|
||||
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p.getOffset(), 0, 0);
|
||||
final int numAlignedBases = AlignmentUtils.getNumAlignedBases(p.getRead());
|
||||
|
||||
int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset());
|
||||
final int numAlignedBases = getNumAlignedBases(p.getRead());
|
||||
|
||||
int rp = readPos;
|
||||
if( readPos > numAlignedBases / 2 ) {
|
||||
readPos = numAlignedBases - ( readPos + 1 );
|
||||
}
|
||||
//if (DEBUG) System.out.format("R:%s start:%d C:%s offset:%d rp:%d readPos:%d alignedB:%d\n",p.getRead().getReadName(),p.getRead().getAlignmentStart(),p.getRead().getCigarString(),p.getOffset(), rp, readPos, numAlignedBases);
|
||||
|
||||
if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH))
|
||||
|
||||
// if event is beyond span of read just return and don't consider this element. This can happen, for example, with reads
|
||||
// where soft clipping still left strings of low quality bases but these are later removed by indel-specific clipping.
|
||||
// if (readPos < -1)
|
||||
// return;
|
||||
if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH)) {
|
||||
refQuals.add((double)readPos);
|
||||
else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH))
|
||||
//if (DEBUG) System.out.format("REF like: %4.1f, pos: %d\n",refLikelihood,readPos);
|
||||
}
|
||||
else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH)) {
|
||||
altQuals.add((double)readPos);
|
||||
//if (DEBUG) System.out.format("ALT like: %4.1f, pos: %d\n",refLikelihood,readPos);
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int getNumClippedBasesAtStart(SAMRecord read) {
|
||||
// compute total number of clipped bases (soft or hard clipped)
|
||||
// check for hard clips (never consider these bases):
|
||||
final Cigar c = read.getCigar();
|
||||
final CigarElement first = c.getCigarElement(0);
|
||||
|
||||
int numStartClippedBases = 0;
|
||||
if (first.getOperator() == CigarOperator.H) {
|
||||
numStartClippedBases = first.getLength();
|
||||
}
|
||||
byte[] unclippedReadBases = read.getReadBases();
|
||||
byte[] unclippedReadQuals = read.getBaseQualities();
|
||||
|
||||
// Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative,
|
||||
// and may leave a string of Q2 bases still hanging off the reads.
|
||||
for (int i=numStartClippedBases; i < unclippedReadBases.length; i++) {
|
||||
if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD)
|
||||
numStartClippedBases++;
|
||||
else
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
return numStartClippedBases;
|
||||
}
|
||||
|
||||
int getNumAlignedBases(SAMRecord read) {
|
||||
return read.getReadLength() - getNumClippedBasesAtStart(read) - getNumClippedBasesAtEnd(read);
|
||||
}
|
||||
|
||||
int getNumClippedBasesAtEnd(SAMRecord read) {
|
||||
// compute total number of clipped bases (soft or hard clipped)
|
||||
// check for hard clips (never consider these bases):
|
||||
final Cigar c = read.getCigar();
|
||||
CigarElement last = c.getCigarElement(c.numCigarElements()-1);
|
||||
|
||||
int numEndClippedBases = 0;
|
||||
if (last.getOperator() == CigarOperator.H) {
|
||||
numEndClippedBases = last.getLength();
|
||||
}
|
||||
byte[] unclippedReadBases = read.getReadBases();
|
||||
byte[] unclippedReadQuals = read.getBaseQualities();
|
||||
|
||||
// Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative,
|
||||
// and may leave a string of Q2 bases still hanging off the reads.
|
||||
for (int i=unclippedReadBases.length-numEndClippedBases-1; i >= 0; i-- ){
|
||||
if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD)
|
||||
numEndClippedBases++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
return numEndClippedBases;
|
||||
}
|
||||
|
||||
int getOffsetFromClippedReadStart(SAMRecord read, int offset) {
|
||||
|
||||
|
||||
return offset - getNumClippedBasesAtStart(read);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -798,11 +798,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
|
||||
private void generateAlternateConsensesFromKnownIndels(final Set<Consensus> altConsensesToPopulate, final int leftmostIndex, final byte[] reference) {
|
||||
for ( VariantContext knownIndel : knownIndelsToTry ) {
|
||||
if ( knownIndel == null || !knownIndel.isIndel() )
|
||||
if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() )
|
||||
continue;
|
||||
byte[] indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length());
|
||||
int start = knownIndel.getStart() - leftmostIndex + 1;
|
||||
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel.isDeletion());
|
||||
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel);
|
||||
if ( c != null )
|
||||
altConsensesToPopulate.add(c);
|
||||
}
|
||||
|
|
@ -988,7 +988,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
// create a Consensus from just the indel string that falls on the reference
|
||||
private Consensus createAlternateConsensus(final int indexOnRef, final byte[] reference, final byte[] indelStr, final boolean isDeletion) {
|
||||
private Consensus createAlternateConsensus(final int indexOnRef, final byte[] reference, final byte[] indelStr, final VariantContext indel) {
|
||||
if ( indexOnRef < 0 || indexOnRef >= reference.length )
|
||||
return null;
|
||||
|
||||
|
|
@ -1002,14 +1002,16 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
if ( indexOnRef > 0 )
|
||||
cigar.add(new CigarElement(indexOnRef, CigarOperator.M));
|
||||
|
||||
if ( isDeletion ) {
|
||||
if ( indel.isDeletion() ) {
|
||||
refIdx += indelStr.length;
|
||||
cigar.add(new CigarElement(indelStr.length, CigarOperator.D));
|
||||
}
|
||||
else {
|
||||
else if ( indel.isInsertion() ) {
|
||||
for ( byte b : indelStr )
|
||||
sb.append((char)b);
|
||||
cigar.add(new CigarElement(indelStr.length, CigarOperator.I));
|
||||
} else {
|
||||
throw new ReviewedStingException("Creating an alternate consensus from a complex indel is not allowed");
|
||||
}
|
||||
|
||||
if ( reference.length - refIdx > 0 )
|
||||
|
|
|
|||
|
|
@ -32,9 +32,9 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
/*import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Covariate;
|
||||
import org.broadinstitute.sting.walkers.IndelCountCovariates.RecalDataManager;
|
||||
import org.broadinstitute.sting.walkers.IndelCountCovariates.RecalDatum;
|
||||
import org.broadinstitute.sting.walkers.IndelCountCovariates.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDataManager;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDatum;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalibrationArgumentCollection;
|
||||
*/import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
|
|
@ -57,7 +57,7 @@ import java.util.regex.Pattern;
|
|||
public class PairHMMIndelErrorModel {
|
||||
|
||||
|
||||
private final int BASE_QUAL_THRESHOLD = 10;
|
||||
public static final int BASE_QUAL_THRESHOLD = 20;
|
||||
|
||||
|
||||
private static final int MATCH_OFFSET = 0;
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ public class AnnotateMNPsWalker extends RodWalker<Integer, Integer> {
|
|||
boolean atStartOfVc = curLocus.getStart() == vcLoc.getStart();
|
||||
boolean atEndOfVc = curLocus.getStart() == vcLoc.getStop();
|
||||
|
||||
if (vc.getType() == VariantContext.Type.MNP) {
|
||||
if (vc.isMNP()) {
|
||||
logger.debug("Observed MNP at " + vcLoc);
|
||||
|
||||
if (isChrM(vc)) {
|
||||
|
|
|
|||
|
|
@ -209,7 +209,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
|||
String assay_sequence;
|
||||
if ( vc.isSNP() )
|
||||
assay_sequence = leading_bases + "[" + (char)ref.getBase() + "/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases;
|
||||
else if ( vc.getType() == VariantContext.Type.MNP )
|
||||
else if ( vc.isMNP() )
|
||||
assay_sequence = leading_bases + "[" + new String(vc.getReference().getBases()) + "/" + new String(vc.getAlternateAllele(0).getBases())+"]"+trailing_bases.substring(vc.getReference().length()-1);
|
||||
else if ( vc.isInsertion() )
|
||||
assay_sequence = leading_bases + (char)ref.getBase() + "[-/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases;
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING);
|
||||
}
|
||||
// deletions
|
||||
else if ( vc.isDeletion() || vc.isMixed() || vc.getType() == VariantContext.Type.MNP ) {
|
||||
else if ( vc.isDeletion() || vc.isMixed() || vc.isMNP() ) {
|
||||
// we can't validate arbitrarily long deletions
|
||||
if ( reportedRefAllele.length() > 100 ) {
|
||||
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart()));
|
||||
|
|
@ -123,7 +123,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
// deletions are associated with the (position of) the last (preceding) non-deleted base;
|
||||
// hence to get actually deleted bases we need offset = 1
|
||||
int offset = 1 ;
|
||||
if ( vc.getType() == VariantContext.Type.MNP ) offset = 0; // if it's an MNP, the reported position IS the first modified base
|
||||
if ( vc.isMNP() ) offset = 0; // if it's an MNP, the reported position IS the first modified base
|
||||
byte[] refBytes = ref.getBases();
|
||||
byte[] trueRef = new byte[reportedRefAllele.length()];
|
||||
for (int i = 0; i < reportedRefAllele.length(); i++)
|
||||
|
|
|
|||
|
|
@ -190,7 +190,7 @@ public class VCFUtils {
|
|||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, -1, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; if site is not biallelic, number of likelihoods if n*(n+1)/2"));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -566,10 +566,21 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
return getType() == Type.INDEL && getAlternateAllele(0).isNull();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the alleles indicate neither a simple deletion nor a simple insertion
|
||||
*/
|
||||
public boolean isComplexIndel() {
|
||||
return isIndel() && !isDeletion() && !isInsertion();
|
||||
}
|
||||
|
||||
public boolean isSymbolic() {
|
||||
return getType() == Type.SYMBOLIC;
|
||||
}
|
||||
|
||||
public boolean isMNP() {
|
||||
return getType() == Type.MNP;
|
||||
}
|
||||
|
||||
/**
|
||||
* convenience method for indels
|
||||
*
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ public abstract class BaseTest {
|
|||
*/
|
||||
public static final String MD5_FILE_DB_SUBDIR = "integrationtests";
|
||||
|
||||
public static final String testDir = "testdata/";
|
||||
public static final String testDir = "public/testdata/";
|
||||
|
||||
/** before the class starts up */
|
||||
static {
|
||||
|
|
|
|||
|
|
@ -60,8 +60,8 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
|
|||
GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine();
|
||||
|
||||
Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
|
||||
samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
|
||||
|
||||
testEngine.setSAMFileIDs(samFiles);
|
||||
testEngine.checkForDuplicateSamFiles();
|
||||
|
|
@ -72,10 +72,10 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
|
|||
GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine();
|
||||
|
||||
Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
|
||||
samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("testdata/exampleNORG.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("testdata/exampleNORG.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("public/testdata/exampleNORG.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
|
||||
samFiles.add(new SAMReaderID(new File("public/testdata/exampleNORG.bam"), new Tags()));
|
||||
|
||||
testEngine.setSAMFileIDs(samFiles);
|
||||
testEngine.checkForDuplicateSamFiles();
|
||||
|
|
@ -97,7 +97,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
|
|||
GATKArgumentCollection argCollection = new GATKArgumentCollection();
|
||||
testEngine.setArguments(argCollection);
|
||||
|
||||
File fastaFile = new File("testdata/exampleFASTA.fasta");
|
||||
File fastaFile = new File("public/testdata/exampleFASTA.fasta");
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile));
|
||||
testEngine.setGenomeLocParser(genomeLocParser);
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("a604a64252a8538b7d13f52bd068f797"));
|
||||
Arrays.asList("258e1954e6ae55c89abc6a716e19cbe0"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -54,12 +54,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("5844eda3596732a16c8559f5bfbe1723"));
|
||||
Arrays.asList("edeb1db288a24baff59575ceedd94243"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("db4664a1785c4efb4cd9057478aa846f"));
|
||||
Arrays.asList("581990130d90071b084024f4cd7caf91"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -67,7 +67,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("36c70ec27a25f88fe2364bba2f961843"));
|
||||
Arrays.asList("d120db27d694a6da32367cc4fb5770fa"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -77,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "212eab2024903997625ba98009063226";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "75e5c430ed39f79f24e375037a388dc4";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -107,7 +107,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||
|
||||
String md5 = "f83a33a1ecc350cae0c002e4a43a7861";
|
||||
String md5 = "a29615dd37222a11b8dadd341b53e43c";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
|
|
@ -138,9 +138,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCallingParameters() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "--min_base_quality_score 26", "d10d0be159d80e22b9c81970ee098daf" );
|
||||
e.put( "--min_mapping_quality_score 26", "f76099c403b60b6045a0ae7d9f589dc4" );
|
||||
e.put( "--p_nonref_model GRID_SEARCH", "cda395fdf7352e07537610f52a6d0cdc" );
|
||||
e.put( "--min_base_quality_score 26", "93e6269e38db9bc1732555e9969e3648" );
|
||||
e.put( "--min_mapping_quality_score 26", "64be99183c100caed4aa5f8bad64c7e9" );
|
||||
e.put( "--p_nonref_model GRID_SEARCH", "0592fe33f705ad8e2f13619fcf157805" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -153,9 +153,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testOutputParameter() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-sites_only", "9b85d9c10d634315d20aefa565dbab60" );
|
||||
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "245abbb39de43e89f63918a6771c0c14" );
|
||||
e.put( "--output_mode EMIT_ALL_SITES", "fb7a59b318ecdb46fd96024be7e41e0e" );
|
||||
e.put( "-sites_only", "1483e637dc0279935a7f90d136d147bb" );
|
||||
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "adcd91bc7dae8020df8caf1a30060e98" );
|
||||
e.put( "--output_mode EMIT_ALL_SITES", "b708acc2fa40f336bcd2d0c70091e07e" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -169,12 +169,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("f76099c403b60b6045a0ae7d9f589dc4"));
|
||||
Arrays.asList("64be99183c100caed4aa5f8bad64c7e9"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||
Arrays.asList("879e5ab09bd0d37e0300dd34ec09db81"));
|
||||
Arrays.asList("e76ca54232d02f0d92730e1affeb804e"));
|
||||
executeTest("test confidence 2", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -186,8 +186,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHeterozyosity() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 0.01, "c7123f7b84b402f4959db950326afc13" );
|
||||
e.put( 1.0 / 1850, "75e6043a68265ab6deb284bb753801f9" );
|
||||
e.put( 0.01, "18d37f7f107853b5e32c757b4e143205" );
|
||||
e.put( 1.0 / 1850, "2bcb90ce2f7542bf590f7612018fae8e" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("3f45b2af75123e48b89fa1759c444ec0"));
|
||||
Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -230,7 +230,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("cede928592575e617f1323866348c256"));
|
||||
Arrays.asList("0919ab7e513c377610e23a67d33608fa"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -244,7 +244,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq OFF",
|
||||
1,
|
||||
Arrays.asList("3f45b2af75123e48b89fa1759c444ec0"));
|
||||
Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ OFF"), spec);
|
||||
}
|
||||
|
|
@ -263,7 +263,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("7fe14d81f12d5d57e3a522b2a4f07fc6"));
|
||||
Arrays.asList("cb37348c41b8181be829912730f747e1"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -278,7 +278,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("a7da8acce1957334619f3dfeac3d1379"));
|
||||
Arrays.asList("ca5b6a5fb53ae401b146cc3044f454f2"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX witn low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("234b6c336890cc6d9a495bc40f09d126"));
|
||||
Arrays.asList("ca4343a4ab6d3cce94ce61d7d1910f81"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -301,14 +301,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("9e342e3b73ae4887620195417e1af44a"));
|
||||
Arrays.asList("3f555b53e9dd14cf7cdf96c24e322364"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf "
|
||||
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("f265726403ca3f28518cb4290b7bee84"));
|
||||
Arrays.asList("1b9764b783acf7822edc58e6822eef5b"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -D " + GATKDataLocation + "dbsnp_129_b36.rod",
|
||||
1,
|
||||
Arrays.asList("78850024ac9ff3ba51b6f097c7041c1d"));
|
||||
Arrays.asList("05a114623c126b0398fbc1703437461e"));
|
||||
executeTest("realigner known indels only from dbsnp", spec2);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ public class BatchMergeIntegrationTest extends WalkerTest {
|
|||
+ " -B:alleles,VCF " + alleles
|
||||
+ " -I " + bam,
|
||||
1,
|
||||
Arrays.asList("b7839064dc4979400af4792460d9884b"));
|
||||
Arrays.asList("f4ed8f4ef2cba96823c06e90e9d0de35"));
|
||||
executeTest("testBatchMerge UG genotype given alleles:" + new File(bam).getName() + " with " + new File(alleles).getName(), spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -21,7 +21,7 @@ public class BedParserUnitTest extends BaseTest {
|
|||
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private File bedFile = new File("testdata/sampleBedFile.bed");
|
||||
private File bedFile = new File("public/testdata/sampleBedFile.bed");
|
||||
|
||||
@BeforeClass
|
||||
public void beforeTests() {
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ import java.io.IOException;
|
|||
*/
|
||||
public class HapMapUnitTest {
|
||||
// our sample hapmap file
|
||||
private final static File hapMapFile = new File("testdata/genotypes_chr1_ASW_phase3.3_first500.hapmap");
|
||||
private final static File hapMapFile = new File("public/testdata/genotypes_chr1_ASW_phase3.3_first500.hapmap");
|
||||
private final static String knownLine = "rs2185539 C/T chr1 556738 + ncbi_b36 bbs urn:lsid:bbs.hapmap.org:Protocol:Phase3.r3:1 urn:lsid:bbs.hapmap.org:Assay:Phase3.r3_r" +
|
||||
"s2185539:1 urn:lsid:dcc.hapmap.org:Panel:US_African-30-trios:4 QC+ CC TC TT CT CC CC CC CC CC CC CC CC CC";
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -17,8 +17,8 @@ import java.util.*;
|
|||
*/
|
||||
public class IndexFactoryUnitTest {
|
||||
|
||||
File inputFile = new File("testdata/HiSeq.10000.vcf");
|
||||
File outputFile = new File("testdata/onTheFlyOutputTest.vcf");
|
||||
File inputFile = new File("public/testdata/HiSeq.10000.vcf");
|
||||
File outputFile = new File("public/testdata/onTheFlyOutputTest.vcf");
|
||||
File outputFileIndex = Tribble.indexFile(outputFile);
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -49,12 +49,12 @@ public class ListFileUtilsUnitTest extends BaseTest {
|
|||
public void testIgnoreBlankLinesInBAMListFiles() throws Exception {
|
||||
File tempListFile = createTempListFile("testIgnoreBlankLines",
|
||||
"",
|
||||
"testdata/exampleBAM.bam",
|
||||
"public/testdata/exampleBAM.bam",
|
||||
" "
|
||||
);
|
||||
|
||||
List<SAMReaderID> expectedBAMFileListAfterUnpacking = new ArrayList<SAMReaderID>();
|
||||
expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
|
||||
expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
|
||||
|
||||
performBAMListFileUnpackingTest(tempListFile, expectedBAMFileListAfterUnpacking);
|
||||
}
|
||||
|
|
@ -63,13 +63,13 @@ public class ListFileUtilsUnitTest extends BaseTest {
|
|||
public void testCommentSupportInBAMListFiles() throws Exception {
|
||||
File tempListFile = createTempListFile("testCommentSupport",
|
||||
"#",
|
||||
"testdata/exampleBAM.bam",
|
||||
"#testdata/foo.bam",
|
||||
" # testdata/bar.bam"
|
||||
"public/testdata/exampleBAM.bam",
|
||||
"#public/testdata/foo.bam",
|
||||
" # public/testdata/bar.bam"
|
||||
);
|
||||
|
||||
List<SAMReaderID> expectedBAMFileListAfterUnpacking = new ArrayList<SAMReaderID>();
|
||||
expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
|
||||
expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
|
||||
|
||||
performBAMListFileUnpackingTest(tempListFile, expectedBAMFileListAfterUnpacking);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class GenomeLocProcessingTrackerUnitTest extends BaseTest {
|
|||
IndexedFastaSequenceFile fasta = null;
|
||||
GenomeLocParser genomeLocParser = null;
|
||||
String chr1 = null;
|
||||
private final static String FILE_ROOT = "testdata/GLPTFile";
|
||||
private final static String FILE_ROOT = "public/testdata/GLPTFile";
|
||||
|
||||
@BeforeTest
|
||||
public void before() {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
|
||||
package org.broadinstitute.sting.gatk.contexts.variantcontext;
|
||||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -19,12 +19,12 @@ public class VariantContextIntegrationTest extends WalkerTest {
|
|||
|
||||
static HashMap<String, String> expectations = new HashMap<String, String>();
|
||||
static {
|
||||
expectations.put("-L 1:1-10000 --printPerLocus", "493bf9bcde93c08c8c46f72c8e98cf2f");
|
||||
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "bd9e062b23c2c48fef8c299dc1d294d5");
|
||||
expectations.put("-L 1:1-10000 --printPerLocus", "e4ee2eaa3114888e918a1c82df7a027a");
|
||||
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "5b5635e4877d82e8a27d70dac24bda2f");
|
||||
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "ceced3f270b4fe407ee83bc9028becde");
|
||||
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "9a9b9e283553c28bf58de1cafa38fe92");
|
||||
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "2097e32988d603d3b353b50218c86d3b");
|
||||
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "91c6a9489256d9ce77c8fedf7221a961");
|
||||
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "033bd952fca048fe1a4f6422b57ab2ed");
|
||||
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5e40980c02797f90821317874426a87a");
|
||||
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "e5a00766f8c1ff9cf92310bafdec3126");
|
||||
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc");
|
||||
|
|
@ -58,7 +58,7 @@ public class VariantContextIntegrationTest extends WalkerTest {
|
|||
// this really just tests that we are seeing the same number of objects over all of chr1
|
||||
WalkerTestSpec spec = new WalkerTestSpec( root + " -L 1" + " -o %s",
|
||||
1, // just one output file
|
||||
Arrays.asList("d2a3f2fe329a0a64145cfd19fde45b99"));
|
||||
Arrays.asList("529f936aa6c303658b23caf4e527782f"));
|
||||
executeTest("testLargeScaleConversion", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
package core
|
||||
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import net.sf.samtools.SAMFileReader
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: carneiro
|
||||
* Date: 4/20/11
|
||||
* Time: 16:29 PM
|
||||
*/
|
||||
|
||||
|
||||
class RecalibrateBaseQualities extends QScript {
|
||||
|
||||
@Input(doc="path to GenomeAnalysisTK.jar", shortName="gatk", required=true)
|
||||
var GATKjar: File = _
|
||||
|
||||
@Input(doc="input BAM file - or list of BAM files", shortName="i", required=true)
|
||||
var input: File = _
|
||||
|
||||
@Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=false)
|
||||
var R: String = new File("/humgen/gsa-scr1/carneiro/stable/R")
|
||||
|
||||
@Input(doc="Reference fasta file", shortName="R", required=false)
|
||||
var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
|
||||
|
||||
@Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false)
|
||||
var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
|
||||
|
||||
val queueLogDir: String = ".qlog/"
|
||||
var nContigs: Int = 0
|
||||
|
||||
def getNumberOfContigs(bamFile: File): Int = {
|
||||
val samReader = new SAMFileReader(new File(bamFile))
|
||||
return samReader.getFileHeader.getSequenceDictionary.getSequences.size()
|
||||
}
|
||||
|
||||
def script = {
|
||||
|
||||
nContigs = getNumberOfContigs(input)
|
||||
|
||||
val recalFile1: File = new File("recal1.csv")
|
||||
val recalFile2: File = new File("recal2.csv")
|
||||
val recalBam: File = swapExt(input, ".bam", "recal.bam")
|
||||
val path1: String = "before"
|
||||
val path2: String = "after"
|
||||
|
||||
add(cov(input, recalFile1),
|
||||
recal(input, recalFile1, recalBam),
|
||||
cov(recalBam, recalFile2),
|
||||
analyzeCovariates(recalFile1, path1),
|
||||
analyzeCovariates(recalFile2, path2))
|
||||
}
|
||||
|
||||
trait CommandLineGATKArgs extends CommandLineGATK {
|
||||
this.jarFile = GATKjar
|
||||
this.reference_sequence = reference
|
||||
this.memoryLimit = 4
|
||||
this.isIntermediate = true
|
||||
}
|
||||
|
||||
case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {
|
||||
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
||||
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
|
||||
this.input_file :+= inBam
|
||||
this.recal_file = outRecalFile
|
||||
this.analysisName = queueLogDir + outRecalFile + ".covariates"
|
||||
this.jobName = queueLogDir + outRecalFile + ".covariates"
|
||||
this.scatterCount = nContigs
|
||||
}
|
||||
|
||||
case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs {
|
||||
this.input_file :+= inBam
|
||||
this.recal_file = inRecalFile
|
||||
this.out = outBam
|
||||
this.isIntermediate = false
|
||||
this.analysisName = queueLogDir + outBam + ".recalibration"
|
||||
this.jobName = queueLogDir + outBam + ".recalibration"
|
||||
this.scatterCount = nContigs
|
||||
}
|
||||
|
||||
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
|
||||
this.resources = R
|
||||
this.recal_file = inRecalFile
|
||||
this.output_dir = outPath.toString
|
||||
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||
this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue