Merge branch 'master' of ssh://delangel@nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2011-06-30 08:29:11 -04:00
commit 331b47afbd
22 changed files with 252 additions and 71 deletions

View File

@ -978,7 +978,7 @@
<!-- Build gsalib R module -->
<target name="gsalib">
<exec executable="R" failonerror="true">
<arg line="R CMD INSTALL -l R/ R/src/gsalib/" />
<arg line="R CMD INSTALL -l private/R/ private/R/src/gsalib/" />
</exec>
</target>
</project>

View File

@ -127,9 +127,9 @@ public class VariantContextAdaptors {
Map<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VariantContext.ID_KEY, dbsnp.getRsID());
if ( DbSNPHelper.isDeletion(dbsnp) ) {
int index = ref.getLocus().getStart() - ref.getWindow().getStart() - 1;
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
if ( index < 0 )
throw new ReviewedStingException("DbSNP conversion requested using a reference context with no window; we will fail to convert deletions");
return null; // we weren't given enough reference context to create the VariantContext
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index]));
}
Collection<Genotype> genotypes = null;

View File

@ -4,9 +4,11 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import net.sf.samtools.*;
import java.util.Arrays;
import java.util.HashMap;
@ -67,20 +69,95 @@ public class ReadPosRankSumTest extends RankSumTest {
altLikelihood = like;
}
}
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p.getOffset(), 0, 0);
final int numAlignedBases = AlignmentUtils.getNumAlignedBases(p.getRead());
int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset());
final int numAlignedBases = getNumAlignedBases(p.getRead());
int rp = readPos;
if( readPos > numAlignedBases / 2 ) {
readPos = numAlignedBases - ( readPos + 1 );
}
//if (DEBUG) System.out.format("R:%s start:%d C:%s offset:%d rp:%d readPos:%d alignedB:%d\n",p.getRead().getReadName(),p.getRead().getAlignmentStart(),p.getRead().getCigarString(),p.getOffset(), rp, readPos, numAlignedBases);
if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH))
// if event is beyond span of read just return and don't consider this element. This can happen, for example, with reads
// where soft clipping still left strings of low quality bases but these are later removed by indel-specific clipping.
// if (readPos < -1)
// return;
if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH)) {
refQuals.add((double)readPos);
else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH))
//if (DEBUG) System.out.format("REF like: %4.1f, pos: %d\n",refLikelihood,readPos);
}
else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH)) {
altQuals.add((double)readPos);
//if (DEBUG) System.out.format("ALT like: %4.1f, pos: %d\n",refLikelihood,readPos);
}
}
}
}
/**
 * Computes how many bases at the START of the read should be treated as clipped:
 * any leading hard clip declared in the CIGAR, extended by the following run of
 * low-quality bases (quality below {@code PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD}).
 *
 * @param read the read to inspect
 * @return number of leading bases to ignore
 */
int getNumClippedBasesAtStart(SAMRecord read) {
// compute total number of clipped bases (soft or hard clipped)
// check for hard clips (never consider these bases):
final Cigar c = read.getCigar();
final CigarElement first = c.getCigarElement(0);
int numStartClippedBases = 0;
if (first.getOperator() == CigarOperator.H) {
numStartClippedBases = first.getLength();
}
byte[] unclippedReadBases = read.getReadBases();
byte[] unclippedReadQuals = read.getBaseQualities();
// Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative,
// and may leave a string of Q2 bases still hanging off the reads.
// NOTE(review): getReadBases()/getBaseQualities() typically do NOT contain
// hard-clipped bases, yet the H-length is used as an index into these arrays,
// mixing two coordinate systems -- confirm this offset is intended.
for (int i=numStartClippedBases; i < unclippedReadBases.length; i++) {
// extend the clip while bases stay below the quality threshold
if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD)
numStartClippedBases++;
else
break;
}
return numStartClippedBases;
}
/**
 * Number of bases considered aligned once the clipped/low-quality prefix and
 * suffix have been trimmed from the read's total length.
 *
 * @param read the read to inspect
 * @return read length minus the start- and end-clipped base counts
 */
int getNumAlignedBases(SAMRecord read) {
    final int totalClipped = getNumClippedBasesAtStart(read) + getNumClippedBasesAtEnd(read);
    return read.getReadLength() - totalClipped;
}
/**
 * Computes how many bases at the END of the read should be treated as clipped:
 * any trailing hard clip declared in the CIGAR, extended by the preceding run of
 * low-quality bases (quality below {@code PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD}).
 * Mirror of {@code getNumClippedBasesAtStart}, scanning from the read's 3' end.
 *
 * @param read the read to inspect
 * @return number of trailing bases to ignore
 */
int getNumClippedBasesAtEnd(SAMRecord read) {
// compute total number of clipped bases (soft or hard clipped)
// check for hard clips (never consider these bases):
final Cigar c = read.getCigar();
CigarElement last = c.getCigarElement(c.numCigarElements()-1);
int numEndClippedBases = 0;
if (last.getOperator() == CigarOperator.H) {
numEndClippedBases = last.getLength();
}
byte[] unclippedReadBases = read.getReadBases();
byte[] unclippedReadQuals = read.getBaseQualities();
// Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative,
// and may leave a string of Q2 bases still hanging off the reads.
// NOTE(review): as with the start-clip variant, the H-length is used to index
// arrays that typically exclude hard-clipped bases -- confirm this is intended.
for (int i=unclippedReadBases.length-numEndClippedBases-1; i >= 0; i-- ){
// extend the clip while bases stay below the quality threshold
if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD)
numEndClippedBases++;
else
break;
}
return numEndClippedBases;
}
/**
 * Translates a raw read offset into coordinates relative to the first
 * unclipped (and above-quality-threshold) base of the read.
 *
 * @param read   the read the offset refers to
 * @param offset raw offset into the read
 * @return the offset shifted left by the number of start-clipped bases
 */
int getOffsetFromClippedReadStart(SAMRecord read, int offset) {
    final int startClip = getNumClippedBasesAtStart(read);
    return offset - startClip;
}
}

View File

@ -798,11 +798,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
private void generateAlternateConsensesFromKnownIndels(final Set<Consensus> altConsensesToPopulate, final int leftmostIndex, final byte[] reference) {
for ( VariantContext knownIndel : knownIndelsToTry ) {
if ( knownIndel == null || !knownIndel.isIndel() )
if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() )
continue;
byte[] indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length());
int start = knownIndel.getStart() - leftmostIndex + 1;
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel.isDeletion());
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel);
if ( c != null )
altConsensesToPopulate.add(c);
}
@ -988,7 +988,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
}
// create a Consensus from just the indel string that falls on the reference
private Consensus createAlternateConsensus(final int indexOnRef, final byte[] reference, final byte[] indelStr, final boolean isDeletion) {
private Consensus createAlternateConsensus(final int indexOnRef, final byte[] reference, final byte[] indelStr, final VariantContext indel) {
if ( indexOnRef < 0 || indexOnRef >= reference.length )
return null;
@ -1002,14 +1002,16 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
if ( indexOnRef > 0 )
cigar.add(new CigarElement(indexOnRef, CigarOperator.M));
if ( isDeletion ) {
if ( indel.isDeletion() ) {
refIdx += indelStr.length;
cigar.add(new CigarElement(indelStr.length, CigarOperator.D));
}
else {
else if ( indel.isInsertion() ) {
for ( byte b : indelStr )
sb.append((char)b);
cigar.add(new CigarElement(indelStr.length, CigarOperator.I));
} else {
throw new ReviewedStingException("Creating an alternate consensus from a complex indel is not allowed");
}
if ( reference.length - refIdx > 0 )

View File

@ -32,9 +32,9 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
/*import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Covariate;
import org.broadinstitute.sting.walkers.IndelCountCovariates.RecalDataManager;
import org.broadinstitute.sting.walkers.IndelCountCovariates.RecalDatum;
import org.broadinstitute.sting.walkers.IndelCountCovariates.RecalibrationArgumentCollection;
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDataManager;
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDatum;
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalibrationArgumentCollection;
*/import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
@ -57,7 +57,7 @@ import java.util.regex.Pattern;
public class PairHMMIndelErrorModel {
private final int BASE_QUAL_THRESHOLD = 10;
public static final int BASE_QUAL_THRESHOLD = 20;
private static final int MATCH_OFFSET = 0;

View File

@ -161,7 +161,7 @@ public class AnnotateMNPsWalker extends RodWalker<Integer, Integer> {
boolean atStartOfVc = curLocus.getStart() == vcLoc.getStart();
boolean atEndOfVc = curLocus.getStart() == vcLoc.getStop();
if (vc.getType() == VariantContext.Type.MNP) {
if (vc.isMNP()) {
logger.debug("Observed MNP at " + vcLoc);
if (isChrM(vc)) {

View File

@ -209,7 +209,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
String assay_sequence;
if ( vc.isSNP() )
assay_sequence = leading_bases + "[" + (char)ref.getBase() + "/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases;
else if ( vc.getType() == VariantContext.Type.MNP )
else if ( vc.isMNP() )
assay_sequence = leading_bases + "[" + new String(vc.getReference().getBases()) + "/" + new String(vc.getAlternateAllele(0).getBases())+"]"+trailing_bases.substring(vc.getReference().length()-1);
else if ( vc.isInsertion() )
assay_sequence = leading_bases + (char)ref.getBase() + "[-/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases;

View File

@ -113,7 +113,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING);
}
// deletions
else if ( vc.isDeletion() || vc.isMixed() || vc.getType() == VariantContext.Type.MNP ) {
else if ( vc.isDeletion() || vc.isMixed() || vc.isMNP() ) {
// we can't validate arbitrarily long deletions
if ( reportedRefAllele.length() > 100 ) {
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart()));
@ -123,7 +123,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
// deletions are associated with the (position of) the last (preceding) non-deleted base;
// hence to get actually deleted bases we need offset = 1
int offset = 1 ;
if ( vc.getType() == VariantContext.Type.MNP ) offset = 0; // if it's an MNP, the reported position IS the first modified base
if ( vc.isMNP() ) offset = 0; // if it's an MNP, the reported position IS the first modified base
byte[] refBytes = ref.getBases();
byte[] trueRef = new byte[reportedRefAllele.length()];
for (int i = 0; i < reportedRefAllele.length(); i++)

View File

@ -190,7 +190,7 @@ public class VCFUtils {
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, -1, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; if site is not biallelic, number of likelihoods if n*(n+1)/2"));
return result;
}

View File

@ -566,10 +566,21 @@ public class VariantContext implements Feature { // to enable tribble intergrati
return getType() == Type.INDEL && getAlternateAllele(0).isNull();
}
/**
 * Convenience check for "complex" indels: events whose type is INDEL but which
 * are neither a simple deletion nor a simple insertion.
 *
 * @return true if the alleles indicate neither a simple deletion nor a simple insertion
 */
public boolean isComplexIndel() {
    return isIndel() && !(isDeletion() || isInsertion());
}
/**
 * @return true if this variant context's computed type is {@code Type.SYMBOLIC}
 */
public boolean isSymbolic() {
return getType() == Type.SYMBOLIC;
}
/**
 * @return true if this variant context's computed type is {@code Type.MNP}
 *         (multi-nucleotide polymorphism)
 */
public boolean isMNP() {
return getType() == Type.MNP;
}
/**
* convenience method for indels
*

View File

@ -83,7 +83,7 @@ public abstract class BaseTest {
*/
public static final String MD5_FILE_DB_SUBDIR = "integrationtests";
public static final String testDir = "testdata/";
public static final String testDir = "public/testdata/";
/** before the class starts up */
static {

View File

@ -60,8 +60,8 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine();
Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
testEngine.setSAMFileIDs(samFiles);
testEngine.checkForDuplicateSamFiles();
@ -72,10 +72,10 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine();
Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
samFiles.add(new SAMReaderID(new File("testdata/exampleNORG.bam"), new Tags()));
samFiles.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
samFiles.add(new SAMReaderID(new File("testdata/exampleNORG.bam"), new Tags()));
samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
samFiles.add(new SAMReaderID(new File("public/testdata/exampleNORG.bam"), new Tags()));
samFiles.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
samFiles.add(new SAMReaderID(new File("public/testdata/exampleNORG.bam"), new Tags()));
testEngine.setSAMFileIDs(samFiles);
testEngine.checkForDuplicateSamFiles();
@ -97,7 +97,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
GATKArgumentCollection argCollection = new GATKArgumentCollection();
testEngine.setArguments(argCollection);
File fastaFile = new File("testdata/exampleFASTA.fasta");
File fastaFile = new File("public/testdata/exampleFASTA.fasta");
GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile));
testEngine.setGenomeLocParser(genomeLocParser);

View File

@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("a604a64252a8538b7d13f52bd068f797"));
Arrays.asList("258e1954e6ae55c89abc6a716e19cbe0"));
executeTest("test MultiSample Pilot1", spec);
}
@ -54,12 +54,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("5844eda3596732a16c8559f5bfbe1723"));
Arrays.asList("edeb1db288a24baff59575ceedd94243"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("db4664a1785c4efb4cd9057478aa846f"));
Arrays.asList("581990130d90071b084024f4cd7caf91"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@ -67,7 +67,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("36c70ec27a25f88fe2364bba2f961843"));
Arrays.asList("d120db27d694a6da32367cc4fb5770fa"));
executeTest("test SingleSample Pilot2", spec);
}
@ -77,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "212eab2024903997625ba98009063226";
private final static String COMPRESSED_OUTPUT_MD5 = "75e5c430ed39f79f24e375037a388dc4";
@Test
public void testCompressedOutput() {
@ -107,7 +107,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
String md5 = "f83a33a1ecc350cae0c002e4a43a7861";
String md5 = "a29615dd37222a11b8dadd341b53e43c";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -138,9 +138,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testCallingParameters() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "--min_base_quality_score 26", "d10d0be159d80e22b9c81970ee098daf" );
e.put( "--min_mapping_quality_score 26", "f76099c403b60b6045a0ae7d9f589dc4" );
e.put( "--p_nonref_model GRID_SEARCH", "cda395fdf7352e07537610f52a6d0cdc" );
e.put( "--min_base_quality_score 26", "93e6269e38db9bc1732555e9969e3648" );
e.put( "--min_mapping_quality_score 26", "64be99183c100caed4aa5f8bad64c7e9" );
e.put( "--p_nonref_model GRID_SEARCH", "0592fe33f705ad8e2f13619fcf157805" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -153,9 +153,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-sites_only", "9b85d9c10d634315d20aefa565dbab60" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "245abbb39de43e89f63918a6771c0c14" );
e.put( "--output_mode EMIT_ALL_SITES", "fb7a59b318ecdb46fd96024be7e41e0e" );
e.put( "-sites_only", "1483e637dc0279935a7f90d136d147bb" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "adcd91bc7dae8020df8caf1a30060e98" );
e.put( "--output_mode EMIT_ALL_SITES", "b708acc2fa40f336bcd2d0c70091e07e" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -169,12 +169,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("f76099c403b60b6045a0ae7d9f589dc4"));
Arrays.asList("64be99183c100caed4aa5f8bad64c7e9"));
executeTest("test confidence 1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("879e5ab09bd0d37e0300dd34ec09db81"));
Arrays.asList("e76ca54232d02f0d92730e1affeb804e"));
executeTest("test confidence 2", spec2);
}
@ -186,8 +186,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "c7123f7b84b402f4959db950326afc13" );
e.put( 1.0 / 1850, "75e6043a68265ab6deb284bb753801f9" );
e.put( 0.01, "18d37f7f107853b5e32c757b4e143205" );
e.put( 1.0 / 1850, "2bcb90ce2f7542bf590f7612018fae8e" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("3f45b2af75123e48b89fa1759c444ec0"));
Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -230,7 +230,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("cede928592575e617f1323866348c256"));
Arrays.asList("0919ab7e513c377610e23a67d33608fa"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -244,7 +244,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq OFF",
1,
Arrays.asList("3f45b2af75123e48b89fa1759c444ec0"));
Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d"));
executeTest(String.format("test calling with BAQ OFF"), spec);
}
@ -263,7 +263,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("7fe14d81f12d5d57e3a522b2a4f07fc6"));
Arrays.asList("cb37348c41b8181be829912730f747e1"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -278,7 +278,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("a7da8acce1957334619f3dfeac3d1379"));
Arrays.asList("ca5b6a5fb53ae401b146cc3044f454f2"));
executeTest(String.format("test indel caller in SLX witn low min allele count"), spec);
}
@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("234b6c336890cc6d9a495bc40f09d126"));
Arrays.asList("ca4343a4ab6d3cce94ce61d7d1910f81"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -301,14 +301,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("9e342e3b73ae4887620195417e1af44a"));
Arrays.asList("3f555b53e9dd14cf7cdf96c24e322364"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("f265726403ca3f28518cb4290b7bee84"));
Arrays.asList("1b9764b783acf7822edc58e6822eef5b"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
}

View File

@ -52,7 +52,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
WalkerTestSpec spec2 = new WalkerTestSpec(
baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -D " + GATKDataLocation + "dbsnp_129_b36.rod",
1,
Arrays.asList("78850024ac9ff3ba51b6f097c7041c1d"));
Arrays.asList("05a114623c126b0398fbc1703437461e"));
executeTest("realigner known indels only from dbsnp", spec2);
}

View File

@ -40,7 +40,7 @@ public class BatchMergeIntegrationTest extends WalkerTest {
+ " -B:alleles,VCF " + alleles
+ " -I " + bam,
1,
Arrays.asList("b7839064dc4979400af4792460d9884b"));
Arrays.asList("f4ed8f4ef2cba96823c06e90e9d0de35"));
executeTest("testBatchMerge UG genotype given alleles:" + new File(bam).getName() + " with " + new File(alleles).getName(), spec);
}
}

View File

@ -21,7 +21,7 @@ public class BedParserUnitTest extends BaseTest {
private static IndexedFastaSequenceFile seq;
private GenomeLocParser genomeLocParser;
private File bedFile = new File("testdata/sampleBedFile.bed");
private File bedFile = new File("public/testdata/sampleBedFile.bed");
@BeforeClass
public void beforeTests() {

View File

@ -39,7 +39,7 @@ import java.io.IOException;
*/
public class HapMapUnitTest {
// our sample hapmap file
private final static File hapMapFile = new File("testdata/genotypes_chr1_ASW_phase3.3_first500.hapmap");
private final static File hapMapFile = new File("public/testdata/genotypes_chr1_ASW_phase3.3_first500.hapmap");
private final static String knownLine = "rs2185539 C/T chr1 556738 + ncbi_b36 bbs urn:lsid:bbs.hapmap.org:Protocol:Phase3.r3:1 urn:lsid:bbs.hapmap.org:Assay:Phase3.r3_r" +
"s2185539:1 urn:lsid:dcc.hapmap.org:Panel:US_African-30-trios:4 QC+ CC TC TT CT CC CC CC CC CC CC CC CC CC";
/**

View File

@ -17,8 +17,8 @@ import java.util.*;
*/
public class IndexFactoryUnitTest {
File inputFile = new File("testdata/HiSeq.10000.vcf");
File outputFile = new File("testdata/onTheFlyOutputTest.vcf");
File inputFile = new File("public/testdata/HiSeq.10000.vcf");
File outputFile = new File("public/testdata/onTheFlyOutputTest.vcf");
File outputFileIndex = Tribble.indexFile(outputFile);
/**

View File

@ -49,12 +49,12 @@ public class ListFileUtilsUnitTest extends BaseTest {
public void testIgnoreBlankLinesInBAMListFiles() throws Exception {
File tempListFile = createTempListFile("testIgnoreBlankLines",
"",
"testdata/exampleBAM.bam",
"public/testdata/exampleBAM.bam",
" "
);
List<SAMReaderID> expectedBAMFileListAfterUnpacking = new ArrayList<SAMReaderID>();
expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
performBAMListFileUnpackingTest(tempListFile, expectedBAMFileListAfterUnpacking);
}
@ -63,13 +63,13 @@ public class ListFileUtilsUnitTest extends BaseTest {
public void testCommentSupportInBAMListFiles() throws Exception {
File tempListFile = createTempListFile("testCommentSupport",
"#",
"testdata/exampleBAM.bam",
"#testdata/foo.bam",
" # testdata/bar.bam"
"public/testdata/exampleBAM.bam",
"#public/testdata/foo.bam",
" # public/testdata/bar.bam"
);
List<SAMReaderID> expectedBAMFileListAfterUnpacking = new ArrayList<SAMReaderID>();
expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("testdata/exampleBAM.bam"), new Tags()));
expectedBAMFileListAfterUnpacking.add(new SAMReaderID(new File("public/testdata/exampleBAM.bam"), new Tags()));
performBAMListFileUnpackingTest(tempListFile, expectedBAMFileListAfterUnpacking);
}

View File

@ -29,7 +29,7 @@ public class GenomeLocProcessingTrackerUnitTest extends BaseTest {
IndexedFastaSequenceFile fasta = null;
GenomeLocParser genomeLocParser = null;
String chr1 = null;
private final static String FILE_ROOT = "testdata/GLPTFile";
private final static String FILE_ROOT = "public/testdata/GLPTFile";
@BeforeTest
public void before() {

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.gatk.contexts.variantcontext;
package org.broadinstitute.sting.utils.variantcontext;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
@ -19,12 +19,12 @@ public class VariantContextIntegrationTest extends WalkerTest {
static HashMap<String, String> expectations = new HashMap<String, String>();
static {
expectations.put("-L 1:1-10000 --printPerLocus", "493bf9bcde93c08c8c46f72c8e98cf2f");
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "bd9e062b23c2c48fef8c299dc1d294d5");
expectations.put("-L 1:1-10000 --printPerLocus", "e4ee2eaa3114888e918a1c82df7a027a");
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "5b5635e4877d82e8a27d70dac24bda2f");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "ceced3f270b4fe407ee83bc9028becde");
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "9a9b9e283553c28bf58de1cafa38fe92");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "2097e32988d603d3b353b50218c86d3b");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "91c6a9489256d9ce77c8fedf7221a961");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "033bd952fca048fe1a4f6422b57ab2ed");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5e40980c02797f90821317874426a87a");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "e5a00766f8c1ff9cf92310bafdec3126");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc");
@ -58,7 +58,7 @@ public class VariantContextIntegrationTest extends WalkerTest {
// this really just tests that we are seeing the same number of objects over all of chr1
WalkerTestSpec spec = new WalkerTestSpec( root + " -L 1" + " -o %s",
1, // just one output file
Arrays.asList("d2a3f2fe329a0a64145cfd19fde45b99"));
Arrays.asList("529f936aa6c303658b23caf4e527782f"));
executeTest("testLargeScaleConversion", spec);
}
}

View File

@ -0,0 +1,91 @@
package core
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk._
import net.sf.samtools.SAMFileReader
/**
 * Queue pipeline that recalibrates base quality scores for a BAM file:
 * count covariates, apply the recalibration table, re-count covariates on the
 * recalibrated BAM, and emit before/after AnalyzeCovariates reports.
 *
 * Created by IntelliJ IDEA.
 * User: carneiro
 * Date: 4/20/11
 * Time: 16:29
 */
class RecalibrateBaseQualities extends QScript {
@Input(doc="path to GenomeAnalysisTK.jar", shortName="gatk", required=true)
var GATKjar: File = _
@Input(doc="input BAM file - or list of BAM files", shortName="i", required=true)
var input: File = _
// NOTE(review): shortName "r" (R resources) and "R" (reference) differ only by
// case -- confirm the Queue argument parser treats short names case-sensitively.
@Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=false)
var R: String = new File("/humgen/gsa-scr1/carneiro/stable/R")
@Input(doc="Reference fasta file", shortName="R", required=false)
var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
@Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false)
var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
// Prefix for per-job log/analysis names.
val queueLogDir: String = ".qlog/"
// Contig count of the input BAM; used as the scatter count for parallel jobs.
var nContigs: Int = 0
// Reads the BAM header to find how many contigs its sequence dictionary declares.
// NOTE(review): the SAMFileReader is never closed, and wrapping the argument in
// new File(...) when it is already a File looks redundant -- confirm against
// Queue's File type before changing.
def getNumberOfContigs(bamFile: File): Int = {
val samReader = new SAMFileReader(new File(bamFile))
return samReader.getFileHeader.getSequenceDictionary.getSequences.size()
}
// Wires the pipeline: covariate pass before and after recalibration, plus an
// AnalyzeCovariates report for each pass.
def script = {
nContigs = getNumberOfContigs(input)
val recalFile1: File = new File("recal1.csv")
val recalFile2: File = new File("recal2.csv")
val recalBam: File = swapExt(input, ".bam", "recal.bam")
val path1: String = "before"
val path2: String = "after"
add(cov(input, recalFile1),
recal(input, recalFile1, recalBam),
cov(recalBam, recalFile2),
analyzeCovariates(recalFile1, path1),
analyzeCovariates(recalFile2, path2))
}
// Common GATK job settings shared by the command-line steps below.
trait CommandLineGATKArgs extends CommandLineGATK {
this.jarFile = GATKjar
this.reference_sequence = reference
this.memoryLimit = 4
this.isIntermediate = true
}
// CountCovariates pass: tabulates the listed covariates against dbSNP for one BAM.
case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
this.input_file :+= inBam
this.recal_file = outRecalFile
this.analysisName = queueLogDir + outRecalFile + ".covariates"
this.jobName = queueLogDir + outRecalFile + ".covariates"
this.scatterCount = nContigs
}
// TableRecalibration pass: writes the recalibrated BAM (kept as a final output,
// hence isIntermediate = false overriding the trait default).
case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs {
this.input_file :+= inBam
this.recal_file = inRecalFile
this.out = outBam
this.isIntermediate = false
this.analysisName = queueLogDir + outBam + ".recalibration"
this.jobName = queueLogDir + outBam + ".recalibration"
this.scatterCount = nContigs
}
// AnalyzeCovariates report generation for a single recalibration table.
// NOTE(review): does not mix in CommandLineGATKArgs, so it inherits none of the
// shared jar/reference/memory settings -- confirm that is intentional.
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
this.resources = R
this.recal_file = inRecalFile
this.output_dir = outPath.toString
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"
this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
}
}