Merge branch 'develop' of github.com:broadinstitute/cmi-gatk into develop

This commit is contained in:
Guillermo del Angel 2012-10-31 19:51:53 -04:00
commit 6aec172c16
18 changed files with 325 additions and 270 deletions

View File

@ -39,57 +39,12 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
// optimization: only allocate temp arrays once per thread // optimization: only allocate temp arrays once per thread
private final ThreadLocal<byte[]> threadLocalTempQualArray = new ThreadLocalArray<byte[]>(EventType.values().length, byte.class); private final ThreadLocal<byte[]> threadLocalTempQualArray = new ThreadLocalArray<byte[]>(EventType.values().length, byte.class);
private final ThreadLocal<boolean[]> threadLocalTempErrorArray = new ThreadLocalArray<boolean[]>(EventType.values().length, boolean.class);
private final ThreadLocal<double[]> threadLocalTempFractionalErrorArray = new ThreadLocalArray<double[]>(EventType.values().length, double.class); private final ThreadLocal<double[]> threadLocalTempFractionalErrorArray = new ThreadLocalArray<double[]>(EventType.values().length, double.class);
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
super.initialize(covariates, recalibrationTables); super.initialize(covariates, recalibrationTables);
} }
/**
* Loop through the list of requested covariates and pick out the value from the read, offset, and reference
* Using the list of covariate values as a key, pick out the RecalDatum and increment,
* adding one to the number of observations and potentially one to the number of mismatches for all three
* categories (mismatches, insertions and deletions).
*
* @param pileupElement The pileup element to update
* @param refBase The reference base at this locus
*/
@Override
public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
final int offset = pileupElement.getOffset();
final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead());
byte[] tempQualArray = threadLocalTempQualArray.get();
boolean[] tempErrorArray = threadLocalTempErrorArray.get();
tempQualArray[EventType.BASE_SUBSTITUTION.index] = pileupElement.getQual();
tempErrorArray[EventType.BASE_SUBSTITUTION.index] = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase);
tempQualArray[EventType.BASE_INSERTION.index] = pileupElement.getBaseInsertionQual();
tempErrorArray[EventType.BASE_INSERTION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterInsertion() : pileupElement.isBeforeInsertion();
tempQualArray[EventType.BASE_DELETION.index] = pileupElement.getBaseDeletionQual();
tempErrorArray[EventType.BASE_DELETION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterDeletedBase() : pileupElement.isBeforeDeletedBase();
for (final EventType eventType : EventType.values()) {
final int[] keys = readCovariates.getKeySet(offset, eventType);
final int eventIndex = eventType.index;
final byte qual = tempQualArray[eventIndex];
final boolean isError = tempErrorArray[eventIndex];
// TODO: should this really be combine rather than increment?
combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
}
}
}
@Override @Override
public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) { public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
for( int offset = 0; offset < read.getReadBases().length; offset++ ) { for( int offset = 0; offset < read.getReadBases().length; offset++ ) {

View File

@ -51,21 +51,21 @@ public class BQSRIntegrationTest extends WalkerTest {
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam"; String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
String HiSeqInterval = "chr1:10,000,000-10,100,000"; String HiSeqInterval = "chr1:10,000,000-10,100,000";
return new Object[][]{ return new Object[][]{
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "55a46d8f5d2f9acfa2d7659e18b6df43")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "387b41dc2221a1a4a782958944662b25")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "8e930f56a8905a5999af7d6ba8a92f91")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "b5e26902e76abbd59f94f65c70d18165")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "8e87bee4bd6531b405082c4da785f1f5")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "a8a9c3f83269911cb61c5fe8fb98dc4a")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "b309a5f57b861d7f31cb76cdac4ff8a7")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "f43a0473101c63ae93444c300d843e81")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "4c75d47ed2cf93b499be8fbb29b24dfd")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "9e05e63339d4716584bfc717cab6bd0f")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "43b06e5568a89e4ce1dd9146ce580c89")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "1cf9b9c9c64617dc0f3d2f203f918dbe")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "25f4f48dba27475b0cd7c06ef0239aba")}, {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "aa1949a77bc3066fee551a217c970c0d")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "dfcba9acc32b4a1dfeceea135b48615a")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "f70d8b5358bc2f76696f14b7a807ede0")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "e8077b721f2e6f51c1945b6f6236835c")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4c0f63e06830681560a1e9f9aad9fe98")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "fbdc8d0fd312e3a7f49063c580cf5d92")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "be2812cd3dae3c326cf35ae3f1c8ad9e")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "4f47415628201a4f3c33e48ec066677b")}, {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "03c29a0c1d21f72b12daf51cec111599")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "1e89d2b88f4218363b9322b38e9536f2")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "7080b2cad02ec6e67ebc766b2dccebf8")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "a7beb0b16756257a274eecf73474ed90")}, {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "30e76055c16843b6e33e5b9bd8ced57c")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "dfcba9acc32b4a1dfeceea135b48615a")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "f70d8b5358bc2f76696f14b7a807ede0")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "2082c70e08f1c14290c3812021832f83")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "5e657fd6a44dcdc7674b6e5a2de5dc83")},
}; };
} }

View File

@ -364,7 +364,7 @@ public class VariantContextAdaptors {
long end = hapmap.getEnd(); long end = hapmap.getEnd();
if ( deletionLength > 0 ) if ( deletionLength > 0 )
end += deletionLength; end += (deletionLength - 1);
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make(); VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make();
return vc; return vc;
} }

View File

@ -25,35 +25,41 @@
package org.broadinstitute.sting.gatk.walkers.bqsr; package org.broadinstitute.sting.gatk.walkers.bqsr;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.CigarElement;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Advanced;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; import org.broadinstitute.sting.gatk.filters.*;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.recalibration.*;
import org.broadinstitute.sting.utils.recalibration.QuantizationInfo;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.PrintStream; import java.io.PrintStream;
import java.lang.reflect.Constructor; import java.lang.reflect.Constructor;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/** /**
* First pass of the base quality score recalibration -- Generates recalibration table based on various user-specified covariates (such as read group, reported quality score, machine cycle, and nucleotide context). * First pass of the base quality score recalibration -- Generates recalibration table based on various user-specified covariates (such as read group, reported quality score, machine cycle, and nucleotide context).
@ -103,17 +109,18 @@ import java.util.ArrayList;
* </pre> * </pre>
*/ */
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @DocumentedGATKFeature(groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class})
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN) @BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
@By(DataSource.READS) @ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class, UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class})
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file @PartitionBy(PartitionType.READ)
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSchedulable {
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
public class BaseRecalibrator extends LocusWalker<Long, Long> {
@ArgumentCollection @ArgumentCollection
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
@Advanced
@Argument(fullName = "bqsrBAQGapOpenPenalty", shortName="bqsrBAQGOP", doc="BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false)
public double BAQGOP = BAQ.DEFAULT_GOP;
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
private RecalibrationTables recalibrationTables; private RecalibrationTables recalibrationTables;
@ -124,12 +131,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> {
private int minimumQToUse; private int minimumQToUse;
protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation."; private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.";
private BAQ baq; // BAQ the reads on the fly to generate the alignment uncertainty vector
private IndexedFastaSequenceFile referenceReader; // fasta reference reader for use with BAQ calculation
/** /**
* Parse the -cov arguments and create a list of covariates to be used here * Parse the -cov arguments and create a list of covariates to be used here
@ -137,6 +144,8 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> {
*/ */
public void initialize() { public void initialize() {
baq = new BAQ(BAQGOP); // setup the BAQ object with the provided gap open penalty
// check for unsupported access // check for unsupported access
if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals) if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals)
throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument"); throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument");
@ -185,13 +194,21 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> {
recalibrationEngine.initialize(requestedCovariates, recalibrationTables); recalibrationEngine.initialize(requestedCovariates, recalibrationTables);
minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN; minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN;
try {
// fasta reference reader for use with BAQ calculation
referenceReader = new CachingIndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
} catch( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e);
}
} }
private RecalibrationEngine initializeRecalibrationEngine() { private RecalibrationEngine initializeRecalibrationEngine() {
final Class recalibrationEngineClass = GATKLiteUtils.getProtectedClassIfAvailable(RecalibrationEngine.class); final Class recalibrationEngineClass = GATKLiteUtils.getProtectedClassIfAvailable(RecalibrationEngine.class);
try { try {
Constructor constructor = recalibrationEngineClass.getDeclaredConstructor((Class[])null); final Constructor constructor = recalibrationEngineClass.getDeclaredConstructor((Class[])null);
constructor.setAccessible(true); constructor.setAccessible(true);
return (RecalibrationEngine)constructor.newInstance(); return (RecalibrationEngine)constructor.newInstance();
} }
@ -200,60 +217,207 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> {
} }
} }
private boolean readHasBeenSkipped( final GATKSAMRecord read ) { private boolean isLowQualityBase( final GATKSAMRecord read, final int offset ) {
return read.containsTemporaryAttribute(SKIP_RECORD_ATTRIBUTE); return read.getBaseQualities()[offset] < minimumQToUse;
}
private boolean isLowQualityBase( final PileupElement p ) {
return p.getQual() < minimumQToUse;
}
private boolean readNotSeen( final GATKSAMRecord read ) {
return !read.containsTemporaryAttribute(SEEN_ATTRIBUTE);
} }
/** /**
* For each read at this locus get the various covariate values and increment that location in the map based on * For each read at this locus get the various covariate values and increment that location in the map based on
* whether or not the base matches the reference at this particular location * whether or not the base matches the reference at this particular location
*
* @param tracker the reference metadata tracker
* @param ref the reference context
* @param context the alignment context
* @return returns 1, but this value isn't used in the reduce step
*/ */
public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public Long map( final ReferenceContext ref, final GATKSAMRecord originalRead, final RefMetaDataTracker metaDataTracker ) {
long countedSites = 0L;
// Only analyze sites not present in the provided known sites
if (tracker.getValues(RAC.knownSites).size() == 0) {
for (final PileupElement p : context.getBasePileup()) {
final GATKSAMRecord read = p.getRead();
final int offset = p.getOffset();
// This read has been marked to be skipped or base is low quality (we don't recalibrate low quality bases) final GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(originalRead);
if (readHasBeenSkipped(read) || p.isInsertionAtBeginningOfRead() || isLowQualityBase(p) ) if( read.isEmpty() ) { return 0L; } // the whole read was inside the adaptor so skip it
continue;
if (readNotSeen(read)) { RecalUtils.parsePlatformForRead(read, RAC);
read.setTemporaryAttribute(SEEN_ATTRIBUTE, true); if (!RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { // parse the solid color space and check for color no-calls
RecalUtils.parsePlatformForRead(read, RAC); return 0L; // skip this read completely
if (!RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { }
read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true); read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
continue;
final boolean[] skip = calculateSkipArray(read, metaDataTracker); // skip known sites of variation as well as low quality and non-regular bases
final int[] isSNP = calculateIsSNP(read, ref, originalRead);
final int[] isInsertion = calculateIsIndel(read, EventType.BASE_INSERTION);
final int[] isDeletion = calculateIsIndel(read, EventType.BASE_DELETION);
final byte[] baqArray = calculateBAQArray(read);
if( baqArray != null ) { // some reads just can't be BAQ'ed
final double[] snpErrors = calculateFractionalErrorArray(isSNP, baqArray);
final double[] insertionErrors = calculateFractionalErrorArray(isInsertion, baqArray);
final double[] deletionErrors = calculateFractionalErrorArray(isDeletion, baqArray);
recalibrationEngine.updateDataForRead(read, skip, snpErrors, insertionErrors, deletionErrors);
return 1L;
} else {
return 0L;
}
}
protected boolean[] calculateSkipArray( final GATKSAMRecord read, final RefMetaDataTracker metaDataTracker ) {
final byte[] bases = read.getReadBases();
final boolean[] skip = new boolean[bases.length];
final boolean[] knownSites = calculateKnownSites(read, metaDataTracker.getValues(RAC.knownSites));
for( int iii = 0; iii < bases.length; iii++ ) {
skip[iii] = !BaseUtils.isRegularBase(bases[iii]) || isLowQualityBase(read, iii) || knownSites[iii] || badSolidOffset(read, iii);
}
return skip;
}
protected boolean badSolidOffset( final GATKSAMRecord read, final int offset ) {
return ReadUtils.isSOLiDRead(read) && RAC.SOLID_RECAL_MODE != RecalUtils.SOLID_RECAL_MODE.DO_NOTHING && !RecalUtils.isColorSpaceConsistent(read, offset);
}
protected boolean[] calculateKnownSites( final GATKSAMRecord read, final List<Feature> features ) {
final int BUFFER_SIZE = 0;
final int readLength = read.getReadBases().length;
final boolean[] knownSites = new boolean[readLength];
Arrays.fill(knownSites, false);
for( final Feature f : features ) {
int featureStartOnRead = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), f.getStart(), ReadUtils.ClippingTail.LEFT_TAIL, true); // BUGBUG: should I use LEFT_TAIL here?
if( featureStartOnRead == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { featureStartOnRead = 0; }
int featureEndOnRead = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), f.getEnd(), ReadUtils.ClippingTail.LEFT_TAIL, true);
if( featureEndOnRead == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { featureEndOnRead = readLength; }
Arrays.fill(knownSites, Math.max(0, featureStartOnRead - BUFFER_SIZE), Math.min(readLength, featureEndOnRead + 1 + BUFFER_SIZE), true);
}
return knownSites;
}
// BUGBUG: can be merged with calculateIsIndel
protected static int[] calculateIsSNP( final GATKSAMRecord read, final ReferenceContext ref, final GATKSAMRecord originalRead ) {
final byte[] readBases = read.getReadBases();
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), read.getAlignmentStart() - originalRead.getAlignmentStart(), ref.getBases().length + read.getAlignmentEnd() - originalRead.getAlignmentEnd());
final int[] snp = new int[readBases.length];
int readPos = 0;
int refPos = 0;
for ( final CigarElement ce : read.getCigar().getCigarElements() ) {
final int elementLength = ce.getLength();
switch (ce.getOperator()) {
case M:
case EQ:
case X:
for( int iii = 0; iii < elementLength; iii++ ) {
snp[readPos] = ( BaseUtils.basesAreEqual(readBases[readPos], refBases[refPos]) ? 0 : 1 );
readPos++;
refPos++;
} }
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates)); break;
} case D:
case N:
// SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it refPos += elementLength;
if (!ReadUtils.isSOLiDRead(read) || break;
RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING || case I:
RecalUtils.isColorSpaceConsistent(read, offset)) case S: // ReferenceContext doesn't have the soft clipped bases!
// This base finally passed all the checks for a good base, so add it to the big data hashmap readPos += elementLength;
recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); break;
case H:
case P:
break;
default:
throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
} }
countedSites++; }
return snp;
}
protected static int[] calculateIsIndel( final GATKSAMRecord read, final EventType mode ) {
final byte[] readBases = read.getReadBases();
final int[] indel = new int[readBases.length];
Arrays.fill(indel, 0);
int readPos = 0;
for ( final CigarElement ce : read.getCigar().getCigarElements() ) {
final int elementLength = ce.getLength();
switch (ce.getOperator()) {
case M:
case EQ:
case X:
case S:
{
readPos += elementLength;
break;
}
case D:
{
final int index = ( read.getReadNegativeStrandFlag() ? readPos : ( readPos > 0 ? readPos - 1 : readPos ) );
indel[index] = ( mode.equals(EventType.BASE_DELETION) ? 1 : 0 );
break;
}
case I:
{
final boolean forwardStrandRead = !read.getReadNegativeStrandFlag();
if( forwardStrandRead ) {
indel[(readPos > 0 ? readPos - 1 : readPos)] = ( mode.equals(EventType.BASE_INSERTION) ? 1 : 0 );
}
for (int iii = 0; iii < elementLength; iii++) {
readPos++;
}
if( !forwardStrandRead ) {
indel[(readPos < indel.length ? readPos : readPos - 1)] = ( mode.equals(EventType.BASE_INSERTION) ? 1 : 0 );
}
break;
}
case N:
case H:
case P:
break;
default:
throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
}
}
return indel;
}
protected static double[] calculateFractionalErrorArray( final int[] errorArray, final byte[] baqArray ) {
if(errorArray.length != baqArray.length ) {
throw new ReviewedStingException("Array length mismatch detected. Malformed read?");
} }
return countedSites; final byte NO_BAQ_UNCERTAINTY = (byte)'@';
final int BLOCK_START_UNSET = -1;
final double[] fractionalErrors = new double[baqArray.length];
Arrays.fill(fractionalErrors, 0.0);
boolean inBlock = false;
int blockStartIndex = BLOCK_START_UNSET;
int iii;
for( iii = 0; iii < fractionalErrors.length; iii++ ) {
if( baqArray[iii] == NO_BAQ_UNCERTAINTY ) {
if( !inBlock ) {
fractionalErrors[iii] = (double) errorArray[iii];
} else {
calculateAndStoreErrorsInBlock(iii, blockStartIndex, errorArray, fractionalErrors);
inBlock = false; // reset state variables
blockStartIndex = BLOCK_START_UNSET; // reset state variables
}
} else {
inBlock = true;
if( blockStartIndex == BLOCK_START_UNSET ) { blockStartIndex = iii; }
}
}
if( inBlock ) {
calculateAndStoreErrorsInBlock(iii-1, blockStartIndex, errorArray, fractionalErrors);
}
if( fractionalErrors.length != errorArray.length ) {
throw new ReviewedStingException("Output array length mismatch detected. Malformed read?");
}
return fractionalErrors;
}
private static void calculateAndStoreErrorsInBlock( final int iii,
final int blockStartIndex,
final int[] errorArray,
final double[] fractionalErrors ) {
int totalErrors = 0;
for( int jjj = Math.max(0,blockStartIndex-1); jjj <= iii; jjj++ ) {
totalErrors += errorArray[jjj];
}
for( int jjj = Math.max(0, blockStartIndex-1); jjj <= iii; jjj++ ) {
fractionalErrors[jjj] = ((double) totalErrors) / ((double)(iii - Math.max(0,blockStartIndex-1) + 1));
}
}
private byte[] calculateBAQArray( final GATKSAMRecord read ) {
baq.baqRead(read, referenceReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.ADD_TAG);
return BAQ.getBAQTag(read);
} }
/** /**
@ -286,12 +450,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> {
generateReport(); generateReport();
logger.info("...done!"); logger.info("...done!");
if (RAC.RECAL_PDF_FILE != null) { if ( RAC.RECAL_PDF_FILE != null ) {
logger.info("Generating recalibration plots..."); logger.info("Generating recalibration plots...");
generatePlots(); generatePlots();
} }
logger.info("Processed: " + result + " sites"); logger.info("Processed: " + result + " reads");
} }
private void generatePlots() { private void generatePlots() {
@ -304,7 +468,6 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> {
RecalUtils.generateRecalibrationPlot(RAC, recalibrationTables, requestedCovariates); RecalUtils.generateRecalibrationPlot(RAC, recalibrationTables, requestedCovariates);
} }
/** /**
* go through the quality score table and use the # observations and the empirical quality score * go through the quality score table and use the # observations and the empirical quality score
* to build a quality score histogram for quantization. Then use the QuantizeQual algorithm to * to build a quality score histogram for quantization. Then use the QuantizeQual algorithm to
@ -318,4 +481,3 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> {
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates); RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates);
} }
} }

View File

@ -33,7 +33,5 @@ public interface RecalibrationEngine {
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables); public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables);
public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase);
public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors); public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors);
} }

View File

@ -46,41 +46,30 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
this.recalibrationTables = recalibrationTables; this.recalibrationTables = recalibrationTables;
} }
/**
* Loop through the list of requested covariates and pick out the value from the read, offset, and reference
* Using the list of covariate values as a key, pick out the RecalDatum and increment,
* adding one to the number of observations and potentially one to the number of mismatches for mismatches only.
*
* @param pileupElement The pileup element to update
* @param refBase The reference base at this locus
*/
@Override
public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
final int offset = pileupElement.getOffset();
final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead());
final byte qual = pileupElement.getQual();
final boolean isError = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase);
final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION);
final int eventIndex = EventType.BASE_SUBSTITUTION.index;
// TODO: should this really be combine rather than increment?
combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
}
}
@Override @Override
public void updateDataForRead( final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) { public void updateDataForRead( final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
throw new UnsupportedOperationException("Delocalized BQSR is not available in the GATK-lite version"); for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
if( !skip[offset] ) {
final ReadCovariates readCovariates = covariateKeySetFrom(read);
final byte qual = read.getBaseQualities()[offset];
final double isError = snpErrors[offset];
final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION);
final int eventIndex = EventType.BASE_SUBSTITUTION.index;
combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
}
}
}
} }
/** /**
@ -90,10 +79,6 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
* @param isError whether or not the observation is an error * @param isError whether or not the observation is an error
* @return a new RecalDatum object with the observation and the error * @return a new RecalDatum object with the observation and the error
*/ */
protected RecalDatum createDatumObject(final byte reportedQual, final boolean isError) {
return new RecalDatum(1, isError ? 1:0, reportedQual);
}
protected RecalDatum createDatumObject(final byte reportedQual, final double isError) { protected RecalDatum createDatumObject(final byte reportedQual, final double isError) {
return new RecalDatum(1, isError, reportedQual); return new RecalDatum(1, isError, reportedQual);
} }
@ -108,46 +93,6 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
return (ReadCovariates) read.getTemporaryAttribute(BaseRecalibrator.COVARS_ATTRIBUTE); return (ReadCovariates) read.getTemporaryAttribute(BaseRecalibrator.COVARS_ATTRIBUTE);
} }
/**
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
* if there isn't already one.
*
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
* to return false if another thread inserts a new item at our position in the middle of our put operation.
*
* @param table the table that holds/will hold our item
* @param qual qual for this event
* @param isError error status for this event
* @param keys location in table of our item
*/
protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table, final byte qual, final boolean isError, final int... keys ) {
final RecalDatum existingDatum = table.get(keys);
// There are three cases here:
//
// 1. The original get succeeded (existingDatum != null) because there already was an item in this position.
// In this case we can just increment the existing item.
//
// 2. The original get failed (existingDatum == null), and we successfully put a new item in this position
// and are done.
//
// 3. The original get failed (existingDatum == null), AND the put fails because another thread comes along
// in the interim and puts an item in this position. In this case we need to do another get after the
// failed put to get the item the other thread put here and increment it.
if ( existingDatum == null ) {
// No existing item, try to put a new one
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
// Failed to put a new item because another thread came along and put an item here first.
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
table.get(keys).increment(isError);
}
}
else {
// Easy case: already an item here, so increment it
existingDatum.increment(isError);
}
}
/** /**
* Increments the RecalDatum at the specified position in the specified table, or put a new item there * Increments the RecalDatum at the specified position in the specified table, or put a new item there
* if there isn't already one. * if there isn't already one.
@ -177,36 +122,6 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
} }
} }
/**
* Combines the RecalDatum at the specified position in the specified table with a new RecalDatum, or put a
* new item there if there isn't already one.
*
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
* to return false if another thread inserts a new item at our position in the middle of our put operation.
*
* @param table the table that holds/will hold our item
* @param qual qual for this event
* @param isError error status for this event
* @param keys location in table of our item
*/
protected void combineDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table, final byte qual, final boolean isError, final int... keys ) {
final RecalDatum existingDatum = table.get(keys);
final RecalDatum newDatum = createDatumObject(qual, isError);
if ( existingDatum == null ) {
// No existing item, try to put a new one
if ( ! table.put(newDatum, keys) ) {
// Failed to put a new item because another thread came along and put an item here first.
// Get the newly-put item and combine it with our item (item is guaranteed to exist at this point)
table.get(keys).combine(newDatum);
}
}
else {
// Easy case: already an item here, so combine it with our item
existingDatum.combine(newDatum);
}
}
/** /**
* Combines the RecalDatum at the specified position in the specified table with a new RecalDatum, or put a * Combines the RecalDatum at the specified position in the specified table with a new RecalDatum, or put a
* new item there if there isn't already one. * new item there if there isn't already one.

View File

@ -46,16 +46,20 @@ import java.util.Set;
/** /**
* Strictly validates a variants file. * Validates a VCF file with an extra strict set of criteria.
* *
* <p> * <p>
* ValidateVariants is a GATK tool that takes a VCF file and validates much of the information inside it. * ValidateVariants is a GATK tool that takes a VCF file and validates much of the information inside it.
* Checks include the correctness of the reference base(s), accuracy of AC & AN values, tests against rsIDs * In addition to standard adherence to the VCF specification, this tool performs extra checks to make ensure
* when a dbSNP file is provided, and that all alternate alleles are present in at least one sample. * the information contained within the file is correct. Checks include the correctness of the reference base(s),
* accuracy of AC & AN values, tests against rsIDs when a dbSNP file is provided, and that all alternate alleles
* are present in at least one sample.
*
* If you are looking simply to test the adherence to the VCF specification, use --validationType NONE.
* *
* <h2>Input</h2> * <h2>Input</h2>
* <p> * <p>
* A variant set to filter. * A variant set to validate.
* </p> * </p>
* *
* <h2>Examples</h2> * <h2>Examples</h2>
@ -79,10 +83,9 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
public enum ValidationType { public enum ValidationType {
ALL, REF, IDS, ALLELES, CHR_COUNTS ALL, REF, IDS, ALLELES, CHR_COUNTS, NONE
} }
@Hidden
@Argument(fullName = "validationType", shortName = "type", doc = "which validation type to run", required = false) @Argument(fullName = "validationType", shortName = "type", doc = "which validation type to run", required = false)
protected ValidationType type = ValidationType.ALL; protected ValidationType type = ValidationType.ALL;
@ -172,7 +175,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
numErrors++; numErrors++;
logger.warn("***** " + e.getMessage() + " *****"); logger.warn("***** " + e.getMessage() + " *****");
} else { } else {
throw new UserException.MalformedFile(file, e.getMessage()); throw new UserException.FailsStrictValidation(file, e.getMessage());
} }
} }
} }

View File

@ -160,7 +160,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
Map<String, Allele> alleleMap = new HashMap<String, Allele>(2); Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
alleleMap.put(RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion())); alleleMap.put(RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion()));
alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); alleleMap.put(RawHapMapFeature.INSERTION, Allele.create((char)ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
hapmap.setActualAlleles(alleleMap); hapmap.setActualAlleles(alleleMap);
// also, use the correct positioning for insertions // also, use the correct positioning for insertions

View File

@ -283,6 +283,12 @@ public class UserException extends ReviewedStingException {
} }
} }
public static class FailsStrictValidation extends UserException {
public FailsStrictValidation(File f, String message) {
super(String.format("File %s fails strict validation: %s", f.getAbsolutePath(), message));
}
}
public static class MalformedFile extends UserException { public static class MalformedFile extends UserException {
public MalformedFile(String message) { public MalformedFile(String message) {
super(String.format("Unknown file is malformed: %s", message)); super(String.format("Unknown file is malformed: %s", message));

View File

@ -1071,15 +1071,17 @@ public class VariantContext implements Feature { // to enable tribble integratio
if ( g.isCalled() ) if ( g.isCalled() )
observedAlleles.addAll(g.getAlleles()); observedAlleles.addAll(g.getAlleles());
} }
if ( observedAlleles.contains(Allele.NO_CALL) )
observedAlleles.remove(Allele.NO_CALL);
if ( reportedAlleles.size() != observedAlleles.size() ) if ( reportedAlleles.size() != observedAlleles.size() )
throw new TribbleException.InternalCodecException(String.format("the ALT allele(s) for the record at position %s:%d do not match what is observed in the per-sample genotypes", getChr(), getStart())); throw new TribbleException.InternalCodecException(String.format("one or more of the ALT allele(s) for the record at position %s:%d are not observed at all in the sample genotypes", getChr(), getStart()));
int originalSize = reportedAlleles.size(); int originalSize = reportedAlleles.size();
// take the intersection and see if things change // take the intersection and see if things change
observedAlleles.retainAll(reportedAlleles); observedAlleles.retainAll(reportedAlleles);
if ( observedAlleles.size() != originalSize ) if ( observedAlleles.size() != originalSize )
throw new TribbleException.InternalCodecException(String.format("the ALT allele(s) for the record at position %s:%d do not match what is observed in the per-sample genotypes", getChr(), getStart())); throw new TribbleException.InternalCodecException(String.format("one or more of the ALT allele(s) for the record at position %s:%d are not observed at all in the sample genotypes", getChr(), getStart()));
} }
public void validateChromosomeCounts() { public void validateChromosomeCounts() {

View File

@ -361,7 +361,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + privateTestDir + vcf + " -I " + validationDataLocation + baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + privateTestDir + vcf + " -I " + validationDataLocation +
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1, "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
Arrays.asList("9a7cd58b9e3d5b72608c0d529321deba")); Arrays.asList("d76eacc4021b78ccc0a9026162e814a7"));
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec); executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
} }
@ -451,7 +451,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReducedBam() { public void testReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("1b711c0966a2f76eb21801e73b76b758")); Arrays.asList("9a7cd58b9e3d5b72608c0d529321deba"));
executeTest("test calling on a ReducedRead BAM", spec); executeTest("test calling on a ReducedRead BAM", spec);
} }

View File

@ -354,7 +354,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test @Test
public void testCompOverlap() { public void testCompOverlap() {
String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + variantEvalTestDataRoot + "pacbio.hg19.intervals --comp:comphapmap " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf --eval " + variantEvalTestDataRoot + "pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + variantEvalTestDataRoot + "pacbio.hg19.intervals --comp:comphapmap " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf --eval " + variantEvalTestDataRoot + "pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("59ad39e03678011b5f62492fa83ede04")); WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d0d9208060e69e157dac3bf01bdd83b0"));
executeTestParallel("testCompOverlap",spec); executeTestParallel("testCompOverlap",spec);
} }

View File

@ -26,9 +26,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
} }
VRTest lowPass = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf", VRTest lowPass = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf",
"f360ce3eb2b0b887301be917a9843e2b", // tranches "4d08c8eee61dd1bdea8c5765f34e41f0", // tranches
"287fea5ea066bf3fdd71f5ce9b58eab3", // recal file "ce396fe4045e020b61471f6737dff36e", // recal file
"afa297c743437551cc2bd36ddd6d6d75"); // cut VCF "4f59bd61be900b25c6ecedaa68b9c8de"); // cut VCF
@DataProvider(name = "VRTest") @DataProvider(name = "VRTest")
public Object[][] createData1() { public Object[][] createData1() {
@ -75,9 +75,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
} }
VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf", VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf",
"a8ce3cd3dccafdf7d580bcce7d660a9a", // tranches "6a1eef4d02857dbb117a15420b5c0ce9", // tranches
"74c10fc15f9739a938b7138909fbde04", // recal file "238366af66b05b6d21749e799c25353d", // recal file
"c30d163871a37f2bbf8ee7f761e870b4"); // cut VCF "3928d6bc5007becf52312ade70f14c42"); // cut VCF
@DataProvider(name = "VRBCFTest") @DataProvider(name = "VRBCFTest")
public Object[][] createVRBCFTest() { public Object[][] createVRBCFTest() {

View File

@ -20,7 +20,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
+ b37hapmapGenotypes + " -disc " + testFile + b37hapmapGenotypes + " -disc " + testFile
+ " -o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING", + " -o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING",
1, 1,
Arrays.asList("d88bdae45ae0e74e8d8fd196627e612c") Arrays.asList("954415f84996d27b07d00855e96d33a2")
); );
spec.disableShadowBCF(); spec.disableShadowBCF();
@ -49,7 +49,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
+ b37hapmapGenotypes + " -disc " + testFile + b37hapmapGenotypes + " -disc " + testFile
+ " -o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING", + " -o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING",
1, 1,
Arrays.asList("c0b937edb6a8b6392d477511d4f1ebcf") Arrays.asList("ca1b5226eaeaffb78d4abd9d2ee10c43")
); );
spec.disableShadowBCF(); spec.disableShadowBCF();

View File

@ -33,6 +33,8 @@ import java.util.Arrays;
public class ValidateVariantsIntegrationTest extends WalkerTest { public class ValidateVariantsIntegrationTest extends WalkerTest {
protected static final String emptyMd5 = "d41d8cd98f00b204e9800998ecf8427e";
public static String baseTestString(String file, String type) { public static String baseTestString(String file, String type) {
return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 --variant:vcf " + privateTestDir + file + " --validationType " + type; return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 --variant:vcf " + privateTestDir + file + " --validationType " + type;
} }
@ -42,7 +44,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationExampleGood.vcf", "ALL"), baseTestString("validationExampleGood.vcf", "ALL"),
0, 0,
Arrays.asList("d41d8cd98f00b204e9800998ecf8427e") Arrays.asList(emptyMd5)
); );
executeTest("test good file", spec); executeTest("test good file", spec);
@ -53,7 +55,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationExampleBad.vcf", "REF"), baseTestString("validationExampleBad.vcf", "REF"),
0, 0,
UserException.MalformedFile.class UserException.FailsStrictValidation.class
); );
executeTest("test bad ref base #1", spec); executeTest("test bad ref base #1", spec);
@ -64,7 +66,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationExampleBad2.vcf", "REF"), baseTestString("validationExampleBad2.vcf", "REF"),
0, 0,
UserException.MalformedFile.class UserException.FailsStrictValidation.class
); );
executeTest("test bad ref base #2", spec); executeTest("test bad ref base #2", spec);
@ -75,7 +77,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationExampleBad.vcf", "CHR_COUNTS"), baseTestString("validationExampleBad.vcf", "CHR_COUNTS"),
0, 0,
UserException.MalformedFile.class UserException.FailsStrictValidation.class
); );
executeTest("test bad chr counts #1", spec); executeTest("test bad chr counts #1", spec);
@ -86,7 +88,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationExampleBad2.vcf", "CHR_COUNTS"), baseTestString("validationExampleBad2.vcf", "CHR_COUNTS"),
0, 0,
UserException.MalformedFile.class UserException.FailsStrictValidation.class
); );
executeTest("test bad chr counts #2", spec); executeTest("test bad chr counts #2", spec);
@ -97,7 +99,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationExampleBad.vcf", "IDS") + " --dbsnp " + b36dbSNP129, baseTestString("validationExampleBad.vcf", "IDS") + " --dbsnp " + b36dbSNP129,
0, 0,
UserException.MalformedFile.class UserException.FailsStrictValidation.class
); );
executeTest("test bad RS ID", spec); executeTest("test bad RS ID", spec);
@ -108,7 +110,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationExampleBad.vcf", "ALLELES"), baseTestString("validationExampleBad.vcf", "ALLELES"),
0, 0,
UserException.MalformedFile.class UserException.FailsStrictValidation.class
); );
executeTest("test bad alt allele", spec); executeTest("test bad alt allele", spec);
@ -119,18 +121,29 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationExampleBad3.vcf", "REF"), baseTestString("validationExampleBad3.vcf", "REF"),
0, 0,
UserException.MalformedFile.class UserException.FailsStrictValidation.class
); );
executeTest("test bad ref allele in deletion", spec); executeTest("test bad ref allele in deletion", spec);
} }
@Test
public void testNoValidation() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationExampleBad.vcf", "NONE"),
0,
Arrays.asList(emptyMd5)
);
executeTest("test no validation", spec);
}
@Test @Test
public void testComplexEvents() { public void testComplexEvents() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("complexEvents.vcf", "ALL"), baseTestString("complexEvents.vcf", "ALL"),
0, 0,
Arrays.asList("d41d8cd98f00b204e9800998ecf8427e") Arrays.asList(emptyMd5)
); );
executeTest("test validating complex events", spec); executeTest("test validating complex events", spec);

View File

@ -141,8 +141,8 @@ class GATKResourcesBundle extends QScript {
// CEU trio (NA12878,NA12891,NA12892) best practices results (including PBT) // CEU trio (NA12878,NA12891,NA12892) best practices results (including PBT)
// //
addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/current/CEUTrio.HiSeq.WGS.b37.UG.snps_and_indels.recalibrated.filtered.phaseByTransmission.vcf", addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/CEUTrio.HiSeq.WGS.b37.snps_and_indels.recalibrated.filtered.phased.CURRENT.vcf",
"CEUTrio.HiSeq.WGS.b37.UG.bestPractices.phaseByTransmission",b37,true,false)) "CEUTrio.HiSeq.WGS.b37.bestPractices.phased",b37,true,false))
// //
// example call set for wiki tutorial // example call set for wiki tutorial
@ -317,6 +317,7 @@ class GATKResourcesBundle extends QScript {
class UG(@Input bam: File, @Input ref: File, @Input outVCF: File) extends UnifiedGenotyper with UNIVERSAL_GATK_ARGS { class UG(@Input bam: File, @Input ref: File, @Input outVCF: File) extends UnifiedGenotyper with UNIVERSAL_GATK_ARGS {
this.input_file = List(bam) this.input_file = List(bam)
this.reference_sequence = ref this.reference_sequence = ref
this.intervalsString ++= List("20");
this.out = outVCF this.out = outVCF
} }

View File

@ -60,7 +60,7 @@ class DataProcessingPipelineTest {
" -bwa /home/unix/carneiro/bin/bwa", " -bwa /home/unix/carneiro/bin/bwa",
" -bwape ", " -bwape ",
" -p " + projectName).mkString " -p " + projectName).mkString
spec.fileMD5s += testOut -> "6e70efbe6bafc3fedd60bd406bd201db" spec.fileMD5s += testOut -> "9fca827ecc8436465b831bb6f879357a"
PipelineTest.executeTest(spec) PipelineTest.executeTest(spec)
} }

View File

@ -40,7 +40,7 @@ class PacbioProcessingPipelineTest {
" -blasr ", " -blasr ",
" -test ", " -test ",
" -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf").mkString " -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf").mkString
spec.fileMD5s += testOut -> "61b06e8b78a93e6644657e6d38851084" spec.fileMD5s += testOut -> "b84f9c45e045685067ded681d5e6224c"
PipelineTest.executeTest(spec) PipelineTest.executeTest(spec)
} }
} }