Merge remote-tracking branch 'unstable/master'

This commit is contained in:
Geraldine Van der Auwera 2017-11-09 02:09:51 -05:00
commit e4c24188b2
23 changed files with 251 additions and 89 deletions

View File

@ -110,7 +110,15 @@ public class VariantDataManager {
return data; return data;
} }
public void normalizeData(final boolean calculateMeans) { /**
* Normalize annotations to mean 0 and standard deviation 1.
* Order the variant annotations by the provided list {@code theOrder} or standard deviation.
*
* @param calculateMeans Boolean indicating whether or not to calculate the means
* @param theOrder a list of integers specifying the desired annotation order. If this is null
* annotations will get sorted in decreasing size of their standard deviations.
*/
public void normalizeData(final boolean calculateMeans, List<Integer> theOrder) {
boolean foundZeroVarianceAnnotation = false; boolean foundZeroVarianceAnnotation = false;
for( int iii = 0; iii < meanVector.length; iii++ ) { for( int iii = 0; iii < meanVector.length; iii++ ) {
final double theMean, theSTD; final double theMean, theSTD;
@ -150,7 +158,10 @@ public class VariantDataManager {
// re-order the data by increasing standard deviation so that the results don't depend on the order things were specified on the command line // re-order the data by increasing standard deviation so that the results don't depend on the order things were specified on the command line
// standard deviation over the training points is used as a simple proxy for information content, perhaps there is a better thing to use here // standard deviation over the training points is used as a simple proxy for information content, perhaps there is a better thing to use here
final List<Integer> theOrder = calculateSortOrder(meanVector); // or use the serialized report's annotation order via the argument theOrder
if (theOrder == null){
theOrder = calculateSortOrder(meanVector);
}
annotationKeys = reorderList(annotationKeys, theOrder); annotationKeys = reorderList(annotationKeys, theOrder);
varianceVector = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(varianceVector), theOrder)); varianceVector = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(varianceVector), theOrder));
meanVector = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(meanVector), theOrder)); meanVector = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(meanVector), theOrder));
@ -158,7 +169,8 @@ public class VariantDataManager {
datum.annotations = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(datum.annotations), theOrder)); datum.annotations = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(datum.annotations), theOrder));
datum.isNull = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(datum.isNull), theOrder)); datum.isNull = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(datum.isNull), theOrder));
} }
logger.info("Annotations are now ordered by their information content: " + annotationKeys.toString()); logger.info("Annotation order is: " + annotationKeys.toString());
} }
public double[] getMeanVector() { public double[] getMeanVector() {

View File

@ -51,6 +51,7 @@
package org.broadinstitute.gatk.tools.walkers.variantrecalibration; package org.broadinstitute.gatk.tools.walkers.variantrecalibration;
import com.google.common.annotations.VisibleForTesting;
import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.CommandLineGATK; import org.broadinstitute.gatk.engine.CommandLineGATK;
@ -312,6 +313,9 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
@Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false) @Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
protected Boolean TRUST_ALL_POLYMORPHIC = false; protected Boolean TRUST_ALL_POLYMORPHIC = false;
@VisibleForTesting
protected List<Integer> annotationOrder = null;
///////////////////////////// /////////////////////////////
// Private Member Variables // Private Member Variables
///////////////////////////// /////////////////////////////
@ -372,18 +376,15 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
final GATKReportTable pPMixTable = reportIn.getTable("GoodGaussianPMix"); final GATKReportTable pPMixTable = reportIn.getTable("GoodGaussianPMix");
final GATKReportTable anMeansTable = reportIn.getTable("AnnotationMeans"); final GATKReportTable anMeansTable = reportIn.getTable("AnnotationMeans");
final GATKReportTable anStDevsTable = reportIn.getTable("AnnotationStdevs"); final GATKReportTable anStDevsTable = reportIn.getTable("AnnotationStdevs");
final int numAnnotations = dataManager.annotationKeys.size();
if( numAnnotations != pmmTable.getNumColumns()-1 || numAnnotations != nmmTable.getNumColumns()-1 ) { // -1 because the first column is the gaussian number. orderAndValidateAnnotations(anMeansTable, dataManager.annotationKeys);
throw new UserException.CommandLineException( "Annotations specified on the command line do not match annotations in the model report." );
}
final Map<String, Double> anMeans = getMapFromVectorTable(anMeansTable); final Map<String, Double> anMeans = getMapFromVectorTable(anMeansTable);
final Map<String, Double> anStdDevs = getMapFromVectorTable(anStDevsTable); final Map<String, Double> anStdDevs = getMapFromVectorTable(anStDevsTable);
dataManager.setNormalization(anMeans, anStdDevs); dataManager.setNormalization(anMeans, anStdDevs);
goodModel = GMMFromTables(pmmTable, pmcTable, pPMixTable, numAnnotations); goodModel = GMMFromTables(pmmTable, pmcTable, pPMixTable, annotationOrder.size());
badModel = GMMFromTables(nmmTable, nmcTable, nPMixTable, numAnnotations); badModel = GMMFromTables(nmmTable, nmcTable, nPMixTable, annotationOrder.size());
} }
final Set<VCFHeaderLine> hInfo = new HashSet<>(); final Set<VCFHeaderLine> hInfo = new HashSet<>();
@ -401,6 +402,32 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
} }
/**
 * Order and validate annotations according to the annotations in the serialized model.
 * Annotations on the command line must be the same as those in the model report or this will throw an exception.
 * Sets the {@code annotationOrder} list so that entry {@code i} holds the command line index of the
 * annotation found in row {@code i} of the model report.
 * The lookup is quadratic, which is acceptable because we typically use 7 or fewer annotations.
 *
 * @param annotationTable GATKReportTable of annotations read from the serialized model file
 * @param annotationKeys annotation names in the order they were specified on the command line
 * @throws UserException.CommandLineException if the two annotation lists differ in size, if a model
 *         annotation is missing from the command line, or if an annotation is repeated so that no
 *         one-to-one mapping between the two lists exists
 */
protected void orderAndValidateAnnotations(final GATKReportTable annotationTable, final List<String> annotationKeys){
    final int numModelAnnotations = annotationTable.getNumRows();
    final List<Integer> modelOrder = new ArrayList<Integer>(annotationKeys.size());

    // Equal sizes plus a distinct command line index for every model row implies a one-to-one mapping.
    // Comparing only the number of matches is not enough: a repeated annotation on either side can
    // produce the right count while leaving another annotation unmatched.
    boolean annotationsMatch = numModelAnnotations == annotationKeys.size();
    for (int i = 0; annotationsMatch && i < numModelAnnotations; i++){
        final String serialAnno = (String)annotationTable.get(i, "Annotation");
        final int commandLineIndex = annotationKeys.indexOf(serialAnno);
        if (commandLineIndex < 0 || modelOrder.contains(commandLineIndex)){
            // unknown to the command line, or the same command line annotation was already claimed
            annotationsMatch = false;
        } else {
            modelOrder.add(commandLineIndex);
        }
    }

    // As before, the field is assigned even when validation fails.
    annotationOrder = modelOrder;

    if (!annotationsMatch) {
        final String errorMsg = "Annotations specified on the command line:"+annotationKeys.toString() +" do not match annotations in the model report:"+inputModel;
        throw new UserException.CommandLineException(errorMsg);
    }
}
//--------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------
// //
@ -518,7 +545,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
for (int i = 1; i <= max_attempts; i++) { for (int i = 1; i <= max_attempts; i++) {
try { try {
dataManager.setData(reduceSum); dataManager.setData(reduceSum);
dataManager.normalizeData(inputModel.isEmpty()); // Each data point is now (x - mean) / standard deviation dataManager.normalizeData(inputModel.isEmpty(), annotationOrder); // Each data point is now (x - mean) / standard deviation
final List<VariantDatum> positiveTrainingData = dataManager.getTrainingData(); final List<VariantDatum> positiveTrainingData = dataManager.getTrainingData();
final List<VariantDatum> negativeTrainingData; final List<VariantDatum> negativeTrainingData;

View File

@ -198,13 +198,13 @@ public class BQSRIntegrationTest extends WalkerTest {
public Object[][] createPRTestData() { public Object[][] createPRTestData() {
List<Object[]> tests = new ArrayList<Object[]>(); List<Object[]> tests = new ArrayList<Object[]>();
tests.add(new Object[]{1, new PRTest(" -qq -1", "8a38828e3b14ce067614d4248e3ea95a")}); tests.add(new Object[]{1, new PRTest(" -qq -1", "e9969c6d7fd35d96b82c691b4ced5443")});
tests.add(new Object[]{1, new PRTest(" -qq 6", "e4f23250b2c87f0d68d042cc3d2ec1d3")}); tests.add(new Object[]{1, new PRTest(" -qq 6", "b4b271acac003b8504b530c5526d43ad")});
tests.add(new Object[]{1, new PRTest(" -DIQ", "2dfa45f004d3a371fd290ed67fbdf573")}); tests.add(new Object[]{1, new PRTest(" -DIQ", "ec795d16746c3bdf5e54c57337e6eed6")});
tests.add(new Object[]{1, new PRTest(" --useOriginalQualities -SQQ 10 -SQQ 20 -SQQ 30", "4882354d9e603f9bbe7c9591bba0a573")}); tests.add(new Object[]{1, new PRTest(" --useOriginalQualities -SQQ 10 -SQQ 20 -SQQ 30", "d50df5a7bcc8f195479f64285c124d18")});
tests.add(new Object[]{1, new PRTest(" --useOriginalQualities -SQQ 10 -SQQ 20 -SQQ 30 -RDQ", "6ffdfc4593e83f7c234b6249412433af")}); tests.add(new Object[]{1, new PRTest(" --useOriginalQualities -SQQ 10 -SQQ 20 -SQQ 30 -RDQ", "58b9df85e49eb3ee228f0d581c168791")});
for ( final int nct : Arrays.asList(1, 2, 4) ) { for ( final int nct : Arrays.asList(1, 2, 4) ) {
tests.add(new Object[]{nct, new PRTest("", "6451093cadfc14d7359617b2a7ea6db8")}); tests.add(new Object[]{nct, new PRTest("", "f4704ba2894ec0aec8a55ce6b361f768")});
} }
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});

View File

@ -138,7 +138,7 @@ public class MuTect2IntegrationTest extends WalkerTest {
public void testTruePositivesDream3TrackedDropped() { public void testTruePositivesDream3TrackedDropped() {
M2TestWithDroppedReads(DREAM3_TUMOR_BAM, DREAM3_NORMAL_BAM, "21:10935369", "", M2TestWithDroppedReads(DREAM3_TUMOR_BAM, DREAM3_NORMAL_BAM, "21:10935369", "",
"48a446d47bb10434cb7f0ee726d15721", "48a446d47bb10434cb7f0ee726d15721",
"6ecaeb74893249dfa5723b2266c957e2"); "265a72d3f79bb0fe054a847ab0c01c67");
} }
/** /**

View File

@ -133,7 +133,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeBAMOutFlags() throws IOException { public void testHaplotypeBAMOutFlags() throws IOException {
final String md5BAMOut = "69aae17f8cd384666ec7c3c1f3d3eb57"; final String md5BAMOut = "0934466fa2b8648af9e6267286a57151";
HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "df622103b0a6917f2299b1acfd0ed0ac", md5BAMOut); HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "df622103b0a6917f2299b1acfd0ed0ac", md5BAMOut);
validateForwardedProgramRecords(new ArrayList<>(Arrays.asList(new File(NA12878_BAM))), md5BAMOut); validateForwardedProgramRecords(new ArrayList<>(Arrays.asList(new File(NA12878_BAM))), md5BAMOut);
} }
@ -330,7 +330,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testLeftAlignmentBamOutBugFix() throws IOException { public void testLeftAlignmentBamOutBugFix() throws IOException {
final String outputVCF = createTempFile("temp", ".vcf").getAbsolutePath(); final String outputVCF = createTempFile("temp", ".vcf").getAbsolutePath();
final String md5BAMOut = "27e729df3b166c81792a62a5b57ef7b3"; final String md5BAMOut = "60db3996cd37a863d8b93218fcbf1c8b";
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, LEFT_ALIGNMENT_BAMOUT_TEST_INPUT) final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, LEFT_ALIGNMENT_BAMOUT_TEST_INPUT)
+ " --no_cmdline_in_header -bamout %s -o " + outputVCF + " -L 1:11740000-11740700 --allowNonUniqueKmersInRef"; + " --no_cmdline_in_header -bamout %s -o " + outputVCF + " -L 1:11740000-11740700 --allowNonUniqueKmersInRef";
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(md5BAMOut)); final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(md5BAMOut));
@ -541,7 +541,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testHaplotypeCallerReadPosRankSum() throws IOException { public void testHaplotypeCallerReadPosRankSum() throws IOException {
final File testBAM = new File(privateTestDir + "testReadPos.snippet.bam"); final File testBAM = new File(privateTestDir + "testReadPos.snippet.bam");
final String md5Variants = "e664b7a9da71cf81e14648ac7e698eea"; final String md5Variants = "e664b7a9da71cf81e14648ac7e698eea";
final String md5BAMOut = "3ef35732e49980093ad445e3ac5731fa"; final String md5BAMOut = "74f2cab2d2d0d999b54456b73e597d6c";
final String base = String.format("-T HaplotypeCaller -R %s -I %s -L 1:3753063 -ip 100 ", REF, testBAM) + final String base = String.format("-T HaplotypeCaller -R %s -I %s -L 1:3753063 -ip 100 ", REF, testBAM) +
" --no_cmdline_in_header -o %s -bamout %s"; " --no_cmdline_in_header -o %s -bamout %s";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList(md5Variants, md5BAMOut)); final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList(md5Variants, md5BAMOut));

View File

@ -87,7 +87,7 @@ public class SplitNCigarReadsIntegrationTest extends WalkerTest {
public void testSplitsWithOverhangs() { public void testSplitsWithOverhangs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T SplitNCigarReads -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -o %s --no_pg_tag -U ALLOW_N_CIGAR_READS", 1, "-T SplitNCigarReads -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -o %s --no_pg_tag -U ALLOW_N_CIGAR_READS", 1,
Arrays.asList("72fbeb2043f005e1698e21563f0625a9")); Arrays.asList("b721d997bd09873a244fee97c1e58af1"));
executeTest("test splits with overhangs", spec); executeTest("test splits with overhangs", spec);
} }
@ -95,7 +95,7 @@ public class SplitNCigarReadsIntegrationTest extends WalkerTest {
public void testSplitsFixNDN() { public void testSplitsFixNDN() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T SplitNCigarReads -R " + b37KGReference + " -I " + privateTestDir + "splitNCigarReadsSnippet.bam -o %s --no_pg_tag -U ALLOW_N_CIGAR_READS -fixNDN", 1, "-T SplitNCigarReads -R " + b37KGReference + " -I " + privateTestDir + "splitNCigarReadsSnippet.bam -o %s --no_pg_tag -U ALLOW_N_CIGAR_READS -fixNDN", 1,
Arrays.asList("add7012d5e814d6cfd32f6cac1eb8ce3")); Arrays.asList("9aa80944c2c7ee8a1f259907e3d8b51c"));
executeTest("test fix NDN", spec); executeTest("test fix NDN", spec);
} }
@ -103,7 +103,7 @@ public class SplitNCigarReadsIntegrationTest extends WalkerTest {
public void testSplitsWithOverhangsNotClipping() { public void testSplitsWithOverhangsNotClipping() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T SplitNCigarReads --doNotFixOverhangs -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -o %s --no_pg_tag -U ALLOW_N_CIGAR_READS", 1, "-T SplitNCigarReads --doNotFixOverhangs -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -o %s --no_pg_tag -U ALLOW_N_CIGAR_READS", 1,
Arrays.asList("6a55ac0a945e010bf03e1dd8f7749417")); Arrays.asList("4134e1ff0eaaa9986e19526a7c3a8319"));
executeTest("test splits with overhangs not clipping", spec); executeTest("test splits with overhangs not clipping", spec);
} }
@ -111,7 +111,7 @@ public class SplitNCigarReadsIntegrationTest extends WalkerTest {
public void testSplitsWithOverhangs0Mismatches() { public void testSplitsWithOverhangs0Mismatches() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T SplitNCigarReads --maxMismatchesInOverhang 0 -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -o %s --no_pg_tag -U ALLOW_N_CIGAR_READS", 1, "-T SplitNCigarReads --maxMismatchesInOverhang 0 -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -o %s --no_pg_tag -U ALLOW_N_CIGAR_READS", 1,
Arrays.asList("8a577047955ff9edca3caf1f6e545d3e")); Arrays.asList("94ebc9fbd64684e50c5f54ad5ff042b6"));
executeTest("test splits with overhangs 0 mismatches", spec); executeTest("test splits with overhangs 0 mismatches", spec);
} }
@ -119,7 +119,7 @@ public class SplitNCigarReadsIntegrationTest extends WalkerTest {
public void testSplitsWithOverhangs5BasesInOverhang() { public void testSplitsWithOverhangs5BasesInOverhang() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T SplitNCigarReads --maxBasesInOverhang 5 -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -o %s --no_pg_tag -U ALLOW_N_CIGAR_READS", 1, "-T SplitNCigarReads --maxBasesInOverhang 5 -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -o %s --no_pg_tag -U ALLOW_N_CIGAR_READS", 1,
Arrays.asList("bdd822868b88063cf50c6336ed1a5e64")); Arrays.asList("b737b2dfb22a608ee3def6137fed9414"));
executeTest("test splits with overhangs 5 bases in overhang", spec); executeTest("test splits with overhangs 5 bases in overhang", spec);
} }
} }

View File

@ -64,7 +64,7 @@ public class SimulateReadsForVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forSimulation.vcf -o %s", "-T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forSimulation.vcf -o %s",
1, 1,
Arrays.asList("d929369b9095420a8aaff2595ec2f80a")); Arrays.asList("b5680f835aff1da6e1e60123d39f2371"));
executeTest("testVariants", spec); executeTest("testVariants", spec);
} }
@ -74,7 +74,7 @@ public class SimulateReadsForVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-RL 70 -T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forSimulation.vcf -o %s", "-RL 70 -T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forSimulation.vcf -o %s",
1, 1,
Arrays.asList("bbc65e4f8bd3a1656616476a1e190ecf")); Arrays.asList("177d32e0b13bd40aaeef71e8e5ffefe7"));
executeTest("testReadLength", spec); executeTest("testReadLength", spec);
} }
@ -84,7 +84,7 @@ public class SimulateReadsForVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-ER 40 -T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forSimulation.vcf -o %s", "-ER 40 -T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forSimulation.vcf -o %s",
1, 1,
Arrays.asList("cb0e4b11bbd1b5a154ad6c99541cd017")); Arrays.asList("7353b4d148221f4f4975f07712413e18"));
executeTest("testErrorRate", spec); executeTest("testErrorRate", spec);
} }
@ -94,7 +94,7 @@ public class SimulateReadsForVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-RGPL SOLID -T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forSimulation.vcf -o %s", "-RGPL SOLID -T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forSimulation.vcf -o %s",
1, 1,
Arrays.asList("2b5c6cda9a434c9e25e5da5599eeae51")); Arrays.asList("6f7a2ff3aa3c2e24a00f04dc770a4140"));
executeTest("testPlatformTag", spec); executeTest("testPlatformTag", spec);
} }
@ -105,7 +105,7 @@ public class SimulateReadsForVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T SimulateReadsForVariants --no_pg_tag --useAFAsAlleleFraction -DP 100 -R " + b37KGReference + " -V " + publicTestDir + "forAlleleFractionSimulation.vcf -o %s", "-T SimulateReadsForVariants --no_pg_tag --useAFAsAlleleFraction -DP 100 -R " + b37KGReference + " -V " + publicTestDir + "forAlleleFractionSimulation.vcf -o %s",
1, 1,
Arrays.asList("1ae2c354718b470e30b44d5e59cb9944")); Arrays.asList("f19d4b62269512fff0dcce21874c0d43"));
executeTest("testAlleleFraction", spec); executeTest("testAlleleFraction", spec);
} }
@ -116,7 +116,7 @@ public class SimulateReadsForVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forLongInsert.vcf -o %s", "-T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forLongInsert.vcf -o %s",
1, 1,
Arrays.asList("5c069bff8efb988660c7f6d28a3117fc")); Arrays.asList("052f1b644848cfd058cd2aeb0b6f2fd2"));
executeTest("testLongInsertFailure", spec); executeTest("testLongInsertFailure", spec);
} }
@ -126,7 +126,7 @@ public class SimulateReadsForVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-RL 269 -T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forLongInsert.vcf -o %s", "-RL 269 -T SimulateReadsForVariants --no_pg_tag -R " + b37KGReference + " -V " + publicTestDir + "forLongInsert.vcf -o %s",
1, 1,
Arrays.asList("0657f6a692d22b5e2b7f5832710042e4")); Arrays.asList("33f7da2e0b711a9ad28cb49c60e648be"));
executeTest("testLongInsertSuccess", spec); executeTest("testLongInsertSuccess", spec);
} }

View File

@ -51,6 +51,8 @@
package org.broadinstitute.gatk.tools.walkers.variantrecalibration; package org.broadinstitute.gatk.tools.walkers.variantrecalibration;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.exceptions.UserException.CommandLineException;
import org.broadinstitute.gatk.utils.variant.VCIterable; import org.broadinstitute.gatk.utils.variant.VCIterable;
import org.broadinstitute.gatk.engine.walkers.WalkerTest; import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContext;
@ -60,6 +62,7 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.File; import java.io.File;
import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
@ -390,5 +393,73 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
new File(outputFile.getAbsolutePath() + ".pdf").deleteOnExit(); new File(outputFile.getAbsolutePath() + ".pdf").deleteOnExit();
} }
} }
/**
 * Runs VariantRecalibrator twice against the same input VCF and serialized model report, with the
 * same set of {@code -an} annotations listed in two different orders, and checks both runs against
 * the same expected MD5.
 *
 * NOTE(review): the expected MD5 is the digest of empty input and the command line has no {@code %s}
 * output placeholder -- confirm this is the intended check.
 */
@Test
public void testVQSRAnnotationOrder() throws IOException {
    final String snpVcf = privateTestDir + "oneSNP.vcf";
    final String modelReport = privateTestDir + "subsetExAC.snps_model.report";
    final String recalPath = privateTestDir + "anno_order.recal";
    final String tranchesPath = privateTestDir + "anno_order.tranches";
    final String expectedMd5 = "d41d8cd98f00b204e9800998ecf8427e";

    // Everything on the command line except the -an list is shared by both runs.
    final String argsBeforeAnnotations = "-R " + b37KGReference
            + " -T VariantRecalibrator -input " + snpVcf
            + " -L 1:110201699 -resource:truth=true,training=true,prior=15.0 " + snpVcf;
    final String argsAfterAnnotations = " --recal_file " + recalPath
            + " -tranchesFile " + tranchesPath
            + " --input_model " + modelReport
            + " -ignoreAllFilters -mode SNP --no_cmdline_in_header";

    // First run: FS listed first.
    final String fsFirstCommand = argsBeforeAnnotations
            + " -an FS -an ReadPosRankSum -an MQ -an MQRankSum -an QD -an SOR"
            + argsAfterAnnotations;
    final WalkerTestSpec fsFirstSpec = new WalkerTestSpec(fsFirstCommand, 1, Arrays.asList(expectedMd5));
    fsFirstSpec.disableShadowBCF(); // TODO -- enable when we support symbolic alleles
    final List<File> fsFirstOutputs = executeTest("testVQSRAnnotationOrder", fsFirstSpec).getFirst();
    setPDFsForDeletion(fsFirstOutputs);

    // Second run: same annotations with FS moved to the end.
    final String fsLastCommand = argsBeforeAnnotations
            + " -an ReadPosRankSum -an MQ -an MQRankSum -an QD -an SOR -an FS "
            + argsAfterAnnotations;
    final WalkerTestSpec fsLastSpec = new WalkerTestSpec(fsLastCommand, 1, Arrays.asList(expectedMd5));
    fsLastSpec.disableShadowBCF(); // TODO -- enable when we support symbolic alleles
    final List<File> fsLastOutputs = executeTest("testVQSRAnnotationOrder2", fsLastSpec).getFirst();
    setPDFsForDeletion(fsLastOutputs);
}
/**
 * Runs VariantRecalibrator with a serialized model report and a command line that additionally
 * requests {@code -an BaseQRankSum}; the test passes only if the run throws one of the expected
 * exception types.
 *
 * NOTE(review): RuntimeException is listed alongside CommandLineException in the expected
 * exceptions -- confirm whether the broader type is really needed.
 */
@Test(expectedExceptions={RuntimeException.class, CommandLineException.class})
public void testVQSRAnnotationMismatch() throws IOException {
    final String snpVcf = privateTestDir + "oneSNP.vcf";
    final String modelReport = privateTestDir + "subsetExAC.snps_model.report";
    final String recalPath = privateTestDir + "anno_order.recal";
    final String tranchesPath = privateTestDir + "anno_order.tranches";
    final String expectedMd5 = "d41d8cd98f00b204e9800998ecf8427e";

    final String command = "-R " + b37KGReference
            + " -T VariantRecalibrator -input " + snpVcf
            + " -L 1:110201699 -resource:truth=true,training=true,prior=15.0 " + snpVcf
            + " -an FS -an ReadPosRankSum -an MQ -an MQRankSum -an QD -an SOR -an BaseQRankSum"
            + " --recal_file " + recalPath
            + " -tranchesFile " + tranchesPath
            + " --input_model " + modelReport
            + " -ignoreAllFilters -mode SNP --no_cmdline_in_header";

    final WalkerTestSpec mismatchSpec = new WalkerTestSpec(command, 1, Arrays.asList(expectedMd5));
    mismatchSpec.disableShadowBCF(); // TODO -- enable when we support symbolic alleles
    executeTest("testVQSRAnnotationMismatch", mismatchSpec).getFirst();
}
} }

View File

@ -276,4 +276,52 @@ public class VariantRecalibratorModelOutputUnitTest extends BaseTest {
return new GaussianMixtureModel(badGaussianList, shrinkage, dirichlet, priorCounts); return new GaussianMixtureModel(badGaussianList, shrinkage, dirichlet, priorCounts);
} }
/**
 * Exercises {@code orderAndValidateAnnotations} against an annotation table built from six
 * annotations: first with the keys in table order, then with the first key moved to the end, and
 * finally with a differently ordered key list on a second recalibrator instance.
 */
@Test
public void testAnnotationOrderAndValidate() {
    final VariantRecalibrator vqsr = new VariantRecalibrator();
    final List<String> annotationList = new ArrayList<>();
    annotationList.add("QD");
    annotationList.add("FS");
    annotationList.add("ReadPosRankSum");
    annotationList.add("MQ");
    annotationList.add("MQRankSum");
    annotationList.add("SOR");

    final double[] meanVector = {16.13, 2.45, 0.37, 59.08, 0.14, 0.91};
    final String columnName = "Mean";
    final String formatString = "%.3f";
    final GATKReportTable annotationTable = vqsr.makeVectorTable("AnnotationMeans", "Mean for each annotation, used to normalize data", annotationList, meanVector, columnName, formatString);

    // Keys in the same order as the table: every position maps to itself.
    vqsr.orderAndValidateAnnotations(annotationTable, annotationList);
    for (int pos = 0; pos < vqsr.annotationOrder.size(); pos++){
        Assert.assertEquals(pos, (int)vqsr.annotationOrder.get(pos));
    }

    // Move the first key ("QD") to the end: table row 0 now maps to the last key,
    // and every other row maps one position earlier.
    annotationList.remove(0);
    annotationList.add("QD");
    vqsr.orderAndValidateAnnotations(annotationTable, annotationList);
    for (int pos = 0; pos < vqsr.annotationOrder.size(); pos++) {
        final int expectedIndex = (pos == 0) ? annotationList.size()-1 : pos - 1;
        Assert.assertEquals(expectedIndex, (int)vqsr.annotationOrder.get(pos));
    }

    final List<String> annotationList2 = new ArrayList<>();
    annotationList2.add("ReadPosRankSum");
    annotationList2.add("MQRankSum");
    annotationList2.add("MQ");
    annotationList2.add("SOR");
    annotationList2.add("QD");
    annotationList2.add("FS");

    // Two different key orders must resolve each table row to the same annotation name.
    final VariantRecalibrator vqsr2 = new VariantRecalibrator();
    vqsr2.orderAndValidateAnnotations(annotationTable, annotationList2);
    for (int pos = 0; pos < vqsr2.annotationOrder.size(); pos++){
        final String nameViaFirstList = annotationList.get(vqsr.annotationOrder.get(pos));
        final String nameViaSecondList = annotationList2.get(vqsr2.annotationOrder.get(pos));
        Assert.assertEquals(nameViaFirstList, nameViaSecondList);
    }
}
} }

View File

@ -154,7 +154,7 @@ public class LeftAlignAndTrimVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "multiallele-gt.vcf --no_cmdline_in_header -split", "-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "multiallele-gt.vcf --no_cmdline_in_header -split",
1, 1,
Arrays.asList("80b4a994a569ecfbd628121b85e64339")); Arrays.asList("a7dcd0cf9542bf58153023d26deec06e"));
executeTest("test left alignment of multiple alleles with genoptypes", spec); executeTest("test left alignment of multiple alleles with genoptypes", spec);
} }
@ -172,7 +172,7 @@ public class LeftAlignAndTrimVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "multiallele-gt.vcf --no_cmdline_in_header -split -keepOriginalAC", "-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "multiallele-gt.vcf --no_cmdline_in_header -split -keepOriginalAC",
1, 1,
Arrays.asList("6774b43d9446278e716d7198717a5f85")); Arrays.asList("26e67892c75ded1756709476520d4264"));
executeTest("test left alignment of multiple alleles with genoptypes, keep original AC", spec); executeTest("test left alignment of multiple alleles with genoptypes, keep original AC", spec);
} }

View File

@ -494,7 +494,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
{ privateTestDir+"forHardLeftAlignVariantsTest.vcf", "", "a835454cbd132f2d56defb55ba13b2dd"}, { privateTestDir+"forHardLeftAlignVariantsTest.vcf", "", "a835454cbd132f2d56defb55ba13b2dd"},
{ privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT", "b19e508640a89f176f7ea347babfcc66"}, { privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT", "b19e508640a89f176f7ea347babfcc66"},
{ privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT -env", "15d982a280754804fa384ccc0f3a2ccf"}, { privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT -env", "15d982a280754804fa384ccc0f3a2ccf"},
{ privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT -trimAlternates", "cbad00c0722a94c382f75b428f2a8e6c"}, { privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT -trimAlternates", "41ffddc776a2af55db297dbefc6d2097"},
{ privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT -env -trimAlternates", "a9f448502a27e777b3112cf98e1d325f"} { privateTestDir+"multi-allelic-ordering.vcf", "-sn SAMPLE-CC -sn SAMPLE-CT -env -trimAlternates", "a9f448502a27e777b3112cf98e1d325f"}
}; };
} }
@ -782,7 +782,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -sn NA12891 -trimAlternates", "-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -sn NA12891 -trimAlternates",
1, 1,
Arrays.asList("c8203bea524cdb66717abbbc50a234b5")); Arrays.asList("f068e449cf3c142c8c5758c5eab38780"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testSACDiploid", spec); executeTest("testSACDiploid", spec);
} }
@ -794,7 +794,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -sn NA12891 -trimAlternates", "-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -sn NA12891 -trimAlternates",
1, 1,
Arrays.asList("f48a3c789898e24fbec6db9c6e8d800a")); Arrays.asList("ade30e246b807e45cf6c54db96fc8627"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testSACNonDiploid", spec); executeTest("testSACNonDiploid", spec);
} }

View File

@ -100,7 +100,7 @@ public class SelectVariantsParallelIntegrationTest extends WalkerTest {
{ // AD and PL decoding race condition { // AD and PL decoding race condition
final String testfile = privateTestDir + "race_condition.vcf"; final String testfile = privateTestDir + "race_condition.vcf";
final String args = "-env -trimAlternates -sn SAMPLE -L 1:1-10,000,000 -V " + testfile; final String args = "-env -trimAlternates -sn SAMPLE -L 1:1-10,000,000 -V " + testfile;
new ParallelSelectTestProvider(b37KGReference, args, "9983e6a41472e8956dd32f967bf87f05", nt); new ParallelSelectTestProvider(b37KGReference, args, "6bd8bf3ebd72e91eba9b4e0a44d1744f", nt);
} }
} }

View File

@ -324,12 +324,12 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
@Test() @Test()
public void testDefaultBaseQualities20() { public void testDefaultBaseQualities20() {
executeTest("testDefaultBaseQualities20", testDefaultBaseQualities(20, "90a450f74554bbd2cc3a9e0f9de68e26")); executeTest("testDefaultBaseQualities20", testDefaultBaseQualities(20, "da5a4293e125f00ecbdfd450b80d8357"));
} }
@Test() @Test()
public void testDefaultBaseQualities30() { public void testDefaultBaseQualities30() {
executeTest("testDefaultBaseQualities30", testDefaultBaseQualities(30, "ec11db4173ce3b8e43997f00dab5ae26")); executeTest("testDefaultBaseQualities30", testDefaultBaseQualities(30, "c82e72ee08d545266927a7f2e8ab24c3"));
} }
@Test(expectedExceptions = Exception.class) @Test(expectedExceptions = Exception.class)
@ -732,10 +732,10 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
public void testSAMWriterFeatures() { public void testSAMWriterFeatures() {
testBAMFeatures("-compress 0", "49228d4f5b14c4cfed4a09372eb71139"); testBAMFeatures("-compress 0", "49228d4f5b14c4cfed4a09372eb71139");
testBAMFeatures("-compress 9", "bc61a1b2b53a2ec7c63b533fa2f8701b"); testBAMFeatures("-compress 9", "bc61a1b2b53a2ec7c63b533fa2f8701b");
testBAMFeatures("-simplifyBAM", "a1127bab46674b165496b79bb9fa7964"); testBAMFeatures("-simplifyBAM", "f66b671b32d1bbbf65a88585a64b556a");
//Validate MD5 //Validate MD5
final String expectedMD5 = "c58b9114fc15b53655f2c03c819c29fd"; final String expectedMD5 = "a0c0f1fafc248ac82a84b8ca71292caf";
final File md5Target = testBAMFeatures("--generate_md5", expectedMD5); final File md5Target = testBAMFeatures("--generate_md5", expectedMD5);
final File md5File = new File(md5Target.getAbsoluteFile() + ".md5"); final File md5File = new File(md5Target.getAbsoluteFile() + ".md5");
md5File.deleteOnExit(); md5File.deleteOnExit();

View File

@ -41,16 +41,16 @@ public class CramIntegrationTest extends WalkerTest {
return new Object[][] { return new Object[][] {
{"PrintReads", "exampleBAM.bam", "", "cram", ""}, // Bypass MD5 check since the CRAM header stores the file name {"PrintReads", "exampleBAM.bam", "", "cram", ""}, // Bypass MD5 check since the CRAM header stores the file name
{"PrintReads", "exampleCRAM.cram", "", "cram", ""}, {"PrintReads", "exampleCRAM.cram", "", "cram", ""},
{"PrintReads", "exampleCRAM.cram", "", "bam", "e7834d5992a69143d7c463275213bbf8"}, {"PrintReads", "exampleCRAM.cram", "", "bam", "50c55e1040aa0ffd4036aa581fcfe8af"},
{"PrintReads", "exampleCRAM.cram", " -L chr1:200", "bam", "d362fbf30a2c77a2653f1c8eb2dd8fc1"}, {"PrintReads", "exampleCRAM.cram", " -L chr1:200", "bam", "f74f78bd16aed8a395f0e79e3898ab19"},
{"CountLoci", "exampleCRAM.cram", "", "txt", "ade93df31a6150321c1067e749cae9be"}, {"CountLoci", "exampleCRAM.cram", "", "txt", "ade93df31a6150321c1067e749cae9be"},
{"CountLoci", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"}, {"CountLoci", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
{"CountReads", "exampleCRAM.cram", "", "txt", "4fbafd6948b6529caa2b78e476359875"}, {"CountReads", "exampleCRAM.cram", "", "txt", "4fbafd6948b6529caa2b78e476359875"},
{"CountReads", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"}, {"CountReads", "exampleCRAM.cram", " -L chr1:200", "txt", "b026324c6904b2a9cb4b88d6d61c81d1"},
{"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "a11bd125b69f651aaa2ae68c8ccab22f"}, {"PrintReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "bam", "1a6331f0e7afdccd07425df251d28811"},
{"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"}, {"CountLoci", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
{"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"}, {"CountReads", "exampleCRAM.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
{"PrintReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "bam", "9e3e8b5a58dfcb50f5b270547c01d56a"}, {"PrintReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "bam", "41f8ebb256120befc6ea7d228915b349"},
{"CountLoci", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"}, {"CountLoci", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "26ab0db90d72e28ad0ba1e22ee510510"},
{"CountReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"}, {"CountReads", "exampleCRAM-nobai-withcrai.cram", " -L chr1:200 -L chr1:89597", "txt", "6d7fce9fee471194aa8b5b6e47267f03"},
}; };

View File

@ -62,7 +62,7 @@ public class IntervalIntegrationTest extends WalkerTest {
// our base file // our base file
File baseOutputFile = createTempFile("testUnmappedReadInclusion",".bam"); File baseOutputFile = createTempFile("testUnmappedReadInclusion",".bam");
spec.setOutputFileLocation(baseOutputFile); spec.setOutputFileLocation(baseOutputFile);
spec.addAuxFile("c66bb2c3c5382e2acff09b2b359562bb",createTempFileFromBase(baseOutputFile.getAbsolutePath())); spec.addAuxFile("4eeb9b07d3c9ddd4ea9eada7805903a2",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
spec.addAuxFile("fadcdf88597b9609c5f2a17f4c6eb455", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai")); spec.addAuxFile("fadcdf88597b9609c5f2a17f4c6eb455", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai"));
executeTest("testUnmappedReadInclusion",spec); executeTest("testUnmappedReadInclusion",spec);
@ -109,8 +109,8 @@ public class IntervalIntegrationTest extends WalkerTest {
// our base file // our base file
File baseOutputFile = createTempFile("testMixedMappedAndUnmapped",".bam"); File baseOutputFile = createTempFile("testMixedMappedAndUnmapped",".bam");
spec.setOutputFileLocation(baseOutputFile); spec.setOutputFileLocation(baseOutputFile);
spec.addAuxFile("c64cff3ed376bc8f2977078dbdac4518",createTempFileFromBase(baseOutputFile.getAbsolutePath())); spec.addAuxFile("70ac0317c0c13d885e6c2de4f837cdd3",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
spec.addAuxFile("fa90ff91ac0cc689c71a3460a3530b8b", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai")); spec.addAuxFile("df469db01610ddb731868dfb28917142", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai"));
executeTest("testMixedMappedAndUnmapped",spec); executeTest("testMixedMappedAndUnmapped",spec);
} }

View File

@ -89,7 +89,7 @@
<dependency> <dependency>
<groupId>com.intel.gkl</groupId> <groupId>com.intel.gkl</groupId>
<artifactId>gkl</artifactId> <artifactId>gkl</artifactId>
<version>0.5.2</version> <version>0.6.0</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>log4j</groupId> <groupId>log4j</groupId>

View File

@ -98,7 +98,7 @@ import java.util.*;
* *
* <h3>Caveats</h3> * <h3>Caveats</h3>
* <ul> * <ul>
* <li>Some annotations cannot be applied to all variant sites, so VCFs typically contain records where some annotation values are missing. By default this tool the tool will emit the special value NA for the missing annotations if you request export of an annotation for which not all records have values. You can override this behavior by setting --errorIfMissingData in the command line. As a result, the tool will throw an error if a record is missing a value.</li> * <li>Some annotations cannot be applied to all variant sites, so VCFs typically contain records where some annotation values are missing. By default this tool will emit the special value NA for the missing annotations if you request export of an annotation for which not all records have values. You can override this behavior by setting --errorIfMissingData in the command line. As a result, the tool will throw an error if a record is missing a value.</li>
* <li>When you request export of sample-level annotations (FORMAT field annotations such as GT), the annotations will be identified per-sample. If multiple samples are present in the VCF, the columns will be ordered alphabetically by sample name (SM tag).</li> * <li>When you request export of sample-level annotations (FORMAT field annotations such as GT), the annotations will be identified per-sample. If multiple samples are present in the VCF, the columns will be ordered alphabetically by sample name (SM tag).</li>
* </ul> * </ul>
* *
@ -181,7 +181,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
/** /**
* By default, this tool will write out NA values indicating missing data when it encounters a field without a value in a record. * By default, this tool will write out NA values indicating missing data when it encounters a field without a value in a record.
* If this flag is added to the command, the tool will instead exit with an error if missing data is encountered.. * If this flag is added to the command, the tool will instead exit with an error if missing data is encountered.
*/ */
@Advanced @Advanced
@Argument(fullName="errorIfMissingData", shortName="EMD", doc="If provided, we will require every record to contain every field", required=false) @Argument(fullName="errorIfMissingData", shortName="EMD", doc="If provided, we will require every record to contain every field", required=false)

View File

@ -44,13 +44,13 @@ public class BAQIntegrationTest extends WalkerTest {
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@Test @Test
public void testPrintReadsNoBAQ() { public void testPrintReadsNoBAQ() {
WalkerTestSpec spec = new WalkerTestSpec( baseCommand +" -baq OFF", 1, Arrays.asList("e33187ca383c7f5c75c5d547ec79e1cb")); WalkerTestSpec spec = new WalkerTestSpec( baseCommand +" -baq OFF", 1, Arrays.asList("526dbbb390dbd36daa70ac35a8eddc36"));
executeTest(String.format("testPrintReadsNoBAQ"), spec); executeTest(String.format("testPrintReadsNoBAQ"), spec);
} }
@Test @Test
public void testPrintReadsRecalBAQ() { public void testPrintReadsRecalBAQ() {
WalkerTestSpec spec = new WalkerTestSpec( baseCommand +" -baq RECALCULATE", 1, Arrays.asList("a25043edfbfa4f21a13cc21064b460df")); WalkerTestSpec spec = new WalkerTestSpec( baseCommand +" -baq RECALCULATE", 1, Arrays.asList("25cb8f91e31a07cca2e141a119c9e020"));
executeTest(String.format("testPrintReadsRecalBAQ"), spec); executeTest(String.format("testPrintReadsRecalBAQ"), spec);
} }
} }

View File

@ -47,22 +47,22 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
} }
final static String Q10ClipOutput = "b29c5bc1cb9006ed9306d826a11d444f"; final static String Q10ClipOutput = "b29c5bc1cb9006ed9306d826a11d444f";
@Test public void testQClip0() { testClipper("clipQSum0", "-QT 0", "117a4760b54308f81789c39b1c9de578", "bcf0d1e13537f764f006ef6d9b401ea7"); } @Test public void testQClip0() { testClipper("clipQSum0", "-QT 0", "117a4760b54308f81789c39b1c9de578", "2e617e0f0b4a1bf45dbb666da0a6a16c"); }
@Test public void testQClip2() { testClipper("clipQSum2", "-QT 2", Q10ClipOutput, "27847d330b962e60650df23b6efc8c3c"); } @Test public void testQClip2() { testClipper("clipQSum2", "-QT 2", Q10ClipOutput, "1b0d2317dfd1fd5a4d14eea3f560ac17"); }
@Test public void testQClip10() { testClipper("clipQSum10", "-QT 10", "b29c5bc1cb9006ed9306d826a11d444f", "27847d330b962e60650df23b6efc8c3c"); } @Test public void testQClip10() { testClipper("clipQSum10", "-QT 10", "b29c5bc1cb9006ed9306d826a11d444f", "1b0d2317dfd1fd5a4d14eea3f560ac17"); }
@Test public void testQClip20() { testClipper("clipQSum20", "-QT 20", "6c3434dce66ae5c9eeea502f10fb9bee", "f89ec5439e88f5a75433150da0069034"); } @Test public void testQClip20() { testClipper("clipQSum20", "-QT 20", "6c3434dce66ae5c9eeea502f10fb9bee", "3a9a10966cce58baf9e9400a2c5f99ca"); }
@Test public void testClipRange1() { testClipper("clipRange1", "-CT 1-5", "b5acd753226e25b1e088838c1aab9117", "987007f6e430cad4cb4a8d1cc1f45d91"); } @Test public void testClipRange1() { testClipper("clipRange1", "-CT 1-5", "b5acd753226e25b1e088838c1aab9117", "701f03388ffaeb0f92be1e719e2477df"); }
@Test public void testClipRange2() { testClipper("clipRange2", "-CT 1-5,11-15", "be4fcad5b666a5540028b774169cbad7", "ec4cf54ed50a6baf69dbf98782c19aeb"); } @Test public void testClipRange2() { testClipper("clipRange2", "-CT 1-5,11-15", "be4fcad5b666a5540028b774169cbad7", "7dc5da735c437b6523e0823a1ed0e981"); }
@Test public void testClipSeq() { testClipper("clipSeqX", "-X CCCCC", "db199bd06561c9f2122f6ffb07941fbc", "a9cf540e4ed2514061248a878e09a09c"); } @Test public void testClipSeq() { testClipper("clipSeqX", "-X CCCCC", "db199bd06561c9f2122f6ffb07941fbc", "daa41acd8c9f1a75c8107d0fd0db8cf0"); }
@Test public void testClipSeqFile() { testClipper("clipSeqXF", "-XF " + privateTestDir + "seqsToClip.fasta", "22cd33dbd06cc425ce3626360367bfca", "906871df304dd966682e5798d59fc86b"); } @Test public void testClipSeqFile() { testClipper("clipSeqXF", "-XF " + privateTestDir + "seqsToClip.fasta", "22cd33dbd06cc425ce3626360367bfca", "b1812de69c967059362139a269044362"); }
@Test public void testClipMulti() { testClipper("clipSeqMulti", "-QT 10 -CT 1-5 -XF " + privateTestDir + "seqsToClip.fasta -X CCCCC", "e4a88276a6f76ccc2b05a06c76305a1c", "b41995fea04034ca0427c4a71504ef83"); } @Test public void testClipMulti() { testClipper("clipSeqMulti", "-QT 10 -CT 1-5 -XF " + privateTestDir + "seqsToClip.fasta -X CCCCC", "e4a88276a6f76ccc2b05a06c76305a1c", "9f999ed70684fdfb8dba019ff71741fe"); }
@Test public void testClipNs() { testClipper("testClipNs", "-QT 10 -CR WRITE_NS", Q10ClipOutput, "27847d330b962e60650df23b6efc8c3c"); } @Test public void testClipNs() { testClipper("testClipNs", "-QT 10 -CR WRITE_NS", Q10ClipOutput, "1b0d2317dfd1fd5a4d14eea3f560ac17"); }
@Test public void testClipQ0s() { testClipper("testClipQs", "-QT 10 -CR WRITE_Q0S", Q10ClipOutput, "195b8bdfc0186fdca742764aa9b06363"); } @Test public void testClipQ0s() { testClipper("testClipQs", "-QT 10 -CR WRITE_Q0S", Q10ClipOutput, "b7ce95f277d5f66b7f5a60ffbf9692cf"); }
@Test public void testClipSoft() { testClipper("testClipSoft", "-QT 10 -CR SOFTCLIP_BASES", Q10ClipOutput, "08d16051be0b3fa3453eb1e6ca48b098"); } @Test public void testClipSoft() { testClipper("testClipSoft", "-QT 10 -CR SOFTCLIP_BASES", Q10ClipOutput, "55c9d86c18c5a59a98429e8a5076e871"); }
@Test @Test
public void testUseOriginalQuals() { public void testUseOriginalQuals() {
@ -74,7 +74,7 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
" -OQ -QT 4 -CR WRITE_Q0S" + " -OQ -QT 4 -CR WRITE_Q0S" +
" -o %s -os %s", " -o %s -os %s",
2, 2,
Arrays.asList("a2819d54b2110150e38511f5a55db91d", "55c01ccc2e84481b22d3632cdb06c8ba")); Arrays.asList("0e4c116fa07c3d8190eefe6968a0b819", "55c01ccc2e84481b22d3632cdb06c8ba"));
executeTest("clipOriginalQuals", spec); executeTest("clipOriginalQuals", spec);
} }
} }

View File

@ -59,27 +59,27 @@ public class PrintReadsIntegrationTest extends WalkerTest {
@DataProvider(name = "PRTest") @DataProvider(name = "PRTest")
public Object[][] createPrintReadsTestData() { public Object[][] createPrintReadsTestData() {
return new Object[][]{ return new Object[][]{
{new PRTest(hg18Reference, new String[]{"HiSeq.1mb.bam"}, "", "0aa3505ba61e05663e629011dd54e423")}, {new PRTest(hg18Reference, new String[]{"HiSeq.1mb.bam"}, "", "83d1454dc01cd2e7458dad4012695f64")},
{new PRTest(hg18Reference, new String[]{"HiSeq.1mb.bam"}, " -compress 0", "0aec10d19e0dbdfe1d0cbb3eddaf623a")}, {new PRTest(hg18Reference, new String[]{"HiSeq.1mb.bam"}, " -compress 0", "0aec10d19e0dbdfe1d0cbb3eddaf623a")},
{new PRTest(hg18Reference, new String[]{"HiSeq.1mb.bam"}, " -simplifyBAM", "c565d9cd4838a313e7bdb30530c0cf71")}, {new PRTest(hg18Reference, new String[]{"HiSeq.1mb.bam"}, " -simplifyBAM", "60255a68df1b8f2fbba373d75274f0de")},
{new PRTest(hg18Reference, new String[]{"HiSeq.1mb.bam"}, " -n 10", "917440a38aba707ec0e012168590981a")}, {new PRTest(hg18Reference, new String[]{"HiSeq.1mb.bam"}, " -n 10", "eb7c6bacca5fee09b8df50880eb81ee6")},
// See: GATKBAMIndex.getStartOfLastLinearBin(), BAMScheduler.advance(), IntervalOverlapFilteringIterator.advance() // See: GATKBAMIndex.getStartOfLastLinearBin(), BAMScheduler.advance(), IntervalOverlapFilteringIterator.advance()
{new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.bam"}, "", "0b58c903f54e8543a8b2ce1439aa769b")}, {new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.bam"}, "", "3d67c398ce2ac1deeddbccbd850380a7")},
{new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.bam"}, " -L 1", "5b1154cc81dba6bcfe76188e4df8d79c")}, {new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.bam"}, " -L 1", "37cdd8871843693f2650d7b48c8ae1d4")},
{new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.cram"}, " -L 1:10001 -L GL000192.1:500204", "a84efdc3d4a8d6329b5f0b494dd280d2")}, {new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.cram"}, " -L 1:10001 -L GL000192.1:500204", "4d63fd6e977a53e5d9590bd030b40bd0")},
{new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.bam"}, " -L unmapped", "cbd3d1d50c8674f79033aa8c36aa3cd1")}, {new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.bam"}, " -L unmapped", "a834400e3bd69045eb8a9e94131633f5")},
{new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.bam"}, " -L 1 -L unmapped", "5b1154cc81dba6bcfe76188e4df8d79c")}, {new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.bam"}, " -L 1 -L unmapped", "37cdd8871843693f2650d7b48c8ae1d4")},
{new PRTest(b37KGReference, new String[]{"oneReadAllInsertion.bam"}, "", "e212d1799ae797e781b17e630656a9a1")}, {new PRTest(b37KGReference, new String[]{"oneReadAllInsertion.bam"}, "", "6c04aac25e2136fee395897aac96bea8")},
{new PRTest(b37KGReference, new String[]{"NA12878.1_10mb_2_10mb.bam"}, "", "0387c61303140d8899fcbfdd3e72ed80")}, {new PRTest(b37KGReference, new String[]{"NA12878.1_10mb_2_10mb.bam"}, "", "57a9bc1f7dd4e7717ee796c484bcf45a")},
// Tests for filtering options // Tests for filtering options
{new PRTest(b37KGReference, new String[]{"NA12878.1_10mb_2_10mb.bam", "NA20313.highCoverageRegion.bam"}, {new PRTest(b37KGReference, new String[]{"NA12878.1_10mb_2_10mb.bam", "NA20313.highCoverageRegion.bam"},
"", "ad56da66be0bdab5a8992de9617ae6a5")}, "", "e691d61df10f7614d73c8ecb46c75ee1")},
{new PRTest(b37KGReference, new String[]{"NA12878.1_10mb_2_10mb.bam", "NA20313.highCoverageRegion.bam"}, {new PRTest(b37KGReference, new String[]{"NA12878.1_10mb_2_10mb.bam", "NA20313.highCoverageRegion.bam"},
" -readGroup SRR359098", "c3bfe28722a665e666098dbb7048a9f1")}, " -readGroup SRR359098", "7644eab114bf537411218f782d75a6a6")},
{new PRTest(b37KGReference, new String[]{"NA12878.1_10mb_2_10mb.bam", "NA20313.highCoverageRegion.bam"}, {new PRTest(b37KGReference, new String[]{"NA12878.1_10mb_2_10mb.bam", "NA20313.highCoverageRegion.bam"},
" -readGroup 20FUK.3 -sn NA12878", "8191f8d635d00b1f4d0993b785cc46c5")}, " -readGroup 20FUK.3 -sn NA12878", "351d5da29874033e50d29c5c36575a6c")},
{new PRTest(b37KGReference, new String[]{"NA12878.1_10mb_2_10mb.bam", "NA20313.highCoverageRegion.bam"}, {new PRTest(b37KGReference, new String[]{"NA12878.1_10mb_2_10mb.bam", "NA20313.highCoverageRegion.bam"},
" -sn na12878", "92a85b4223ec45e114f12a1fe6ebbaeb")}, " -sn na12878", "9056d852418dd2083f38e3eac1551fcd")},
}; };
} }

View File

@ -43,9 +43,9 @@ public class SplitSamFileIntegrationTest extends WalkerTest {
" --outputRoot " + prefix, " --outputRoot " + prefix,
Collections.<String>emptyList() Collections.<String>emptyList()
); );
addSplitOutput(spec, prefix, "NA12878", "3e28b666fb673be138eca9bd3db9520b"); addSplitOutput(spec, prefix, "NA12878", "356bddf614a44a0234f7b116f5cde998");
addSplitOutput(spec, prefix, "NA12891", "af01069bc3da4252ce8417a03d11f48b"); addSplitOutput(spec, prefix, "NA12891", "5dd92a0089e3f781e77d94010264b9fd");
addSplitOutput(spec, prefix, "NA12892", "cfa1cb6aaca03900160bd5627f4f698b"); addSplitOutput(spec, prefix, "NA12892", "ea8f16c18c89b49ce9d32d76c6af75f9");
executeTest("testSplitSamFile", spec); executeTest("testSplitSamFile", spec);
} }

View File

@ -851,9 +851,13 @@ public class GATKVariantContextUtils {
if ( newLikelihoods == null || (originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0) == 0 && likelihoodsAreUninformative(newLikelihoods) )) { if ( newLikelihoods == null || (originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0) == 0 && likelihoodsAreUninformative(newLikelihoods) )) {
gb.noPL(); gb.noPL();
gb.noGQ();
} else { } else {
gb.PL(newLikelihoods); gb.PL(newLikelihoods);
final int PLindex = MathUtils.maxElementIndex(newLikelihoods);
gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
} }
} }
// create the new strand allele counts array from the used alleles // create the new strand allele counts array from the used alleles

View File

@ -1503,7 +1503,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homC3AllelesSAC).make()).make(), attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homC3AllelesSAC).make()).make(),
new VariantContextBuilder(vcBase).alleles(AG).make(), new VariantContextBuilder(vcBase).alleles(AG).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).PL(new double[]{-20, 0}).AD(new int[]{0, 1}). Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).PL(new double[]{-20, 0}).AD(new int[]{0, 1}).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{0, 0, 1, 1}).GQ(100).make())}); attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{0, 0, 1, 1}).GQ(200).make())});
tests.add(new Object[]{ tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, Aref)).AD(homRef3AllelesAD).PL(triploidRef3AllelesPL). new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, Aref)).AD(homRef3AllelesAD).PL(triploidRef3AllelesPL).
@ -1522,7 +1522,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homC3AllelesSAC).make()).make(), attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homC3AllelesSAC).make()).make(),
new VariantContextBuilder(vcBase).alleles(AG).make(), new VariantContextBuilder(vcBase).alleles(AG).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, G)).PL(new double[]{0, -40, -70, -90}).AD(new int[]{20, 1}). Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, G)).PL(new double[]{0, -40, -70, -90}).AD(new int[]{20, 1}).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{0, 0, 1, 1}).GQ(100).make())}); attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{0, 0, 1, 1}).GQ(400).make())});
tests.add(new Object[]{ tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homRef3AllelesAD).PL(homRef3AllelesPL). new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homRef3AllelesAD).PL(homRef3AllelesPL).
@ -1547,19 +1547,19 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, hetRefG3AllelesSAC).make()).make(), attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, hetRefG3AllelesSAC).make()).make(),
new VariantContextBuilder(vcBase).alleles(AG).make(), new VariantContextBuilder(vcBase).alleles(AG).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, 0, -50}).AD(new int[]{10, 11}). Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, 0, -50}).AD(new int[]{10, 11}).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{10, 10, 11, 11}).GQ(100).make())}); attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{10, 10, 11, 11}).GQ(200).make())});
tests.add(new Object[]{ tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetCG3AllelesAD).PL(hetCG3AllelesPL). new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetCG3AllelesAD).PL(hetCG3AllelesPL).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, hetCG3AllelesSAC).make()).make(), attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, hetCG3AllelesSAC).make()).make(),
new VariantContextBuilder(vcBase).alleles(AG).make(), new VariantContextBuilder(vcBase).alleles(AG).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -20, -30}).AD(new int[]{0, 11}). Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -20, -30}).AD(new int[]{0, 11}).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{0, 0, 11, 11}).GQ(100).make())}); attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{0, 0, 11, 11}).GQ(200).make())});
tests.add(new Object[]{ tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homG3AllelesAD).PL(homG3AllelesPL). new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homG3AllelesAD).PL(homG3AllelesPL).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homG3AllelesSAC).make()).make(), attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homG3AllelesSAC).make()).make(),
new VariantContextBuilder(vcBase).alleles(AG).make(), new VariantContextBuilder(vcBase).alleles(AG).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, -40, 0}).AD(new int[]{0, 21}). Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, -40, 0}).AD(new int[]{0, 21}).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{0, 0, 21, 21}).GQ(100).make())}); attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{0, 0, 21, 21}).GQ(200).make())});
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }