diff --git a/java/src/org/broad/tribble/vcf/VCFCodec.java b/java/src/org/broad/tribble/vcf/VCFCodec.java index cb400fff3..0f6a378c0 100644 --- a/java/src/org/broad/tribble/vcf/VCFCodec.java +++ b/java/src/org/broad/tribble/vcf/VCFCodec.java @@ -72,6 +72,8 @@ public class VCFCodec implements FeatureCodec { try { while ((line = reader.readLine()) != null) { if (line.startsWith("##")) { + if ( line.startsWith("##fileformat") && ! line.startsWith("##fileformat=VCFv3" ) ) + throw new CodecLineParsingException("VCF codec can only parse VCF3 formated files. Your version line is " + line + ". If you want to parse VCF4, use VCF4 use VCF as the rod type"); headerStrings.add(line); } else if (line.startsWith("#")) { diff --git a/java/src/org/broad/tribble/vcf/VCFHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFHeaderLine.java index fda574a3e..f3dadccf5 100644 --- a/java/src/org/broad/tribble/vcf/VCFHeaderLine.java +++ b/java/src/org/broad/tribble/vcf/VCFHeaderLine.java @@ -42,6 +42,7 @@ public class VCFHeaderLine implements Comparable { private String stringRep = null; private String mKey = null; private String mValue = null; + protected VCFHeaderVersion mVersion = null; /** @@ -135,6 +136,10 @@ public class VCFHeaderLine implements Comparable { this.mVersion = version; } + public VCFHeaderVersion getVersion() { + return mVersion; + } + /** * create a string of a mapping pair for the target VCF version * @param keyValues a mapping of the key->value pairs to output diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java index 90afdd594..014a3b5c5 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java @@ -80,6 +80,8 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { while ((line = reader.readLine()) != null) { lineNo++; if (line.startsWith("##")) { + if ( line.startsWith("##fileformat") && ! line.startsWith("##fileformat=VCFv4" ) ) + throw new CodecLineParsingException("VCF4 codec can only parse VCF4 formated files. Your version line is " + line + ". If you want VCF3 parsing, use VCF as the rod type."); headerStrings.add(line); } else if (line.startsWith("#")) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index 4232bdf92..54c1a8995 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -70,7 +70,7 @@ import java.util.Map; @WalkerName( "CountCovariates" ) @ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality @Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta -public class CovariateCounterWalker extends LocusWalker { +public class CovariateCounterWalker extends LocusWalker implements TreeReducible { ///////////////////////////// // Constants @@ -87,6 +87,9 @@ public class CovariateCounterWalker extends LocusWalker { ///////////////////////////// // Command Line Arguments ///////////////////////////// + @Argument(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the outputted covariates table recalibration file") + public PrintStream RECAL_FILE; + @Argument(fullName="list", shortName="ls", doc="List the available covariates and exit", required=false) private boolean LIST_ONLY = false; @Argument(fullName="covariate", shortName="cov", doc="Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required=false) @@ -230,6 +233,12 @@ public class CovariateCounterWalker extends LocusWalker { logger.info( "\t" + cov.getClass().getSimpleName() ); cov.initialize( RAC ); // Initialize any covariate member variables using the shared argument collection } + +// try { +// stream = new PrintStream( RAC.RECAL_FILE ); +// } catch ( FileNotFoundException e ) { +// throw new RuntimeException( "Couldn't open output file: ", e ); +// } } @@ -263,15 +272,10 @@ public class CovariateCounterWalker extends LocusWalker { if( !isSNP && ( ++numUnprocessed >= PROCESS_EVERY_NTH_LOCUS ) ) { numUnprocessed = 0; // Reset the counter because we are processing this very locus - GATKSAMRecord gatkRead; - int offset; - byte refBase; - byte[] bases; - // For each read at this locus for( PileupElement p : context.getBasePileup() ) { - gatkRead = (GATKSAMRecord) p.getRead(); - offset = p.getOffset(); + GATKSAMRecord gatkRead = (GATKSAMRecord) p.getRead(); + int offset = p.getOffset(); if( gatkRead.containsTemporaryAttribute( SKIP_RECORD_ATTRIBUTE ) ) { continue; @@ -297,8 +301,8 @@ public class CovariateCounterWalker extends LocusWalker { // Skip this position if base quality is zero if( gatkRead.getBaseQualities()[offset] > 0 ) { - bases = gatkRead.getReadBases(); - refBase = ref.getBase(); + byte[] bases = gatkRead.getReadBases(); + byte refBase = ref.getBase(); // Skip if this base is an 'N' or etc. if( BaseUtils.isRegularBase( bases[offset] ) ) { @@ -390,8 +394,6 @@ public class CovariateCounterWalker extends LocusWalker { * @param refBase The reference base at this locus */ private void updateDataFromRead(final GATKSAMRecord gatkRead, final int offset, final byte refBase) { - - final Object[][] covars = (Comparable[][]) gatkRead.getTemporaryAttribute(COVARS_ATTRIBUTE); final Object[] key = covars[offset]; @@ -426,11 +428,7 @@ public class CovariateCounterWalker extends LocusWalker { * @return returns A PrintStream created from the -recalFile filename argument specified to the walker */ public PrintStream reduceInit() { - try { - return new PrintStream( RAC.RECAL_FILE ); - } catch ( FileNotFoundException e ) { - throw new RuntimeException( "Couldn't open output file: ", e ); - } + return RECAL_FILE; } /** @@ -440,7 +438,11 @@ public class CovariateCounterWalker extends LocusWalker { * @return returns The PrintStream used to output the CSV data */ public PrintStream reduce( Integer value, PrintStream recalTableStream ) { - return recalTableStream; // Nothing to do here + return recalTableStream; // Nothing to do here, just return our open stream + } + + public PrintStream treeReduce( PrintStream recalTableStream1, PrintStream recalTableStream2 ) { + return recalTableStream1; // Nothing to do here, just return our open stream } /** diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java index b3bb58f61..c4ad3869a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java @@ -71,22 +71,22 @@ public class RecalDatumOptimized { // //--------------------------------------------------------------------------------------------------------------- - public final void increment( final long incObservations, final long incMismatches ) { + public synchronized final void increment( final long incObservations, final long incMismatches ) { numObservations += incObservations; numMismatches += incMismatches; } - public final void increment( final RecalDatumOptimized other ) { + public synchronized final void increment( final RecalDatumOptimized other ) { increment( other.numObservations, other.numMismatches ); } - public final void increment( final List data ) { + public synchronized final void increment( final List data ) { for ( RecalDatumOptimized other : data ) { this.increment( other ); } } - public final void increment( final char curBase, final char refBase ) { + public synchronized final void increment( final char curBase, final char refBase ) { increment( 1, BaseUtils.simpleBaseToBaseIndex(curBase) == BaseUtils.simpleBaseToBaseIndex(refBase) ? 0 : 1 ); // increment takes num observations, then num mismatches } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java index da7cf996c..62d8543a6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java @@ -41,8 +41,6 @@ public class RecalibrationArgumentCollection { ////////////////////////////////// // Shared Command Line Arguments ////////////////////////////////// - @Argument(fullName="recal_file", shortName="recalFile", required=false, doc="Filename for the outputted covariates table recalibration file") - public String RECAL_FILE = "output.recal_data.csv"; @Argument(fullName="default_read_group", shortName="dRG", required=false, doc="If a read has no read group then default to the provided String.") public String DEFAULT_READ_GROUP = null; @Argument(fullName="default_platform", shortName="dP", required=false, doc="If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.") diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 7d876ddff..4a8c85415 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -87,6 +87,9 @@ public class TableRecalibrationWalker extends ReadWalker entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); + for ( String parallelism : Arrays.asList("") ) { // todo -- enable parallel tests. They work but there's a system bug Arrays.asList("", " -nt 4")) { + for ( Map.Entry entry : e.entrySet() ) { + String bam = entry.getKey(); + String md5 = entry.getValue(); - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + oneKGLocation + "reference/human_b36_both.fasta" + - " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + - " -T CountCovariates" + - " -I " + bam + - ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) - ? " -L 1:10,800,000-10,810,000" : " -L 1:10,000,000-10,200,000" ) + - " -cov ReadGroupCovariate" + - " -cov QualityScoreCovariate" + - " -cov CycleCovariate" + - " -cov DinucCovariate" + - " -cov TileCovariate" + - " --solid_recal_mode SET_Q_ZERO" + - " -recalFile %s", - 1, // just one output file - Arrays.asList(md5)); - List result = executeTest("testCountCovariates1", spec).getFirst(); - paramsFiles.put(bam, result.get(0).getAbsolutePath()); + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + oneKGLocation + "reference/human_b36_both.fasta" + + " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + + " -T CountCovariates" + + " -I " + bam + + ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) + ? " -L 1:10,800,000-10,810,000" : " -L 1:10,000,000-10,200,000" ) + + " -cov ReadGroupCovariate" + + " -cov QualityScoreCovariate" + + " -cov CycleCovariate" + + " -cov DinucCovariate" + + " -cov TileCovariate" + + " --solid_recal_mode SET_Q_ZERO" + + " -recalFile %s" + parallelism, + 1, // just one output file + Arrays.asList(md5)); + List result = executeTest("testCountCovariates1" + parallelism, spec).getFirst(); + paramsFiles.put(bam, result.get(0).getAbsolutePath()); + } } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java index 016fac9cc..0f7243ef0 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java @@ -8,8 +8,7 @@ import java.util.ArrayList; public class RecalibrationWalkersPerformanceTest extends WalkerTest { - @Test - public void testCountCovariatesWholeGenome() { + private void testCountCovariatesWholeGenomeRunner(String moreArgs) { WalkerTestSpec spec = new WalkerTestSpec( "-R " + seqLocation + "references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta" + " -T CountCovariates" + @@ -17,14 +16,13 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest { " -L chr1:1-50,000,000" + " -standard" + " -OQ" + - " -recalFile /dev/null", + " -recalFile /dev/null" + moreArgs, 0, new ArrayList(0)); executeTest("testCountCovariatesWholeGenome", spec); } - @Test - public void testCountCovariatesWholeExome() { + private void testCountCovariatesWholeExomeRunner(String moreArgs) { WalkerTestSpec spec = new WalkerTestSpec( "-R " + seqLocation + "references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta" + " -T CountCovariates" + @@ -32,12 +30,22 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest { " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" + " -standard" + " -OQ" + - " -recalFile /dev/null", + " -recalFile /dev/null" + moreArgs, 0, new ArrayList(0)); executeTest("testCountCovariatesWholeExome", spec); } + @Test + public void testCountCovariatesWholeGenome() { testCountCovariatesWholeGenomeRunner(""); } + @Test + public void testCountCovariatesWholeGenomeParallel() { testCountCovariatesWholeGenomeRunner(" -nt 4"); } + + @Test + public void testCountCovariatesWholeExome() { testCountCovariatesWholeExomeRunner(""); } + @Test + public void testCountCovariatesWholeExomeParallel() { testCountCovariatesWholeExomeRunner(" -nt 4"); } + @Test public void testTableRecalibratorWholeGenome() { WalkerTestSpec spec = new WalkerTestSpec( diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 3d7a34b56..c82aa13ac 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -29,7 +29,7 @@ public class String extraArgs = "-L 1:1-10,000,000"; for (String tests : testsEnumerations) { WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -o %s", - 1, Arrays.asList("97d2471ed6ee79d70ce5bd9cc0be2239")); + 1, Arrays.asList("119601d7e9e67a1053663b2e546250ed")); executeTest("testSelect1", spec); } } @@ -38,7 +38,7 @@ public class public void testSelect2() { String extraArgs = "-L 1:1-10,000,000"; WalkerTestSpec spec = new WalkerTestSpec( withSelect(withSelect(root, "DP < 50", "DP50"), "set==\"Intersection\"", "intersection") + " " + extraArgs + " -o %s", - 1, Arrays.asList("e3366d73ee7cdf630c29809ce230e32e")); + 1, Arrays.asList("06d495ab8169a2570eebdc54ecdffe10")); executeTest("testSelect2", spec); } @@ -48,7 +48,7 @@ public class for (String vcfFile : vcfFiles) { WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B eval,VCF," + validationDataLocation + vcfFile + " -B comp,VCF," + validationDataLocation + "GenotypeConcordanceComp.vcf -E GenotypeConcordance -reportType CSV -o %s", 1, - Arrays.asList("51574b4ab0b381c5a01268f91e78b25c")); + Arrays.asList("15d1075d384da2bb7445f7493f2b6a07")); executeTest("testVEGenotypeConcordance" + vcfFile, spec); } @@ -57,8 +57,8 @@ public class @Test public void testVESimple() { HashMap expectations = new HashMap(); - expectations.put("-L 1:1-10,000,000", "8f76d8c0e3a8a5836bb5bf423e04c268"); - expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "6b128bacbd0402471bd6d4e3f9283c47"); + expectations.put("-L 1:1-10,000,000", "629b8b124306435ff56b66357354dfbc"); + expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "f51c299d500b347d098c7ab25f54a436"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs = entry.getKey(); @@ -80,10 +80,10 @@ public class " -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; - String matchingMD5 = "aca5a64a0e4850906db2bd820253b784"; + String matchingMD5 = "d01725ce4e46c8fea0855a923c1598fd"; expectations.put("", matchingMD5); expectations.put(" -known comp_hapmap -known dbsnp", matchingMD5); - expectations.put(" -known comp_hapmap", "442213609c2866f7a90cbc4b3486441a"); + expectations.put(" -known comp_hapmap", "a50be9240f6c90503fb6333d8a78b974"); for (String tests : testsEnumerations) { for (Map.Entry entry : expectations.entrySet()) { String extraArgs2 = entry.getKey(); @@ -118,7 +118,7 @@ public class for (String tests : testsEnumerations) { WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s", 2, - Arrays.asList("dc53aaf7db9f05e3b0a38bf5efe3fbbe", "d94328f4a5f7c40e95edf2ef13f38ae0")); + Arrays.asList("483f821ce96f4cf571e9bba356c9f325", "d94328f4a5f7c40e95edf2ef13f38ae0")); executeTest("testVEWriteVCF", spec); } }