Added a CountCovariates integration test that uses a vcf file as the list of variant sites to skip over instead of the usual dbSNP rod.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2152 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2009-11-24 21:51:38 +00:00
parent 3484f652e7
commit c9ff5f209c
3 changed files with 43 additions and 11 deletions

View File

@ -110,7 +110,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file
private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file
private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site
private final String versionString = "v2.0.4"; // Major version, minor version, and build number
private final String versionString = "v2.0.5"; // Major version, minor version, and build number
private Pair<Long, Long> dbSNP_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for dbSNP loci
private Pair<Long, Long> novel_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for non-dbSNP loci
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
@ -301,7 +301,8 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
}
// This read isn't in the hashMap yet so fill out the datum and add it to the map so that we never have to do the work again
// Lots of lines just pulling things out of the SAMRecord and stuffing into local variables
// Lots of lines just pulling things out of the SAMRecord and stuffing into local variables, many edge cases to worry about
// These lines should be exactly the same in TableRecalibrationWalker
quals = read.getBaseQualities();
// Check if we need to use the original quality scores instead
if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
@ -350,7 +351,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
refBase = (byte)ref.getBase();
prevBase = readDatum.bases[offset - 1];
// Get the complement base strand if we are a negative strand read, DinucCovariate is responsible for getting the complement bases if needed
// DinucCovariate is responsible for getting the complement bases if needed
if( readDatum.isNegStrand ) {
prevBase = readDatum.bases[offset + 1];
}
@ -415,8 +416,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
int refCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(ref);
if ( readCharBaseIndex != -1 && refCharBaseIndex != -1 ) {
if ( readCharBaseIndex != refCharBaseIndex )
if ( readCharBaseIndex != refCharBaseIndex ) {
counts.first++;
}
counts.second++;
}
}
@ -426,8 +428,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
* Validate the dbSNP mismatch rates.
*/
private void validateDbsnpMismatchRate() {
if ( novel_counts.second == 0 || dbSNP_counts.second == 0 )
if ( novel_counts.second == 0 || dbSNP_counts.second == 0 ) {
return;
}
double fractionMM_novel = (double)novel_counts.first / (double)novel_counts.second;
double fractionMM_dbsnp = (double)dbSNP_counts.first / (double)dbSNP_counts.second;
@ -464,7 +467,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
datum = new RecalDatum(); // initialized with zeros, will be incremented at end of method
if( VALIDATE_OLD_RECALIBRATOR || SORTED_OUTPUT ) {
dataManager.data.myPut( key, datum );
dataManager.data.sortedPut( key, datum );
} else {
dataManager.data.put( key, datum );
}

View File

@ -35,7 +35,7 @@ import java.util.*;
* User: rpoplin
* Date: Oct 30, 2009
*
* A HashMap that maps a list of comparables to any object <T>.
* A HashMap that maps a list of comparables to any Object <T>.
* There is functionality for the mappings to be given back to you in sorted order.
*/
@ -55,9 +55,8 @@ public class NHashMap<T> extends HashMap<List<? extends Comparable>, T> {
}
// This method is here only to help facilitate direct comparison of old and refactored recalibrator.
// The old recalibrator prints out its mappings in a sorted order but the refactored recalibrator doesn't need to.
public T myPut(List<? extends Comparable> key, T value) {
// This method is here only to help facilitate outputting the mappings in sorted order
public T sortedPut(List<? extends Comparable> key, T value) {
if( keyLists == null ) {
keyLists = new ArrayList<ArrayList<Comparable>>();

View File

@ -20,6 +20,8 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "7842b33245a53b13c7b6cd4e4616ac11");
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "cbe17568c5f03516a88ff0c5ce33a87b" );
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "8806fb7f0e669d1abab61dc4b3d10d8f" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
String md5 = entry.getValue();
@ -30,7 +32,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -T CountCovariates" +
" -I " + bam +
( bam.equals( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" )
? " -L 1:10,800,000-10,810,000" : " -L 1:10,000,000-10,200,000" ) +
? " -L 1:10,800,000-10,810,000" : " -L 1:10,000,000-10,200,000" ) +
" -cov ReadGroupCovariate" +
" -cov QualityScoreCovariate" +
" -cov CycleCovariate" +
@ -74,4 +76,32 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
}
}
}
@Test
public void testCountCovariatesVCF() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d5962c39a53a511d7ec710d5343e7a37");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
String md5 = entry.getValue();
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R /broad/1KG/reference/human_b36_both.fasta" +
" -B dbsnp,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf" +
" -T CountCovariates" +
" -I " + bam +
" -L 1:10,000,000-10,200,000" +
" -cov ReadGroupCovariate" +
" -cov QualityScoreCovariate" +
" -cov CycleCovariate" +
" -cov DinucCovariate" +
" --sorted_output" +
" -recalFile %s",
1, // just one output file
Arrays.asList(md5));
executeTest("testCountCovariatesVCF", spec);
}
}
}