Added a CountCovariates integration test that uses a vcf file as the list of variant sites to skip over instead of the usual dbSNP rod.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2152 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3484f652e7
commit
c9ff5f209c
|
|
@ -110,7 +110,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file
|
||||
private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file
|
||||
private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site
|
||||
private final String versionString = "v2.0.4"; // Major version, minor version, and build number
|
||||
private final String versionString = "v2.0.5"; // Major version, minor version, and build number
|
||||
private Pair<Long, Long> dbSNP_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for dbSNP loci
|
||||
private Pair<Long, Long> novel_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for non-dbSNP loci
|
||||
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
|
||||
|
|
@ -301,7 +301,8 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
}
|
||||
|
||||
// This read isn't in the hashMap yet so fill out the datum and add it to the map so that we never have to do the work again
|
||||
// Lots of lines just pulling things out of the SAMRecord and stuffing into local variables
|
||||
// Lots of lines just pulling things out of the SAMRecord and stuffing into local variables, many edge cases to worry about
|
||||
// These lines should be exactly the same in TableRecalibrationWalker
|
||||
quals = read.getBaseQualities();
|
||||
// Check if we need to use the original quality scores instead
|
||||
if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
|
||||
|
|
@ -350,7 +351,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
refBase = (byte)ref.getBase();
|
||||
prevBase = readDatum.bases[offset - 1];
|
||||
|
||||
// Get the complement base strand if we are a negative strand read, DinucCovariate is responsible for getting the complement bases if needed
|
||||
// DinucCovariate is responsible for getting the complement bases if needed
|
||||
if( readDatum.isNegStrand ) {
|
||||
prevBase = readDatum.bases[offset + 1];
|
||||
}
|
||||
|
|
@ -415,8 +416,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
int refCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(ref);
|
||||
|
||||
if ( readCharBaseIndex != -1 && refCharBaseIndex != -1 ) {
|
||||
if ( readCharBaseIndex != refCharBaseIndex )
|
||||
if ( readCharBaseIndex != refCharBaseIndex ) {
|
||||
counts.first++;
|
||||
}
|
||||
counts.second++;
|
||||
}
|
||||
}
|
||||
|
|
@ -426,8 +428,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
* Validate the dbSNP mismatch rates.
|
||||
*/
|
||||
private void validateDbsnpMismatchRate() {
|
||||
if ( novel_counts.second == 0 || dbSNP_counts.second == 0 )
|
||||
if ( novel_counts.second == 0 || dbSNP_counts.second == 0 ) {
|
||||
return;
|
||||
}
|
||||
|
||||
double fractionMM_novel = (double)novel_counts.first / (double)novel_counts.second;
|
||||
double fractionMM_dbsnp = (double)dbSNP_counts.first / (double)dbSNP_counts.second;
|
||||
|
|
@ -464,7 +467,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
|
||||
datum = new RecalDatum(); // initialized with zeros, will be incremented at end of method
|
||||
if( VALIDATE_OLD_RECALIBRATOR || SORTED_OUTPUT ) {
|
||||
dataManager.data.myPut( key, datum );
|
||||
dataManager.data.sortedPut( key, datum );
|
||||
} else {
|
||||
dataManager.data.put( key, datum );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import java.util.*;
|
|||
* User: rpoplin
|
||||
* Date: Oct 30, 2009
|
||||
*
|
||||
* A HashMap that maps a list of comparables to any object <T>.
|
||||
* A HashMap that maps a list of comparables to any Object <T>.
|
||||
* There is functionality for the mappings to be given back to you in sorted order.
|
||||
*/
|
||||
|
||||
|
|
@ -55,9 +55,8 @@ public class NHashMap<T> extends HashMap<List<? extends Comparable>, T> {
|
|||
}
|
||||
|
||||
|
||||
// This method is here only to help facilitate direct comparison of old and refactored recalibrator.
|
||||
// The old recalibrator prints out its mappings in a sorted order but the refactored recalibrator doesn't need to.
|
||||
public T myPut(List<? extends Comparable> key, T value) {
|
||||
// This method is here only to help facilitate outputting the mappings in sorted order
|
||||
public T sortedPut(List<? extends Comparable> key, T value) {
|
||||
|
||||
if( keyLists == null ) {
|
||||
keyLists = new ArrayList<ArrayList<Comparable>>();
|
||||
|
|
|
|||
|
|
@ -20,6 +20,8 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "7842b33245a53b13c7b6cd4e4616ac11");
|
||||
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "cbe17568c5f03516a88ff0c5ce33a87b" );
|
||||
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "8806fb7f0e669d1abab61dc4b3d10d8f" );
|
||||
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
|
|
@ -30,7 +32,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" -T CountCovariates" +
|
||||
" -I " + bam +
|
||||
( bam.equals( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" )
|
||||
? " -L 1:10,800,000-10,810,000" : " -L 1:10,000,000-10,200,000" ) +
|
||||
? " -L 1:10,800,000-10,810,000" : " -L 1:10,000,000-10,200,000" ) +
|
||||
" -cov ReadGroupCovariate" +
|
||||
" -cov QualityScoreCovariate" +
|
||||
" -cov CycleCovariate" +
|
||||
|
|
@ -74,4 +76,32 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCountCovariatesVCF() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d5962c39a53a511d7ec710d5343e7a37");
|
||||
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R /broad/1KG/reference/human_b36_both.fasta" +
|
||||
" -B dbsnp,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf" +
|
||||
" -T CountCovariates" +
|
||||
" -I " + bam +
|
||||
" -L 1:10,000,000-10,200,000" +
|
||||
" -cov ReadGroupCovariate" +
|
||||
" -cov QualityScoreCovariate" +
|
||||
" -cov CycleCovariate" +
|
||||
" -cov DinucCovariate" +
|
||||
" --sorted_output" +
|
||||
" -recalFile %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testCountCovariatesVCF", spec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue