Tile is now a standard covariate. By default the TileCovariate returns -1 if tile can't be derived from the read's name. Added a new command line option -throwTileException which will force TileCovariate to throw an exception if tile can't be derived for a read. Singleton covariates, such as any read group without tile info, must be skipped over in TableRecalibration so that the sequential formulation doesn't apply the same correction more than once. TileCovariate class has been added to the Early Access package.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2544 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-01-07 22:51:41 +00:00
parent d01bde36a4
commit f587ff46af
5 changed files with 40 additions and 9 deletions

View File

@ -141,6 +141,7 @@ public class RecalDataManager {
recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.data, smoothing, maxQual);
for( NestedHashMap map : dataCollapsedByCovariate ) {
recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual);
checkForSingletons(map.data);
}
}
@ -156,6 +157,20 @@ public class RecalDataManager {
}
}
private void checkForSingletons( final Map data ) {
for( Object comp : data.keySet() ) {
final Object val = data.get(comp);
if( val instanceof RecalDatum ) { // We are at the end of the nested hash maps
if( data.keySet().size() == 1) {
data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done in a previous sequential step
}
} else { // Another layer in the nested hash map
checkForSingletons( (Map) val );
}
}
}
/**
* Get the appropriate collapsed table out of the set of all the tables held by this Object
* @param covariate Which covariate indexes the desired collapsed HashMap

View File

@ -59,6 +59,9 @@ public class RecalibrationArgumentCollection {
public int WINDOW_SIZE = 5;
@Argument(fullName = "homopolymer_nback", shortName="nback", doc="The number of previous bases to look at in HomopolymerCovariate", required=false)
public int HOMOPOLYMER_NBACK = 7;
@Argument(fullName = "exception_if_no_tile", shortName="throwTileException", doc="If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required=false)
public boolean EXCEPTION_IF_NO_TILE = false;
public boolean checkSolidRecalMode() {
return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ||

View File

@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.recalibration;
import org.broadinstitute.sting.utils.StingException;
@ -30,17 +31,25 @@ import edu.mit.broad.picard.illumina.parser.IlluminaUtil;
/**
* @author alecw@broadinstitute.org
*/
public class TileCovariate implements ExperimentalCovariate {
public class TileCovariate implements StandardCovariate {
private static boolean exceptionWhenNoTile = false;
// Initialize any member variables using the command-line arguments passed to the walkers
public void initialize( final RecalibrationArgumentCollection RAC ) {
exceptionWhenNoTile = RAC.EXCEPTION_IF_NO_TILE;
}
// Used to pick out the covariate's value from attributes of the read
public Comparable getValue(final SAMRecord read, final int offset) {
Integer tile = IlluminaUtil.getTileFromReadName(read.getReadName());
if (tile == null) {
throw new StingException("Tile number not defined for read");
if( exceptionWhenNoTile ) {
throw new StingException( "Tile number not defined for read: " + read.getReadName() );
} else {
return -1;
}
}
return tile;
}

View File

@ -16,10 +16,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariates1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "604c0d898c9df9acbeeade9979707546" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f03f6186f54bc3f841639a206d424d97");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "fe873886e3efa3566f08a6ae26d71b43" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "996450b7c2e9ef68b9e76d57f2d5288a" );
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "e5b2d5a2f4283718dae678cbc84be847" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "ff1b3a18b67b09560cacc3b5dea0a034");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7d6428a76e07ed4b99351aa4df89634d" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "a93b0263acdc856b885f95848852140d" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -36,6 +36,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -cov QualityScoreCovariate" +
" -cov CycleCovariate" +
" -cov DinucCovariate" +
" -cov TileCovariate" +
" --solid_recal_mode SET_Q_ZERO" +
" -recalFile %s",
1, // just one output file
@ -48,10 +49,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibrator1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "d1a9b38f782af3edf223908cb71c7205" );
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "6c59d291c37d053e0f188b762f3060a5" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "27b3eaf3c02ffc5fb3d7815468d9958e");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b7f4d3bfb761f29531a37336615046ff" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1faadda11d5c0278575d2f0368a65f14" );
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7ebdce416b72679e1cf88cc9886a5edc" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "a39afc94ed74f8137c9d43285997bd90" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();

View File

@ -25,6 +25,7 @@
<class>org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate</class>
<class>org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate</class>
<class>org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate</class>
<class>org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate</class>
<!-- Local realignment around indels -->
<class>org.broadinstitute.sting.gatk.walkers.indels.CleanedReadInjector</class>
<class>org.broadinstitute.sting.gatk.walkers.indels.IndelIntervalWalker</class>
@ -62,6 +63,8 @@
<class>org.broadinstitute.sting.gatk.walkers.recalibration.PrimerRoundCovariate</class>
<class>org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate</class>
<class>org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate</class>
<class>org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate</class>
<class>org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate</class>
</dependencies>
</executable>
<resources>