Tile is now a standard covariate. By default, TileCovariate returns -1 if the tile can't be derived from the read's name. Added a new command-line option, -throwTileException, which forces TileCovariate to throw an exception if the tile can't be derived for a read. Singleton covariates, such as any read group without tile info, must be skipped over in TableRecalibration so that the sequential formulation doesn't apply the same correction more than once. The TileCovariate class has been added to the Early Access package.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2544 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d01bde36a4
commit
f587ff46af
|
|
@ -141,6 +141,7 @@ public class RecalDataManager {
|
|||
recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.data, smoothing, maxQual);
|
||||
for( NestedHashMap map : dataCollapsedByCovariate ) {
|
||||
recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual);
|
||||
checkForSingletons(map.data);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -156,6 +157,20 @@ public class RecalDataManager {
|
|||
}
|
||||
}
|
||||
|
||||
private void checkForSingletons( final Map data ) {
|
||||
|
||||
for( Object comp : data.keySet() ) {
|
||||
final Object val = data.get(comp);
|
||||
if( val instanceof RecalDatum ) { // We are at the end of the nested hash maps
|
||||
if( data.keySet().size() == 1) {
|
||||
data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done in a previous sequential step
|
||||
}
|
||||
} else { // Another layer in the nested hash map
|
||||
checkForSingletons( (Map) val );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the appropriate collapsed table out of the set of all the tables held by this Object
|
||||
* @param covariate Which covariate indexes the desired collapsed HashMap
|
||||
|
|
|
|||
|
|
@ -59,6 +59,9 @@ public class RecalibrationArgumentCollection {
|
|||
public int WINDOW_SIZE = 5;
|
||||
@Argument(fullName = "homopolymer_nback", shortName="nback", doc="The number of previous bases to look at in HomopolymerCovariate", required=false)
|
||||
public int HOMOPOLYMER_NBACK = 7;
|
||||
@Argument(fullName = "exception_if_no_tile", shortName="throwTileException", doc="If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required=false)
|
||||
public boolean EXCEPTION_IF_NO_TILE = false;
|
||||
|
||||
|
||||
public boolean checkSolidRecalMode() {
|
||||
return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
|
@ -30,17 +31,25 @@ import edu.mit.broad.picard.illumina.parser.IlluminaUtil;
|
|||
/**
|
||||
* @author alecw@broadinstitute.org
|
||||
*/
|
||||
public class TileCovariate implements ExperimentalCovariate {
|
||||
|
||||
public class TileCovariate implements StandardCovariate {
|
||||
|
||||
private static boolean exceptionWhenNoTile = false;
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
public void initialize( final RecalibrationArgumentCollection RAC ) {
|
||||
exceptionWhenNoTile = RAC.EXCEPTION_IF_NO_TILE;
|
||||
}
|
||||
|
||||
// Used to pick out the covariate's value from attributes of the read
|
||||
public Comparable getValue(final SAMRecord read, final int offset) {
|
||||
Integer tile = IlluminaUtil.getTileFromReadName(read.getReadName());
|
||||
if (tile == null) {
|
||||
throw new StingException("Tile number not defined for read");
|
||||
if( exceptionWhenNoTile ) {
|
||||
throw new StingException( "Tile number not defined for read: " + read.getReadName() );
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return tile;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,10 +16,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCountCovariates1() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "604c0d898c9df9acbeeade9979707546" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f03f6186f54bc3f841639a206d424d97");
|
||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "fe873886e3efa3566f08a6ae26d71b43" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "996450b7c2e9ef68b9e76d57f2d5288a" );
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "e5b2d5a2f4283718dae678cbc84be847" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "ff1b3a18b67b09560cacc3b5dea0a034");
|
||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7d6428a76e07ed4b99351aa4df89634d" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "a93b0263acdc856b885f95848852140d" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -36,6 +36,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" -cov QualityScoreCovariate" +
|
||||
" -cov CycleCovariate" +
|
||||
" -cov DinucCovariate" +
|
||||
" -cov TileCovariate" +
|
||||
" --solid_recal_mode SET_Q_ZERO" +
|
||||
" -recalFile %s",
|
||||
1, // just one output file
|
||||
|
|
@ -48,10 +49,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testTableRecalibrator1() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "d1a9b38f782af3edf223908cb71c7205" );
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "6c59d291c37d053e0f188b762f3060a5" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "27b3eaf3c02ffc5fb3d7815468d9958e");
|
||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b7f4d3bfb761f29531a37336615046ff" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1faadda11d5c0278575d2f0368a65f14" );
|
||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7ebdce416b72679e1cf88cc9886a5edc" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "a39afc94ed74f8137c9d43285997bd90" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
<class>org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate</class>
|
||||
<!-- Local realignment around indels -->
|
||||
<class>org.broadinstitute.sting.gatk.walkers.indels.CleanedReadInjector</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.indels.IndelIntervalWalker</class>
|
||||
|
|
@ -62,6 +63,8 @@
|
|||
<class>org.broadinstitute.sting.gatk.walkers.recalibration.PrimerRoundCovariate</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate</class>
|
||||
</dependencies>
|
||||
</executable>
|
||||
<resources>
|
||||
|
|
|
|||
Loading…
Reference in New Issue