Tile is now a standard covariate. By default, TileCovariate returns -1 if the tile can't be derived from the read's name. Added a new command-line option, -throwTileException, which forces TileCovariate to throw an exception if the tile can't be derived for a read. Singleton covariates, such as any read group without tile info, must be skipped over in TableRecalibration so that the sequential formulation doesn't apply the same correction more than once. The TileCovariate class has been added to the Early Access package.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2544 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d01bde36a4
commit
f587ff46af
|
|
@ -141,6 +141,7 @@ public class RecalDataManager {
|
||||||
recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.data, smoothing, maxQual);
|
recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.data, smoothing, maxQual);
|
||||||
for( NestedHashMap map : dataCollapsedByCovariate ) {
|
for( NestedHashMap map : dataCollapsedByCovariate ) {
|
||||||
recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual);
|
recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual);
|
||||||
|
checkForSingletons(map.data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -156,6 +157,20 @@ public class RecalDataManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Recursively walks a nested map and empties every innermost map that holds exactly one entry.
 *
 * A map is treated as innermost when one of its values is a RecalDatum; any other value is
 * cast to Map and descended into. Emptying a single-entry leaf map means no datum remains
 * for that covariate value, so (per the inline comment below) the same correction is not
 * applied again in a later sequential step.
 *
 * NOTE(review): data.clear() is invoked while iterating over data.keySet(). It is only reached
 * when the map has a single key, but whether that is safe depends on the Map implementation's
 * iterator -- confirm against the concrete map type used by the caller.
 *
 * @param data The nested map to scan; its values are either RecalDatum objects or further Maps
 */
private void checkForSingletons( final Map data ) {
|
||||||
|
|
||||||
|
for( Object comp : data.keySet() ) {
|
||||||
|
final Object val = data.get(comp);
|
||||||
|
if( val instanceof RecalDatum ) { // We are at the end of the nested hash maps
|
||||||
|
if( data.keySet().size() == 1) {
|
||||||
|
data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done in a previous sequential step
|
||||||
|
}
|
||||||
|
} else { // Another layer in the nested hash map
|
||||||
|
checkForSingletons( (Map) val );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the appropriate collapsed table out of the set of all the tables held by this Object
|
* Get the appropriate collapsed table out of the set of all the tables held by this Object
|
||||||
* @param covariate Which covariate indexes the desired collapsed HashMap
|
* @param covariate Which covariate indexes the desired collapsed HashMap
|
||||||
|
|
|
||||||
|
|
@ -59,6 +59,9 @@ public class RecalibrationArgumentCollection {
|
||||||
public int WINDOW_SIZE = 5;
|
public int WINDOW_SIZE = 5;
|
||||||
@Argument(fullName = "homopolymer_nback", shortName="nback", doc="The number of previous bases to look at in HomopolymerCovariate", required=false)
|
@Argument(fullName = "homopolymer_nback", shortName="nback", doc="The number of previous bases to look at in HomopolymerCovariate", required=false)
|
||||||
public int HOMOPOLYMER_NBACK = 7;
|
public int HOMOPOLYMER_NBACK = 7;
|
||||||
|
@Argument(fullName = "exception_if_no_tile", shortName="throwTileException", doc="If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required=false)
|
||||||
|
public boolean EXCEPTION_IF_NO_TILE = false;
|
||||||
|
|
||||||
|
|
||||||
public boolean checkSolidRecalMode() {
|
public boolean checkSolidRecalMode() {
|
||||||
return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ||
|
return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ||
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@
|
||||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
* THE SOFTWARE.
|
* THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
@ -30,17 +31,25 @@ import edu.mit.broad.picard.illumina.parser.IlluminaUtil;
|
||||||
/**
|
/**
|
||||||
* @author alecw@broadinstitute.org
|
* @author alecw@broadinstitute.org
|
||||||
*/
|
*/
|
||||||
public class TileCovariate implements ExperimentalCovariate {
|
|
||||||
|
public class TileCovariate implements StandardCovariate {
|
||||||
|
|
||||||
|
private static boolean exceptionWhenNoTile = false;
|
||||||
|
|
||||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||||
public void initialize( final RecalibrationArgumentCollection RAC ) {
|
public void initialize( final RecalibrationArgumentCollection RAC ) {
|
||||||
|
exceptionWhenNoTile = RAC.EXCEPTION_IF_NO_TILE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to pick out the covariate's value from attributes of the read
|
// Used to pick out the covariate's value from attributes of the read
|
||||||
public Comparable getValue(final SAMRecord read, final int offset) {
|
public Comparable getValue(final SAMRecord read, final int offset) {
|
||||||
Integer tile = IlluminaUtil.getTileFromReadName(read.getReadName());
|
Integer tile = IlluminaUtil.getTileFromReadName(read.getReadName());
|
||||||
if (tile == null) {
|
if (tile == null) {
|
||||||
throw new StingException("Tile number not defined for read");
|
if( exceptionWhenNoTile ) {
|
||||||
|
throw new StingException( "Tile number not defined for read: " + read.getReadName() );
|
||||||
|
} else {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return tile;
|
return tile;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,10 +16,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testCountCovariates1() {
|
public void testCountCovariates1() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "604c0d898c9df9acbeeade9979707546" );
|
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "e5b2d5a2f4283718dae678cbc84be847" );
|
||||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "f03f6186f54bc3f841639a206d424d97");
|
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "ff1b3a18b67b09560cacc3b5dea0a034");
|
||||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "fe873886e3efa3566f08a6ae26d71b43" );
|
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7d6428a76e07ed4b99351aa4df89634d" );
|
||||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "996450b7c2e9ef68b9e76d57f2d5288a" );
|
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "a93b0263acdc856b885f95848852140d" );
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
String bam = entry.getKey();
|
String bam = entry.getKey();
|
||||||
|
|
@ -36,6 +36,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
" -cov QualityScoreCovariate" +
|
" -cov QualityScoreCovariate" +
|
||||||
" -cov CycleCovariate" +
|
" -cov CycleCovariate" +
|
||||||
" -cov DinucCovariate" +
|
" -cov DinucCovariate" +
|
||||||
|
" -cov TileCovariate" +
|
||||||
" --solid_recal_mode SET_Q_ZERO" +
|
" --solid_recal_mode SET_Q_ZERO" +
|
||||||
" -recalFile %s",
|
" -recalFile %s",
|
||||||
1, // just one output file
|
1, // just one output file
|
||||||
|
|
@ -48,10 +49,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testTableRecalibrator1() {
|
public void testTableRecalibrator1() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "d1a9b38f782af3edf223908cb71c7205" );
|
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "6c59d291c37d053e0f188b762f3060a5" );
|
||||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "27b3eaf3c02ffc5fb3d7815468d9958e");
|
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "27b3eaf3c02ffc5fb3d7815468d9958e");
|
||||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b7f4d3bfb761f29531a37336615046ff" );
|
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7ebdce416b72679e1cf88cc9886a5edc" );
|
||||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1faadda11d5c0278575d2f0368a65f14" );
|
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "a39afc94ed74f8137c9d43285997bd90" );
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
String bam = entry.getKey();
|
String bam = entry.getKey();
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
<class>org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate</class>
|
<class>org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate</class>
|
||||||
<class>org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate</class>
|
<class>org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate</class>
|
||||||
<class>org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate</class>
|
<class>org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate</class>
|
||||||
|
<class>org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate</class>
|
||||||
<!-- Local realignment around indels -->
|
<!-- Local realignment around indels -->
|
||||||
<class>org.broadinstitute.sting.gatk.walkers.indels.CleanedReadInjector</class>
|
<class>org.broadinstitute.sting.gatk.walkers.indels.CleanedReadInjector</class>
|
||||||
<class>org.broadinstitute.sting.gatk.walkers.indels.IndelIntervalWalker</class>
|
<class>org.broadinstitute.sting.gatk.walkers.indels.IndelIntervalWalker</class>
|
||||||
|
|
@ -62,6 +63,8 @@
|
||||||
<class>org.broadinstitute.sting.gatk.walkers.recalibration.PrimerRoundCovariate</class>
|
<class>org.broadinstitute.sting.gatk.walkers.recalibration.PrimerRoundCovariate</class>
|
||||||
<class>org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate</class>
|
<class>org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate</class>
|
||||||
<class>org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate</class>
|
<class>org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate</class>
|
||||||
|
<class>org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate</class>
|
||||||
|
<class>org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate</class>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</executable>
|
</executable>
|
||||||
<resources>
|
<resources>
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue