PairedReadOrderCovariate is now a standard covariate and because of this CycleCovariate no longer multiplies by negative one for second of pair reads. Added PairedReadOrderCovariate to some of the integration tests.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2574 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-01-13 20:09:10 +00:00
parent 05575e2e56
commit 49c44e7b36
5 changed files with 38 additions and 31 deletions

View File

@ -39,7 +39,7 @@ import net.sf.samtools.SAMRecord;
* For Solexa the cycle is simply the position in the read (counting backwards if it is a negative strand read)
* For 454 the cycle is the TACG flow cycle, that is, each flow grabs all the TACG's in order in a single cycle
* For example, for the read: AAACCCCGAAATTTTTACTG
* the cycle would be 00000000111222222233
* the cycle would be 11111111222333333344
* For SOLiD the cycle is a more complicated mixture of ligation cycle and primer round
*/
@ -82,7 +82,7 @@ public class CycleCovariate implements StandardCovariate {
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
if( !read.getReadNegativeStrandFlag() ) { // forward direction
if( !read.getReadNegativeStrandFlag() ) { // Forward direction
int iii = 0;
while( iii <= offset )
{
@ -94,7 +94,7 @@ public class CycleCovariate implements StandardCovariate {
if( iii <= offset && !BaseUtils.isRegularBase(bases[iii]) ) { iii++; }
}
} else { // negative direction
} else { // Negative direction
int iii = bases.length-1;
while( iii >= offset )
{
@ -141,10 +141,11 @@ public class CycleCovariate implements StandardCovariate {
return getValue( read, offset ); // A recursive call
}
// This functionality was moved to PairedReadOrderCovariate
// Differentiate between first and second of pair
if( read.getReadPairedFlag() && read.getSecondOfPairFlag() ) {
cycle *= -1;
}
//if( read.getReadPairedFlag() && read.getSecondOfPairFlag() ) {
// cycle *= -1;
//}
return cycle;
}

View File

@ -6,17 +6,19 @@ import net.sf.samtools.SAMRecord;
* Created by IntelliJ IDEA.
* User: chartl
* Date: Dec 16, 2009
* Time: 3:22:19 PM
* To change this template use File | Settings | File Templates.
*/
public class PairedReadOrderCovariate implements ExperimentalCovariate{
public void initialize (final RecalibrationArgumentCollection rac ) { /* do nothing */ }
public class PairedReadOrderCovariate implements StandardCovariate{
public final Comparable getValue(final SAMRecord read, final int offset) {
return read.getReadPairedFlag() ? "Not_Paired" : read.getMateUnmappedFlag() ? "Mate_Unmapped" : read.getFirstOfPairFlag() ? "First_Read" : "Second_Read";
// Initialize any member variables using the command-line arguments passed to the walkers
public void initialize ( final RecalibrationArgumentCollection rac ) { /* do nothing */ }
// Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) {
return read.getReadPairedFlag() ? (read.getFirstOfPairFlag() ? "First_Read" : "Second_Read") : "Not_Paired";
}
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
public final Comparable getValue( final String str ) {
return str;
}

View File

@ -123,7 +123,7 @@ public class RecalDataManager {
for( int iii = 0; iii < dataCollapsedByCovariate.size(); iii++ ) {
covariateCollapsedKey[0] = key[0]; // Make a new key with the read group ...
covariateCollapsedKey[1] = key[1]; // and quality score ...
Object theCovariateElement = key[iii + 2]; // and the given covariate
final Object theCovariateElement = key[iii + 2]; // and the given covariate
if( theCovariateElement != null ) {
covariateCollapsedKey[2] = theCovariateElement;
collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(iii).get( covariateCollapsedKey );
@ -202,7 +202,7 @@ public class RecalDataManager {
// Check if we need to use the original quality scores instead
if( RAC.USE_ORIGINAL_QUALS ) {
Object attr = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
final Object attr = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
if( attr != null ) {
if( attr instanceof String ) {
read.setBaseQualities( QualityUtils.fastqToPhred((String)attr) );
@ -229,7 +229,7 @@ public class RecalDataManager {
}
if( RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP) ) { // Collapse all the read groups into a single common String provided by the user
String oldPlatform = readGroup.getPlatform();
final String oldPlatform = readGroup.getPlatform();
readGroup = new SAMReadGroupRecord( RAC.FORCE_READ_GROUP );
readGroup.setPlatform( oldPlatform );
((GATKSAMRecord)read).setReadGroup( readGroup );
@ -252,7 +252,7 @@ public class RecalDataManager {
// If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
if( read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") ) {
if( read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null ) { // Haven't calculated the inconsistency array yet for this read
Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if( attr != null ) {
char[] colorSpace;
if( attr instanceof String ) {
@ -309,7 +309,7 @@ public class RecalDataManager {
// Loop over the read and calculate first the infered bases from the color and then check if it is consistent with the read
byte[] readBases = read.getReadBases();
byte[] colorImpliedBases = readBases.clone();
final byte[] colorImpliedBases = readBases.clone();
char[] refBasesDirRead = refBases;
if( read.getReadNegativeStrandFlag() ) {
readBases = BaseUtils.simpleReverseComplement( read.getReadBases() );
@ -489,7 +489,7 @@ public class RecalDataManager {
return inconsistency[offset] != 0;
}
// This block of code is if you want to check both the offset and the next base for color space inconsistency
// This block of code is for if you want to check both the offset and the next base for color space inconsistency
//if( read.getReadNegativeStrandFlag() ) { // Negative direction
// if( offset == 0 ) {
// return inconsistency[0] != 0;
@ -503,6 +503,7 @@ public class RecalDataManager {
// return (inconsistency[offset] != 0) || (inconsistency[offset + 1] != 0);
// }
//}
} else {
return false;
}

View File

@ -16,10 +16,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariates1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "e5b2d5a2f4283718dae678cbc84be847" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "89084b43b824f9e3c5e2afdfe0930542");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7d6428a76e07ed4b99351aa4df89634d" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "6ede6fc840c4e5070a58a919b48e7504" );
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "e67a329112061beba0f774844e5f4635" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "a626fa8f9c3836ec9764a718c4b6a27f");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "284939b3760a95f51c350c8340b99b31" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "c7c28128edf4a54e6ac9e752ebaca697" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -37,6 +37,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -cov CycleCovariate" +
" -cov DinucCovariate" +
" -cov TileCovariate" +
" -cov PairedReadOrderCovariate" +
" --solid_recal_mode SET_Q_ZERO" +
" -recalFile %s",
1, // just one output file
@ -49,10 +50,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibrator1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "6c59d291c37d053e0f188b762f3060a5" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "e06f1397b9c40f75e96cd3df76730ee0");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7ebdce416b72679e1cf88cc9886a5edc" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "48ddc93cae054f9423f3a7ed9f36540e" );
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "c0dbc69726e1827e1eabdc777d8435e2" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "b6a1749aa311f5c50097280579c638a3");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b5bd41672805e15c6a5187c902c1ef9a" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "51aeb06db11b66d9e1199ebed9c0562c" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -80,7 +81,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorMaxQ70() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "665711dfb81d67582b28faea24e26160" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "df4f9bf64cf1a7aa890b28de043cac40" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -109,7 +110,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesVCF() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "9b9d21ffb70f15ef2aebad21f3fc05cb");
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "e11048d253a5ca719a373a9dd8e18f40");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -136,7 +137,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesVCFPlusDBsnp() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "cc1cc9c1ff184d388d81574fdccc608e");
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "33fa86ad0116f727dd6428bb5a2daf30");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -164,7 +165,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesNoReadGroups() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "a86c64f649b847b7f81ac50a808d3d45" );
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "e7031fdf3a88c27ce1b3d5d425860286" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -193,7 +194,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorNoReadGroups() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "32ad300e8c094ed2c1ec6c531180fe70" );
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "de1a08b45250f090f19e6a35d449c50c" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();

View File

@ -26,6 +26,7 @@
<class>org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate</class>
<class>org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate</class>
<class>org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate</class>
<class>org.broadinstitute.sting.gatk.walkers.recalibration.PairedReadOrderCovariate</class>
<!-- Local realignment around indels -->
<class>org.broadinstitute.sting.gatk.walkers.indels.CleanedReadInjector</class>
<class>org.broadinstitute.sting.gatk.walkers.indels.IndelIntervalWalker</class>
@ -65,6 +66,7 @@
<class>org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate</class>
<class>org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate</class>
<class>org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate</class>
<class>org.broadinstitute.sting.gatk.walkers.recalibration.PairedReadOrderCovariate</class>
</dependencies>
</executable>
<resources>