An addition for Sendu since he can't seem to tell when his CountCovariate jobs die in the middle of writing the CSVs. We now write an EOF marker at the end of the covariates table and look for it when reading in the file in TableRecalibrationWalker. By default, we warn the user if the EOF marker isn't present, but we exception out if the user provides the --fail_with_no_eof_marker option.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3670 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-06-29 18:50:07 +00:00
parent 3ca2b7374b
commit baf9479c35
3 changed files with 27 additions and 12 deletions

View File

@ -485,6 +485,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
} else {
printMappingsSorted(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
}
// print out an EOF marker
recalTableStream.println(TableRecalibrationWalker.EOF_MARKER);
}
private void printMappingsSorted( final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {

View File

@ -53,7 +53,6 @@ import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
@ -104,6 +103,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
/////////////////////////////
@Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the recalibrated bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
private boolean NO_PG_TAG = false;
@Argument(fullName="fail_with_no_eof_marker", shortName="requireEOF", required=false, doc="If no EOF marker is present in the covariates file, exit the program with an exception.")
private boolean REQUIRE_EOF = false;
/////////////////////////////
// Private Member Variables
@ -113,6 +114,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
protected static final String EOF_MARKER = "EOF";
private static final long RANDOM_SEED = 1032861495;
private final Random coinFlip = new Random( RANDOM_SEED ); // Random number generator is used to remove reference bias in solid bams
private long numReadsWithMalformedColorSpace = 0;
@ -162,10 +164,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
// Read in the data from the csv file and populate the data map and covariates list
logger.info( "Reading in the data from input csv file..." );
boolean sawEOF = false;
try {
for ( String line : new XReadLines(new File( RAC.RECAL_FILE )) ) {
lineNumber++;
if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() ) {
if ( EOF_MARKER.equals(line) ) {
sawEOF = true;
} else if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() ) {
; // Skip over the comment lines, (which start with '#')
}
// Read in the covariates that were used from the input file
@ -227,6 +232,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
}
logger.info( "...done!" );
if ( !sawEOF ) {
final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
if ( REQUIRE_EOF )
throw new StingException(errorMessage);
logger.warn(errorMessage);
}
logger.info( "The covariates being used here: " );
for( Covariate cov : requestedCovariates ) {
logger.info( "\t" + cov.getClass().getSimpleName() );

View File

@ -17,10 +17,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariates1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "7a23c3ffc3917f95117971d642bbeb56" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "89084b43b824f9e3c5e2afdfe0930542");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7d6428a76e07ed4b99351aa4df89634d" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "a582a86adffee2c9ee79a00b424a6cd9" );
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "b2191ea11f528b9605b727d8a73dd1e1");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "596a9ec9cbc1da70481e45a5a588a41d" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "507dbd3ba6f54e066d04c4d24f59c3ab" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -81,7 +81,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesUseOriginalQuals() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "784488e7024f3e5398ca462d6b8f97c4");
e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "72b79646061d78674a3752272823d47f");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -135,7 +135,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesSolidIndelsRemoveRefBias() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "3889abcc7f6fe420f546fc049bfc2b5a" );
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "013822cfa4f276d48ca99c014c23c124" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -192,7 +192,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesVCF() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "9b9d21ffb70f15ef2aebad21f3fc05cb");
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "3700eaf567e4937f442fc777a226d6ad");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -216,7 +216,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesVCFPlusDBsnp() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "cc1cc9c1ff184d388d81574fdccc608e");
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "949c2ecb24a4189e106d372b05ec725f");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -244,7 +244,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesNoReadGroups() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "a86c64f649b847b7f81ac50a808d3d45" );
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "62dab3db2172695cf95fba7f543a4058" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -303,7 +303,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesNoIndex() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "481de3cbecad59c00cc148bbcd279e60" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "abc4248cb5f718594a63409a151d679e" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();