An addition for Sendu, since he can't seem to tell when his CountCovariates jobs die in the middle of writing the CSVs. We now write an EOF marker at the end of the covariates table and look for it when reading the file in TableRecalibrationWalker. By default, we warn the user if the EOF marker isn't present, but we throw an exception if the user provides the --fail_with_no_eof_marker option.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3670 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3ca2b7374b
commit
baf9479c35
|
|
@ -485,6 +485,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
} else {
|
||||
printMappingsSorted(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
|
||||
}
|
||||
|
||||
// print out an EOF marker
|
||||
recalTableStream.println(TableRecalibrationWalker.EOF_MARKER);
|
||||
}
|
||||
|
||||
private void printMappingsSorted( final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
|
||||
|
|
|
|||
|
|
@ -53,7 +53,6 @@ import org.broadinstitute.sting.utils.QualityUtils;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||
|
|
@ -104,6 +103,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
/////////////////////////////
|
||||
@Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the recalibrated bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
||||
private boolean NO_PG_TAG = false;
|
||||
@Argument(fullName="fail_with_no_eof_marker", shortName="requireEOF", required=false, doc="If no EOF marker is present in the covariates file, exit the program with an exception.")
|
||||
private boolean REQUIRE_EOF = false;
|
||||
|
||||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
|
|
@ -113,6 +114,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
||||
private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
|
||||
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
|
||||
protected static final String EOF_MARKER = "EOF";
|
||||
private static final long RANDOM_SEED = 1032861495;
|
||||
private final Random coinFlip = new Random( RANDOM_SEED ); // Random number generator is used to remove reference bias in solid bams
|
||||
private long numReadsWithMalformedColorSpace = 0;
|
||||
|
|
@ -162,10 +164,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
// Read in the data from the csv file and populate the data map and covariates list
|
||||
logger.info( "Reading in the data from input csv file..." );
|
||||
|
||||
boolean sawEOF = false;
|
||||
try {
|
||||
for ( String line : new XReadLines(new File( RAC.RECAL_FILE )) ) {
|
||||
lineNumber++;
|
||||
if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() ) {
|
||||
if ( EOF_MARKER.equals(line) ) {
|
||||
sawEOF = true;
|
||||
} else if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() ) {
|
||||
; // Skip over the comment lines, (which start with '#')
|
||||
}
|
||||
// Read in the covariates that were used from the input file
|
||||
|
|
@ -227,6 +232,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
}
|
||||
logger.info( "...done!" );
|
||||
|
||||
if ( !sawEOF ) {
|
||||
final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
|
||||
if ( REQUIRE_EOF )
|
||||
throw new StingException(errorMessage);
|
||||
logger.warn(errorMessage);
|
||||
}
|
||||
|
||||
logger.info( "The covariates being used here: " );
|
||||
for( Covariate cov : requestedCovariates ) {
|
||||
logger.info( "\t" + cov.getClass().getSimpleName() );
|
||||
|
|
|
|||
|
|
@ -17,10 +17,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCountCovariates1() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "7a23c3ffc3917f95117971d642bbeb56" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "89084b43b824f9e3c5e2afdfe0930542");
|
||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7d6428a76e07ed4b99351aa4df89634d" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "a582a86adffee2c9ee79a00b424a6cd9" );
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "b2191ea11f528b9605b727d8a73dd1e1");
|
||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "596a9ec9cbc1da70481e45a5a588a41d" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "507dbd3ba6f54e066d04c4d24f59c3ab" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -81,7 +81,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCountCovariatesUseOriginalQuals() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "784488e7024f3e5398ca462d6b8f97c4");
|
||||
e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "72b79646061d78674a3752272823d47f");
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -135,7 +135,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCountCovariatesSolidIndelsRemoveRefBias() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "3889abcc7f6fe420f546fc049bfc2b5a" );
|
||||
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "013822cfa4f276d48ca99c014c23c124" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -192,7 +192,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCountCovariatesVCF() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "9b9d21ffb70f15ef2aebad21f3fc05cb");
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "3700eaf567e4937f442fc777a226d6ad");
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -216,7 +216,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCountCovariatesVCFPlusDBsnp() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "cc1cc9c1ff184d388d81574fdccc608e");
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "949c2ecb24a4189e106d372b05ec725f");
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -244,7 +244,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCountCovariatesNoReadGroups() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "a86c64f649b847b7f81ac50a808d3d45" );
|
||||
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "62dab3db2172695cf95fba7f543a4058" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -303,7 +303,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCountCovariatesNoIndex() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "481de3cbecad59c00cc148bbcd279e60" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "abc4248cb5f718594a63409a151d679e" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
|
|||
Loading…
Reference in New Issue