A fix for the 'rod blows up when it hits a GenomeLoc outside the reference' issu
e. Really a stopgap; error handling in the RODs needs to be addressed in a more comprehensive way. Right now, hasNext() isn't guaranteed to be correct. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@878 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ad5b057140
commit
6e60cddfed
|
|
@ -79,7 +79,7 @@ public class HapMapAlleleFrequenciesROD extends BasicReferenceOrderedDatum {
|
|||
varFreq = Double.parseDouble(parts[11]); // CEU_var_freq
|
||||
totalCounts = Integer.parseInt(parts[12]); // CEU_var
|
||||
|
||||
loc = new GenomeLoc(contig, start, stop);
|
||||
loc = GenomeLoc.parseGenomeLoc(contig, start, stop);
|
||||
|
||||
} catch ( RuntimeException e ) {
|
||||
System.out.printf(" Exception caught during parsing HapMap Allele Freq %s%n", Utils.join(" <=> ", parts));
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ import org.broadinstitute.sting.gatk.refdata.rodRefSeq;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.xReadLines;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.MalformedGenomeLocException;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
|
|
@ -263,11 +264,36 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
}
|
||||
|
||||
public ROD next() {
|
||||
final String line = parser.next();
|
||||
//System.out.printf("Line is %s%n", line);
|
||||
String parts[] = line.split(fieldDelimiter);
|
||||
ROD n = parseLine(parts);
|
||||
return n != null ? n : next();
|
||||
ROD n = null;
|
||||
boolean success = false;
|
||||
boolean firstFailure = true;
|
||||
|
||||
do {
|
||||
final String line = parser.next();
|
||||
//System.out.printf("Line is %s%n", line);
|
||||
String parts[] = line.split(fieldDelimiter);
|
||||
|
||||
try {
|
||||
n = parseLine(parts);
|
||||
// Two failure conditions:
|
||||
// 1) parseLine throws an exception.
|
||||
// 2) parseLine returns null.
|
||||
// 3) parseLine throws a RuntimeException.
|
||||
// TODO: Clean this up so that all errors are handled in one spot.
|
||||
success = (n != null);
|
||||
}
|
||||
catch( MalformedGenomeLocException ex ) {
|
||||
if( firstFailure ) {
|
||||
Utils.warnUser("Failed to parse contig on line '" + line + "'. Skipping ahead to the next recognized GenomeLoc.");
|
||||
firstFailure = false;
|
||||
}
|
||||
if( !parser.hasNext() )
|
||||
Utils.warnUser("Unable to find more valid reference-ordered data. Giving up.");
|
||||
}
|
||||
|
||||
} while (!success && parser.hasNext());
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
|
|
@ -308,7 +334,8 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
public boolean hasNext() { return it.hasNext(); }
|
||||
public ROD next() {
|
||||
ROD next = it.next();
|
||||
position = next.getLocation().clone();
|
||||
if( next != null )
|
||||
position = next.getLocation().clone();
|
||||
return next;
|
||||
}
|
||||
|
||||
|
|
@ -336,6 +363,8 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
if ( DEBUG ) System.out.printf(" *** starting seek to %s %d%n", loc.getContig(), loc.getStart());
|
||||
while ( hasNext() ) {
|
||||
ROD current = next();
|
||||
if( current == null )
|
||||
continue;
|
||||
//System.out.printf(" -> Seeking to %s %d AT %s %d%n", contigName, pos, current.getContig(), current.getStart());
|
||||
int cmp = current.getLocation().compareTo(loc);
|
||||
if ( cmp < 0 ) {
|
||||
|
|
|
|||
|
|
@ -170,8 +170,8 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
|
|||
if ( refBaseChar == '*' ) {
|
||||
|
||||
parseIndels(parts[3]) ;
|
||||
if ( varType == DELETION_VARIANT ) loc = new GenomeLoc(contig, start, start+eventLength-1);
|
||||
else loc = new GenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!!
|
||||
if ( varType == DELETION_VARIANT ) loc = GenomeLoc.parseGenomeLoc(contig, start, start+eventLength-1);
|
||||
else loc = GenomeLoc.parseGenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!!
|
||||
}
|
||||
else {
|
||||
parseBasesAndQuals(parts[8], parts[9]);
|
||||
|
|
@ -180,8 +180,8 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
|
|||
|
||||
refBases = parts[2].toUpperCase();
|
||||
eventLength = 1;
|
||||
//loc = new GenomeLoc(contig, start, start+1);
|
||||
loc = new GenomeLoc(contig, start, start);
|
||||
//loc = GenomeLoc.parseGenomeLoc(contig, start, start+1);
|
||||
loc = GenomeLoc.parseGenomeLoc(contig, start, start);
|
||||
|
||||
char ch = parts[3].charAt(0);
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import org.apache.log4j.Logger;
|
|||
* ArrayList<String> header = new ArrayList<String>(Arrays.asList("HEADER", "col1", "col2", "col3"));
|
||||
* assertTrue(TabularROD.headerString(header).equals("HEADER\tcol1\tcol2\tcol3"));
|
||||
* String rowData = String.format("%d %d %d", 1, 2, 3);
|
||||
* TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1), rowData.split(" "));
|
||||
* TabularROD row = new TabularROD("myName", header, GenomeLoc.parseGenomeLoc("chrM", 1), rowData.split(" "));
|
||||
* assertTrue(row.toString().equals("chrM:1\t1\t2\t3"));
|
||||
*/
|
||||
public class TabularROD extends BasicReferenceOrderedDatum implements Map<String, String> {
|
||||
|
|
|
|||
|
|
@ -75,8 +75,8 @@ public class Transcript {
|
|||
else throw new StingException("Expected strand symbol (+/-), found: "+fields[3]);
|
||||
|
||||
String contig_name = fields[2];
|
||||
transcript_interval = new GenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]));
|
||||
transcript_coding_interval = new GenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7]));
|
||||
transcript_interval = GenomeLoc.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]));
|
||||
transcript_coding_interval = GenomeLoc.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7]));
|
||||
gene_name = fields[12];
|
||||
String[] exon_starts = fields[9].split(",");
|
||||
String[] exon_stops = fields[10].split(",");
|
||||
|
|
@ -89,7 +89,7 @@ public class Transcript {
|
|||
exon_frames = new ArrayList<Integer>(eframes.length);
|
||||
|
||||
for ( int i = 0 ; i < exon_starts.length ; i++ ) {
|
||||
exons.add(new GenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
|
||||
exons.add(GenomeLoc.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
|
||||
exon_frames.add(Integer.decode(eframes[i]));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import java.util.*;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.MalformedGenomeLocException;
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
|
||||
/**
|
||||
|
|
@ -161,7 +162,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
|||
String contig = parts[1];
|
||||
long start = Long.parseLong(parts[2]) + 1; // The final is 0 based
|
||||
long stop = Long.parseLong(parts[3]) + 1; // The final is 0 based
|
||||
loc = new GenomeLoc(contig, start, stop);
|
||||
loc = GenomeLoc.parseGenomeLoc(contig, start, stop);
|
||||
|
||||
name = parts[4];
|
||||
refBases = parts[5];
|
||||
|
|
@ -177,8 +178,11 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
|||
weight = Integer.parseInt(parts[17]);
|
||||
//System.out.printf("Parsed %s%n", toString());
|
||||
return true;
|
||||
} catch( MalformedGenomeLocException ex ) {
|
||||
// Just rethrow malformed genome locs; the ROD system itself will deal with these.
|
||||
throw ex;
|
||||
} catch ( RuntimeException e ) {
|
||||
System.out.printf(" Exception caught during parsing GFFLine %s%n", Utils.join(" <=> ", parts));
|
||||
System.out.printf(" Exception caught during parsing DBSNP line %s%n", Utils.join(" <=> ", parts));
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ public class rodGFF extends BasicReferenceOrderedDatum {
|
|||
}
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
return new GenomeLoc(contig, start, stop);
|
||||
return GenomeLoc.parseGenomeLoc(contig, start, stop);
|
||||
}
|
||||
|
||||
public String getAttribute(final String key) {
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ public class rodRefSeq extends BasicReferenceOrderedDatum {
|
|||
|
||||
public rodRefSeq(String name) {
|
||||
super(name);
|
||||
// location = new GenomeLoc(0,0,-1);
|
||||
// location = GenomeLoc.parseGenomeLoc(0,0,-1);
|
||||
}
|
||||
|
||||
/** Despite this constructor is public, it is meant primarily for the internal use; RefSeq iterator will
|
||||
|
|
@ -182,7 +182,7 @@ class refSeqIterator implements Iterator<rodRefSeq> {
|
|||
|
||||
// 'records' and current position are fully updated. We can now create new rod and return it (NOTE: this iterator will break if the list
|
||||
// of pre-loaded records is meddled with by the clients between iterations, so we return them as unmodifiable list)
|
||||
rodRefSeq rod = new rodRefSeq(name,new GenomeLoc(curr_contig_name,curr_position, curr_position),Collections.unmodifiableList(records));
|
||||
rodRefSeq rod = new rodRefSeq(name,GenomeLoc.parseGenomeLoc(curr_contig_name,curr_position, curr_position),Collections.unmodifiableList(records));
|
||||
// if ( (++z) % 1000000 == 0 ) {
|
||||
// System.out.println(rod.getLocation()+": holding "+records.size()+ "; time per 1M ref positions: "+((double)(System.currentTimeMillis()-t)/1000.0)+" s");
|
||||
// z = 0;
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
|
|||
public GenomeLoc( int contigIndex, final long start, final long stop ) {
|
||||
if(contigInfo == null) { throw new StingException("Contig info has not been setup in the GenomeLoc context yet."); }
|
||||
|
||||
if (contigIndex < 0 || contigIndex >= contigInfo.size()) {
|
||||
if (!isSequenceIndexValid(contigIndex)) {
|
||||
throw new StingException("Contig info has not been setup in the GenomeLoc context yet.");
|
||||
}
|
||||
if (start < 0) { throw new StingException("Bad start position " + start);}
|
||||
|
|
@ -128,6 +128,19 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
|
|||
return Long.parseLong(x);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use this static constructor when the input data is under limited control (i.e. parsing user data).
|
||||
* @param contig Contig to parse.
|
||||
* @param start Starting point.
|
||||
* @param stop Stop point.
|
||||
* @return The genome location, or a MalformedGenomeLocException if unparseable.
|
||||
*/
|
||||
public static GenomeLoc parseGenomeLoc( final String contig, long start, long stop ) {
|
||||
if( !isContigValid(contig) )
|
||||
throw new MalformedGenomeLocException("Contig " + contig + " is not within installed in the GATK sequence dictionary derived from the reference.");
|
||||
return new GenomeLoc(contig,start,stop);
|
||||
}
|
||||
|
||||
public static GenomeLoc parseGenomeLoc( final String str ) {
|
||||
// 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
|
||||
//System.out.printf("Parsing location '%s'%n", str);
|
||||
|
|
@ -184,7 +197,10 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
|
|||
stop = getContigInfo(contig).getSequenceLength();
|
||||
}
|
||||
|
||||
GenomeLoc loc = new GenomeLoc(contig, start, stop);
|
||||
if( !isContigValid(contig) )
|
||||
throw new MalformedGenomeLocException("Contig " + contig + " is not within installed in the GATK sequence dictionary derived from the reference.");
|
||||
|
||||
GenomeLoc loc = parseGenomeLoc(contig,start,stop);
|
||||
// System.out.printf(" => Parsed location '%s' into %s%n", str, loc);
|
||||
|
||||
return loc;
|
||||
|
|
@ -583,4 +599,24 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the given contig is valid with respect to the sequence dictionary
|
||||
* already installed in the GenomeLoc.
|
||||
* @return True if the contig is valid. False otherwise.
|
||||
*/
|
||||
private static boolean isContigValid( String contig ) {
|
||||
int contigIndex = contigInfo.getSequenceIndex(contig);
|
||||
return isSequenceIndexValid(contigIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the given sequence index is valid with respect to the sequence dictionary.
|
||||
* @param sequenceIndex sequence index
|
||||
* @return True if the sequence index is valid, false otherwise.
|
||||
*/
|
||||
private static boolean isSequenceIndexValid( int sequenceIndex ) {
|
||||
return sequenceIndex >= 0 && sequenceIndex < contigInfo.size();
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,37 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: Jun 2, 2009
|
||||
* Time: 11:43:48 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Indicates that something was wrong with in the parameters passed to create a GenomeLoc...
|
||||
* bad sequence id out of bounds, etc.
|
||||
*/
|
||||
|
||||
public class MalformedGenomeLocException extends StingException {
|
||||
/**
|
||||
* Create a new MalformedGenomeLocException with the given message. Does not preserve the existing stack trace.
|
||||
* @param message The message.
|
||||
*/
|
||||
public MalformedGenomeLocException( String message ) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new MalformedGenomeLocException with the given message and root cause.
|
||||
* @param message The message.
|
||||
* @param t The root cause.
|
||||
*/
|
||||
public MalformedGenomeLocException( String message, Throwable t ) {
|
||||
super(message,t);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue