diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index d1d4ff914..02af884a2 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -373,16 +373,16 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { if ( featureDescriptor != null ) { tribbleType = featureDescriptor.getName(); logger.warn("Dynamically determined type of " + file + " to be " + tribbleType); + } else { + throw new UserException.CommandLineException( + String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " + + "Please add an explicit type tag :TYPE listing the correct type from among the supported types: %s", + manager.userFriendlyListOfAvailableFeatures())); } } } } - if ( tribbleType == null ) // error handling - throw new UserException.CommandLineException( - String.format("Could not parse argument %s with value %s", - defaultDefinition.fullName, value)); - Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class); Class parameterType = getParameterizedTypeClass(type); RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags); @@ -395,8 +395,8 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { value, source.field.getName())); } catch (Exception e) { throw new UserException.CommandLineException( - String.format("Failed to parse value %s for argument %s.", - value, source.field.getName())); + String.format("Failed to parse value %s for argument %s. Message: %s", + value, source.field.getName(), e.getMessage())); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java index 89ee65532..cec40b5bd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java @@ -1,110 +1,110 @@ -package org.broadinstitute.sting.gatk.refdata.features.refseq; - -import org.apache.commons.io.filefilter.FalseFileFilter; -import org.broad.tribble.Feature; -import org.broad.tribble.TribbleException; -import org.broad.tribble.readers.LineReader; -import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.util.ArrayList; - -/** - * the ref seq codec - */ -public class RefSeqCodec implements ReferenceDependentFeatureCodec { - - /** - * The parser to use when resolving genome-wide locations. - */ - private GenomeLocParser genomeLocParser; - private boolean zero_coding_length_user_warned = false; - /** - * Set the parser to use when resolving genetic data. - * @param genomeLocParser The supplied parser. - */ - @Override - public void setGenomeLocParser(GenomeLocParser genomeLocParser) { - this.genomeLocParser = genomeLocParser; - } - - @Override - public Feature decodeLoc(String line) { - if (line.startsWith("#")) return null; - String fields[] = line.split("\t"); - if (fields.length < 3) throw new TribbleException("RefSeq (decodeLoc) : Unable to parse line -> " + line + ", we expected at least 3 columns, we saw " + fields.length); - String contig_name = fields[2]; - try { - return new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); - } catch ( UserException.MalformedGenomeLoc e ) { - Utils.warnUser("RefSeq file is potentially incorrect, as some transcripts or exons have a negative length ("+fields[2]+")"); - return null; - } - } - - /** Fills this object from a text line in RefSeq (UCSC) text dump file */ - @Override - public RefSeqFeature decode(String line) { - if (line.startsWith("#")) return null; - String fields[] = line.split("\t"); - - // we reference postion 15 in the split array below, make sure we have at least that many columns - if (fields.length < 16) throw new TribbleException("RefSeq (decode) : Unable to parse line -> " + line + ", we expected at least 16 columns, we saw " + fields.length); - String contig_name = fields[2]; - RefSeqFeature feature = new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); - - feature.setTranscript_id(fields[1]); - if ( fields[3].length()==1 && fields[3].charAt(0)=='+') feature.setStrand(1); - else if ( fields[3].length()==1 && fields[3].charAt(0)=='-') feature.setStrand(-1); - else throw new UserException.MalformedFile("Expected strand symbol (+/-), found: "+fields[3] + " for line=" + line); - - int coding_start = Integer.parseInt(fields[6])+1; - int coding_stop = Integer.parseInt(fields[7]); - - if ( coding_start > coding_stop ) { - if ( ! zero_coding_length_user_warned ) { - Utils.warnUser("RefSeq file contains transcripts with zero coding length. "+ - "Such transcripts will be ignored (this warning is printed only once)"); - zero_coding_length_user_warned = true; - } - return null; - } - - feature.setTranscript_interval(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); - feature.setTranscript_coding_interval(genomeLocParser.createGenomeLoc(contig_name, coding_start, coding_stop)); - feature.setGene_name(fields[12]); - String[] exon_starts = fields[9].split(","); - String[] exon_stops = fields[10].split(","); - String[] eframes = fields[15].split(","); - - if ( exon_starts.length != exon_stops.length ) - throw new UserException.MalformedFile("Data format error: numbers of exon start and stop positions differ for line=" + line); - if ( exon_starts.length != eframes.length ) - throw new UserException.MalformedFile("Data format error: numbers of exons and exon frameshifts differ for line=" + line); - - ArrayList exons = new ArrayList(exon_starts.length); - ArrayList exon_frames = new ArrayList(eframes.length); - - for ( int i = 0 ; i < exon_starts.length ; i++ ) { - exons.add(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) ); - exon_frames.add(Integer.decode(eframes[i])); - } - - feature.setExons(exons); - feature.setExon_frames(exon_frames); - return feature; - } - - @Override - public Object readHeader(LineReader reader) { - return null; - } - - @Override - public Class getFeatureType() { - return RefSeqCodec.class; - } -} +package org.broadinstitute.sting.gatk.refdata.features.refseq; + +import org.apache.commons.io.filefilter.FalseFileFilter; +import org.broad.tribble.Feature; +import org.broad.tribble.TribbleException; +import org.broad.tribble.readers.LineReader; +import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.util.ArrayList; + +/** + * the ref seq codec + */ +public class RefSeqCodec implements ReferenceDependentFeatureCodec { + + /** + * The parser to use when resolving genome-wide locations. + */ + private GenomeLocParser genomeLocParser; + private boolean zero_coding_length_user_warned = false; + /** + * Set the parser to use when resolving genetic data. + * @param genomeLocParser The supplied parser. + */ + @Override + public void setGenomeLocParser(GenomeLocParser genomeLocParser) { + this.genomeLocParser = genomeLocParser; + } + + @Override + public Feature decodeLoc(String line) { + if (line.startsWith("#")) return null; + String fields[] = line.split("\t"); + if (fields.length < 3) throw new TribbleException("RefSeq (decodeLoc) : Unable to parse line -> " + line + ", we expected at least 3 columns, we saw " + fields.length); + String contig_name = fields[2]; + try { + return new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); + } catch ( UserException.MalformedGenomeLoc e ) { + Utils.warnUser("RefSeq file is potentially incorrect, as some transcripts or exons have a negative length ("+fields[2]+")"); + return null; + } + } + + /** Fills this object from a text line in RefSeq (UCSC) text dump file */ + @Override + public RefSeqFeature decode(String line) { + if (line.startsWith("#")) return null; + String fields[] = line.split("\t"); + + // we reference postion 15 in the split array below, make sure we have at least that many columns + if (fields.length < 16) throw new TribbleException("RefSeq (decode) : Unable to parse line -> " + line + ", we expected at least 16 columns, we saw " + fields.length); + String contig_name = fields[2]; + RefSeqFeature feature = new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); + + feature.setTranscript_id(fields[1]); + if ( fields[3].length()==1 && fields[3].charAt(0)=='+') feature.setStrand(1); + else if ( fields[3].length()==1 && fields[3].charAt(0)=='-') feature.setStrand(-1); + else throw new UserException.MalformedFile("Expected strand symbol (+/-), found: "+fields[3] + " for line=" + line); + + int coding_start = Integer.parseInt(fields[6])+1; + int coding_stop = Integer.parseInt(fields[7]); + + if ( coding_start > coding_stop ) { + if ( ! zero_coding_length_user_warned ) { + Utils.warnUser("RefSeq file contains transcripts with zero coding length. "+ + "Such transcripts will be ignored (this warning is printed only once)"); + zero_coding_length_user_warned = true; + } + return null; + } + + feature.setTranscript_interval(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); + feature.setTranscript_coding_interval(genomeLocParser.createGenomeLoc(contig_name, coding_start, coding_stop)); + feature.setGene_name(fields[12]); + String[] exon_starts = fields[9].split(","); + String[] exon_stops = fields[10].split(","); + String[] eframes = fields[15].split(","); + + if ( exon_starts.length != exon_stops.length ) + throw new UserException.MalformedFile("Data format error: numbers of exon start and stop positions differ for line=" + line); + if ( exon_starts.length != eframes.length ) + throw new UserException.MalformedFile("Data format error: numbers of exons and exon frameshifts differ for line=" + line); + + ArrayList exons = new ArrayList(exon_starts.length); + ArrayList exon_frames = new ArrayList(eframes.length); + + for ( int i = 0 ; i < exon_starts.length ; i++ ) { + exons.add(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) ); + exon_frames.add(Integer.decode(eframes[i])); + } + + feature.setExons(exons); + feature.setExon_frames(exon_frames); + return feature; + } + + @Override + public Object readHeader(LineReader reader) { + return null; + } + + @Override + public Class getFeatureType() { + return RefSeqFeature.class; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index 61d53679a..b0e25e55b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -160,8 +160,8 @@ public class ListFileUtils { rodBinding.getName(), rodBinding.getTribbleType(), builderForValidation.userFriendlyListOfAvailableFeatures())); if ( ! rodBinding.getType().isAssignableFrom(descriptor.getFeatureClass()) ) throw new UserException.BadArgumentValue(rodBinding.getName(), - String.format("Field %s expected type %s, but the type of the input file provided on the command line was %s. Please make sure that you have provided the correct file type and/or that you are not binding your rod to a name matching one of the available types.", - rodBinding.getName(), rodBinding.getType(), descriptor.getName())); + String.format("Field %s expected type %s, but the type of the input file provided on the command line was %s producing %s. Please make sure that you have provided the correct file type and/or that you are not binding your rod to a name matching one of the available types.", + rodBinding.getName(), rodBinding.getType(), descriptor.getName(), descriptor.getFeatureClass())); rodBindings.add(triplet);