diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java index bfc4dc534..5a96ce838 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java @@ -2,14 +2,19 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.tracks.QueryableTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; +import java.io.IOException; +import java.util.Arrays; +import java.util.LinkedList; import java.util.List; /** * User: hanna @@ -36,16 +41,16 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { /** * A pool of iterators for navigating through the genome. */ - private ReferenceOrderedDataPool iteratorPool = null; + private final ReferenceOrderedDataPool iteratorPool; /** * Create a new reference-ordered data source. - * @param rod + * @param rod the reference ordered data */ public ReferenceOrderedDataSource( Walker walker, RMDTrack rod) { this.rod = rod; - // if (!rod.supportsQuery()) // TODO: Aaron turn on to enable Tribble searches - this.iteratorPool = new ReferenceOrderedDataPool( walker, rod ); + if (rod.supportsQuery()) iteratorPool = null; + else iteratorPool = new ReferenceOrderedDataPool( walker, rod ); } /** @@ -70,6 +75,8 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * @return Iterator through the data. */ public LocationAwareSeekableRODIterator seek( Shard shard ) { + if (iteratorPool == null) // use query + return getQuery(shard.getGenomeLocs()); DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream(); LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment); return RODIterator; @@ -83,18 +90,28 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * @return Iterator through the data. */ public LocationAwareSeekableRODIterator seek(GenomeLoc loc) { + if (iteratorPool == null) // use query + return getQuery(Arrays.asList(loc)); DataStreamSegment dataStreamSegment = loc != null ? new MappedStreamSegment(loc) : new EntireStream(); LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment); return RODIterator; } + /** + * assuming the ROD is a queryable ROD, use that interface to get an iterator to the selected region + * @param loc the region to query for + * @return a LocationAwareSeekableRODIterator over the selected region + */ + private LocationAwareSeekableRODIterator getQuery(List loc) { + return new StitchingLocationAwareSeekableRODIterator(loc,(QueryableTrack)rod); + } /** * Close the specified iterator, returning it to the pool. * @param iterator Iterator to close. */ public void close( LocationAwareSeekableRODIterator iterator ) { - this.iteratorPool.release(iterator); + if (iteratorPool != null) iteratorPool.release(iterator); } } @@ -169,4 +186,80 @@ class ReferenceOrderedDataPool extends ResourcePool locationList; + + // The reference-ordered data itself. + private final QueryableTrack rod; + + // the current iterator + private SeekableRODIterator iterator; + + StitchingLocationAwareSeekableRODIterator(List list, QueryableTrack rmd) { + rod = rmd; + locationList = new LinkedList(); + locationList.addAll(list); + fetchNextInterval(); + } + + @Override + public GenomeLoc peekNextLocation() { + if (iterator == null) return null; + return iterator.peekNextLocation(); + } + + @Override + public GenomeLoc position() { + if (iterator == null) return null; + return iterator.position(); + } + + @Override + public RODRecordList seekForward(GenomeLoc interval) { + RODRecordList list = iterator.seekForward(interval); + if (list == null) { // we were unable to seek the current interval to the location + fetchNextInterval(); + list = iterator.seekForward(interval); + } + return list; + } + + @Override + public boolean hasNext() { + if (iterator == null) return false; + return iterator.hasNext(); + } + + @Override + public RODRecordList next() { + if (!hasNext()) throw new IllegalStateException("StitchingLocationAwareSeekableRODIterator: We do not have a next"); + RODRecordList list = iterator.next(); + if (!iterator.hasNext()) fetchNextInterval(); + return list; + } + + @Override + public void remove() { + throw new UnsupportedOperationException("\"Thou shall not remove()!\" - Software Engineering Team"); + } + + private void fetchNextInterval() { + if (locationList != null && locationList.size() > 0) { + GenomeLoc loc = locationList.getFirst(); + locationList.removeFirst(); + try { + iterator = new SeekableRODIterator(rod.query(loc)); + } catch (IOException e) { + throw new StingException("Unable to query iterator with location " + loc + " and rod name of " + ((RMDTrack)rod).getName()); + } + } + } +} + diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGeliText.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGeliText.java deleted file mode 100755 index 071cbd777..000000000 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGeliText.java +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.refdata; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.Utils; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -public class RodGeliText extends BasicReferenceOrderedDatum { - public enum Genotype_Strings { - AA, AC, AG, AT, CC, CG, CT, GG, GT, TT - } - - public GenomeLoc loc; - public char refBase = 'N'; - public int depth; - public int maxMappingQuality; - public String bestGenotype = "NN"; - public double lodBtr; - public double lodBtnb; - public double[] genotypePosteriors = new double[10]; - - public RodGeliText(final String name) { - super(name); - } - - public String delimiterRegex() { - return "\\s+"; - } - - public boolean parseLine(Object header, String[] parts) throws IOException { - if (parts.length < 18) - throw new IOException("Invalid rodVariant row found -- too few elements. Expected 18+, got " + parts.length); - if (!parts[0].startsWith("#")) { - loc = GenomeLocParser.createGenomeLoc(parts[0], Long.valueOf(parts[1])); - refBase = Character.toUpperCase(parts[2].charAt(0)); - depth = Integer.valueOf(parts[3]); - maxMappingQuality = Integer.valueOf(parts[4]); - - // UPPER case and sort - char[] x = parts[5].toUpperCase().toCharArray(); - Arrays.sort(x); - bestGenotype = new String(x); - - lodBtr = Double.valueOf(parts[6]); - lodBtnb = Double.valueOf(parts[7]); - - for (int pieceIndex = 8, offset = 0; pieceIndex < 18; pieceIndex++, offset++) { - genotypePosteriors[offset] = Double.valueOf(parts[pieceIndex]); - } - - return true; - } - - return false; - } - - public String toString() { - return String.format("%s\t%d\t%c\t%d\t%d\t%s\t%4.4f\t%4.4f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f", - loc.getContig(), - loc.getStart(), - refBase, - depth, - maxMappingQuality, - bestGenotype, - lodBtr, - lodBtnb, - genotypePosteriors[0], - genotypePosteriors[1], - genotypePosteriors[2], - genotypePosteriors[3], - genotypePosteriors[4], - genotypePosteriors[5], - genotypePosteriors[6], - genotypePosteriors[7], - genotypePosteriors[8], - genotypePosteriors[9] - ); - } - - public GenomeLoc getLocation() { - return loc; - } - - /** - * get the reference base(s) at this position - * - * @return the reference base or bases, as a string - */ - public String getReference() { - return String.valueOf(this.refBase); - } - - - /** - * get the -1 * (log 10 of the error value) - * - * @return the log based error estimate - */ - public double getNegLog10PError() { - return Math.abs(lodBtr); - } - - /** - * gets the alternate alleles. This method should return all the alleles present at the location, - * NOT including the reference base. This is returned as a string list with no guarantee ordering - * of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest - * frequency). - * - * @return an alternate allele list - */ - public List getAlternateAlleleList() { - List list = new ArrayList(); - for (char base : bestGenotype.toCharArray()) - if (base != refBase) - list.add(String.valueOf(base)); - return list; - } - - /** - * gets the alleles. This method should return all the alleles present at the location, - * including the reference base. The first allele should always be the reference allele, followed - * by an unordered list of alternate alleles. - * - * @return an alternate allele list - */ - public List getAlleleList() { - List list = new ArrayList(); - if (this.bestGenotype.contains(getReference())) list.add(getReference()); - for (char c : this.bestGenotype.toCharArray()) - if (c != Utils.stringToChar(getReference())) - list.add(String.valueOf(c)); - return list; - } - - public String getRefBasesFWD() { - return String.format("%c", getRefSnpFWD()); - } - - public char getRefSnpFWD() throws IllegalStateException { - return refBase; - } - - public String getAltBasesFWD() { - return String.format("%c", getAltSnpFWD()); - } - - public char getAltSnpFWD() throws IllegalStateException { - // both ref and bestGenotype have been uppercased, so it's safe to use == - char c = (bestGenotype.charAt(0) == refBase) ? bestGenotype.charAt(1) : bestGenotype.charAt(0); - //System.out.printf("%s : %c and %c%n", bestGenotype, refBase, c); - return c; - } - - public boolean isReference() { - return refBase == bestGenotype.charAt(0) && refBase == bestGenotype.charAt(1); - } - - /** - * get the frequency of this variant - * - * @return VariantFrequency with the stored frequency - */ - public double getNonRefAlleleFrequency() { - return 1.0; - } - - public boolean isSNP() { - if (this.getReference().length() == 1) - return (this.refBase != this.bestGenotype.charAt(0) || this.refBase != this.bestGenotype.charAt(1)); - return false; - } - - public boolean isInsertion() { - return false; - } - - public boolean isDeletion() { - return false; - } - - public boolean isIndel() { - return false; - } - - /** - * gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case - * of - * - * @return a char, representing the alternate base - */ - public char getAlternativeBaseForSNP() { - if (!this.isSNP()) throw new IllegalStateException("we're not a SNP"); - // we know that if we're a SNP, the alt is a single base - if (this.bestGenotype.toString().charAt(0) == getReference().charAt(0)) - return this.bestGenotype.toString().charAt(1); - return this.bestGenotype.toString().charAt(0); - - } - - /** - * gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP - * - * @return a char, representing the alternate base - */ - public char getReferenceForSNP() { - if (!isSNP()) throw new IllegalStateException("This site is not a SNP"); - // we know that if we're a SNP, the reference is a single base - if (bestGenotype.toString().charAt(0) != getReference().charAt(0)) - return bestGenotype.toString().charAt(1); - else - return bestGenotype.toString().charAt(0); - } - - public double getMAF() { - return 0; - } - - public double getHeterozygosity() { - return 0; - } - - public boolean isGenotype() { - return true; - } - - public double getVariationConfidence() { - return lodBtr; - } - - public double getConsensusConfidence() { - return lodBtnb; - } - - public List getGenotype() throws IllegalStateException { - return Arrays.asList(getBestGenotype()); - } - - public int getPloidy() throws IllegalStateException { - return 2; - } - - public boolean isBiallelic() { - return true; - } - - - public int length() { - return 1; - } - - public char getReferenceBase() { - return refBase; - } - - public int getPileupDepth() { - return depth; - } - - public int getMaxMappingQuality() { - return maxMappingQuality; - } - - public String getBestGenotype() { - return bestGenotype; - } - - public double getLodBtr() { - return lodBtr; - } - - public double getLodBtnb() { - return lodBtnb; - } - - public double[] getGenotypePosteriors() { - return genotypePosteriors; - } - - public void adjustLikelihoods(double[] likelihoods) { - for (int likelihoodIndex = 0; likelihoodIndex < likelihoods.length; likelihoodIndex++) { - genotypePosteriors[likelihoodIndex] += likelihoods[likelihoodIndex]; - } - - String bestGenotype = "NN"; - double bestLikelihood = Double.NEGATIVE_INFINITY; - double nextBestLikelihood = Double.NEGATIVE_INFINITY; - double refLikelihood = Double.NEGATIVE_INFINITY; - - for (int likelihoodIndex = 0; likelihoodIndex < likelihoods.length; likelihoodIndex++) { - if (genotypePosteriors[likelihoodIndex] > bestLikelihood) { - bestLikelihood = genotypePosteriors[likelihoodIndex]; - - bestGenotype = Genotype_Strings.values()[likelihoodIndex].toString(); - } - } - - for (int likelihoodIndex = 0; likelihoodIndex < likelihoods.length; likelihoodIndex++) { - if (genotypePosteriors[likelihoodIndex] > nextBestLikelihood && genotypePosteriors[likelihoodIndex] < bestLikelihood) { - nextBestLikelihood = genotypePosteriors[likelihoodIndex]; - } - } - - for (int likelihoodIndex = 0; likelihoodIndex < likelihoods.length; likelihoodIndex++) { - if (refBase == Genotype_Strings.values()[likelihoodIndex].toString().charAt(0) && - refBase == Genotype_Strings.values()[likelihoodIndex].toString().charAt(1)) { - refLikelihood = genotypePosteriors[likelihoodIndex]; - } - } - - this.bestGenotype = bestGenotype; - this.lodBtr = (bestLikelihood - refLikelihood); - this.lodBtnb = (bestLikelihood - nextBestLikelihood); - } - - public boolean equals(RodGeliText other) { - if (genotypePosteriors.length != genotypePosteriors.length) return false; - for (int x = 0; x < genotypePosteriors.length; x++) - if (MathUtils.compareDoubles(genotypePosteriors[x],other.genotypePosteriors[x],1e-4)!=0) return false; - return (loc.equals(other.getLocation()) && - refBase == other.refBase && - depth == other.depth && - maxMappingQuality == other.maxMappingQuality && - bestGenotype.equals(other.bestGenotype) && - MathUtils.compareDoubles(lodBtr,other.lodBtr,1e-4) == 0&& // 1e-4 is about the precision the - MathUtils.compareDoubles(lodBtnb,other.lodBtnb,1e-4) == 0); // output file seems to cooperate with - } - - -} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 218cef8eb..ab835b6b4 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.refdata; import edu.mit.broad.picard.genotype.DiploidGenotype; import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; +import org.broad.tribble.gelitext.GeliTextFeature; import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype; @@ -48,7 +49,7 @@ public class VariantContextAdaptors { adaptors.put(PlinkRod.class, new PlinkRodAdaptor()); adaptors.put(HapMapROD.class, new HapMapAdaptor()); adaptors.put(RodGLF.class, new GLFAdaptor()); - adaptors.put(RodGeliText.class, new GeliTextAdaptor()); + adaptors.put(GeliTextFeature.class, new GeliTextAdaptor()); adaptors.put(rodGELI.class, new GeliAdaptor()); } @@ -615,39 +616,42 @@ public class VariantContextAdaptors { * @return a VariantContext object */ VariantContext convert(String name, Object input, ReferenceContext ref) { - RodGeliText geli = (RodGeliText)input; - if ( ! Allele.acceptableAlleleBases(geli.getReference()) ) + GeliTextFeature geli = (GeliTextFeature)input; + if ( ! Allele.acceptableAlleleBases(String.valueOf(geli.getRefBase())) ) return null; - Allele refAllele = new Allele(geli.getReference(), true); + Allele refAllele = new Allele(String.valueOf(geli.getRefBase()), true); // make sure we can convert it - if ( geli.isSNP() || geli.isIndel()) { + if ( geli.getGenotype().isHet() || !geli.getGenotype().containsBase(geli.getRefBase())) { // add the reference allele List alleles = new ArrayList(); - alleles.add(refAllele); - + List genotypeAlleles = new ArrayList(); // add all of the alt alleles - for ( String alt : geli.getAlternateAlleleList() ) { - if ( ! Allele.acceptableAlleleBases(alt) ) { + for ( char alt : geli.getGenotype().toString().toCharArray() ) { + if ( ! Allele.acceptableAlleleBases(String.valueOf(alt)) ) { return null; } - Allele allele = new Allele(alt, false); - if (!alleles.contains(allele)) alleles.add(allele); - } + Allele allele = new Allele(String.valueOf(alt), false); + if (!alleles.contains(allele) && !refAllele.basesMatch(allele.getBases())) alleles.add(allele); + // add the allele, first checking if it's reference or not + if (!refAllele.basesMatch(allele.getBases())) genotypeAlleles.add(allele); + else genotypeAlleles.add(refAllele); + } Map attributes = new HashMap(); Collection genotypes = new ArrayList(); - MutableGenotype call = new MutableGenotype(name, alleles); + MutableGenotype call = new MutableGenotype(name, genotypeAlleles); // set the likelihoods, depth, and RMS mapping quality values - call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY,geli.genotypePosteriors); - call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY,geli.getMaxMappingQuality()); - call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY,geli.depth); + call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY,geli.getLikelihoods()); + call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY,geli.getMaximumMappingQual()); + call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY,geli.getDepthOfCoverage()); // add the call to the genotype list, and then use this list to create a VariantContext genotypes.add(call); - VariantContext vc = new VariantContext(name, geli.getLocation(), alleles, genotypes, geli.getNegLog10PError(), null, attributes); + alleles.add(refAllele); + VariantContext vc = new VariantContext(name, GenomeLocParser.createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes); vc.validate(); return vc; } else diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureReaderTrack.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureReaderTrack.java index 648ac17c2..e88f1b43b 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureReaderTrack.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureReaderTrack.java @@ -101,4 +101,14 @@ public class FeatureReaderTrack extends RMDTrack implements QueryableTrack { public Iterator query(String contig, int start, int stop, boolean contained) throws IOException { return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop, contained),this.getName()); } + + @Override + public void close() { + try { + reader.close(); + } catch (IOException e) { + throw new StingException("Unable to close reader " + reader.toString(),e); + } + reader = null; + } } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java index fecce12f6..a8dc82de3 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java @@ -41,4 +41,5 @@ public interface QueryableTrack { public Iterator query(final GenomeLoc interval, final boolean contained) throws IOException; public Iterator query(final String contig, final int start, final int stop) throws IOException; public Iterator query(final String contig, final int start, final int stop, final boolean contained) throws IOException; + public void close(); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java index ba8f64253..fb0fd3b25 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java @@ -96,9 +96,10 @@ public class RMDTrackManager extends PluginManager { // create a track builder instance for each track builder, and find out what tracks we can make for (String builderName : this.pluginsByName.keySet()) { RMDTrackBuilder builder = this.createByName(builderName); - for (String name : builder.getAvailableTrackNamesAndTypes().keySet()) { + Map mapping = builder.getAvailableTrackNamesAndTypes(); + for (String name : mapping.keySet()) { availableTracks.put(name.toUpperCase(), builder); - availableTrackClasses.put(name.toUpperCase(), builder.getAvailableTrackNamesAndTypes().get(name)); + availableTrackClasses.put(name.toUpperCase(), mapping.get(name)); } } } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java index 1a7050a36..f189e4228 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java @@ -66,7 +66,6 @@ public class RODTrackBuilder implements RMDTrackBuilder { Types.put("PointIndel", PointIndelROD.class); Types.put("HapMap", HapMapROD.class); Types.put("Intervals", IntervalRod.class); - Types.put("Variants", RodGeliText.class); Types.put("GLF", RodGLF.class); Types.put("VCF", RodVCF.class); Types.put("PicardDbSNP", rodPicardDbSNP.class); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java index 92244e57c..822f51192 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java @@ -66,8 +66,9 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen @Override public Map getAvailableTrackNamesAndTypes() { Map classes = new HashMap(); - //for (String c : this.pluginsByName.keySet()) // TODO: Aaron uncomment these two lines when Tribble is live - // if (!c.contains("SNP")) classes.put(c,this.pluginsByName.get(c)); + for (String c : this.pluginsByName.keySet()) + // we're excluding dbSNP and VCF right now + if (!c.contains("SNP") && !c.contains("VCF")) classes.put(c,this.pluginsByName.get(c)); return classes; } diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java index 824118641..d16e6862f 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java @@ -4,6 +4,8 @@ import edu.mit.broad.picard.genotype.geli.GeliFileReader; import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; import net.sf.samtools.SAMFileReader; import net.sf.samtools.util.CloseableIterator; +import org.broad.tribble.gelitext.GeliTextCodec; +import org.broad.tribble.gelitext.GeliTextFeature; import org.broad.tribble.util.AsciiLineReader; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; @@ -131,10 +133,10 @@ public class VariantContextAdaptorsUnitTest extends BaseTest { GenotypeWriter gw = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GELI,tempFile); ((GeliTextWriter)gw).writeHeader(null); // the write header command ignores the parameter - RodGeliText geliText = new RodGeliText("myROD"); // now cycle the input file to the output file - + GeliTextFeature geliText; + GeliTextCodec codec = new GeliTextCodec(); // buffer the records we see - List records = new ArrayList(); + List records = new ArrayList(); // a little more complicated than the GLF example, we have to read the file in AsciiLineReader reader = createReader(knownFile); @@ -144,20 +146,17 @@ public class VariantContextAdaptorsUnitTest extends BaseTest { // while we have records, make a Variant Context and output it to a GLF file while (line != null && line != "") { - parseGeliText(geliText, line); + geliText = (GeliTextFeature)codec.decode(line); records.add(geliText); // we know they're all single calls in the reference file VariantContext vc = VariantContextAdaptors.toVariantContext("Geli",geliText); - gw.addCall(vc,null); + if (vc != null) gw.addCall(vc,null); line = readLine(reader); } gw.close(); // close the file reader.close(); - // now reopen the file with the temp GLF file and read it back in, compare against what we first stored - geliText = new RodGeliText("myROD"); - // buffer the new records we see - List records2 = new ArrayList(); + List records2 = new ArrayList(); reader = createReader(tempFile); // get the first real line (non-header) @@ -165,18 +164,21 @@ public class VariantContextAdaptorsUnitTest extends BaseTest { // while we have records, make a Variant Context and output it to a GLF file while (line != null && line != "") { - parseGeliText(geliText,line); + geliText = (GeliTextFeature)codec.decode(line); records2.add(geliText); // we know they're all single calls in the reference file - line = readLine(reader); - + line = readLine(reader); } gw.close(); // close the file reader.close(); // compare sizes - Assert.assertEquals("The input GLF file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size()); + Assert.assertEquals("The input Geli file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size()); // now compare each record for (int x = 0; x < records.size(); x++) - Assert.assertTrue("GLF Records were not preserved when cycling them to and from disc", records.get(x).equals(records2.get(x))); + if(!records.get(x).equals(records2.get(x))) { + System.err.println("Record 1 :" + records.get(x).toString()); + System.err.println("Record 2 :" + records2.get(x).toString()); + Assert.fail("Geli Text Records were not preserved when cycling them to and from disc"); + } } /** @@ -231,24 +233,13 @@ public class VariantContextAdaptorsUnitTest extends BaseTest { Assert.assertEquals("The input GeliBinary file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size()); // now compare each record for (int x = 0; x < records.size(); x++) { - Assert.assertTrue("GeliBinary Records were not preserved when cycling them to and from disc", records.get(x).getGeliRecord().equals(records2.get(x).getGeliRecord())); - } - } + if(!records.get(x).getGeliRecord().equals(records2.get(x).getGeliRecord())) { + Assert.fail("GeliBinary Records were not preserved when cycling them to and from disc"); + System.err.println("Record 1 :" + records.get(x).toString()); + System.err.println("Record 2 :" + records2.get(x).toString()); - - /** - * parse the geli given a line representation - * @param geliText the object to parse into - * @param line the line to parse with - */ - private void parseGeliText(RodGeliText geliText, String line) { - boolean parsed = false; - try { - parsed = geliText.parseLine(null,line.split(TabularROD.DEFAULT_DELIMITER_REGEX)); - } catch (IOException e) { - Assert.fail("IOException: " + e.getMessage()); + } } - if (!parsed) Assert.fail("Unable to parse line" + line); } /** diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/VariantsToVCFIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/VariantsToVCFIntegrationTest.java index 6be807945..ce1cefe74 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/VariantsToVCFIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/VariantsToVCFIntegrationTest.java @@ -20,11 +20,11 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testVariantsToVCFUsingGeliInput() { List md5 = new ArrayList(); - md5.add("211be63cf93cddf021e5b0eb9341b386"); + md5.add("4828a31b10b90698723328829ae4ecd3"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + oneKGLocation + "reference/human_b36_both.fasta" + - " -B variant,Variants," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" + + " -B variant,GeliText," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" + " -T VariantsToVCF" + " -L 1:10,000,000-11,000,000" + " -sample NA123AB" + @@ -37,11 +37,11 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testGenotypesToVCFUsingGeliInput() { List md5 = new ArrayList(); - md5.add("b31446fa91b8ed82ad73a5a5a72700a7"); + md5.add("1f55df5c40f2325847bc35522aba1d70"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + oneKGLocation + "reference/human_b36_both.fasta" + - " -B variant,Variants," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" + + " -B variant,GeliText," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" + " -T VariantsToVCF" + " -L 1:10,000,000-11,000,000" + " -sample NA123AB" + diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index 7abe6cd52..093a1fdaa 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -22,7 +22,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest { executeTest("testFastaAlternateReferenceIndels", spec2); WalkerTestSpec spec4 = new WalkerTestSpec( - "-T FastaAlternateReferenceMaker -R " + oneKGLocation + "reference/human_b36_both.fasta -B snps,Variants," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.geli.calls -B snpmask,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -L 1:10,023,400-10,023,500;1:10,029,200-10,029,500 -o %s", + "-T FastaAlternateReferenceMaker -R " + oneKGLocation + "reference/human_b36_both.fasta -B snps,GeliText," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.geli.calls -B snpmask,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -L 1:10,023,400-10,023,500;1:10,029,200-10,029,500 -o %s", 1, Arrays.asList("82705a88f6fc25880dd2331183531d9a")); executeTest("testFastaAlternateReferenceSnps", spec4); diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationIntegrationTest.java index c3dc1370e..fb41bd657 100644 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationIntegrationTest.java @@ -18,8 +18,8 @@ public class RodSystemValidationIntegrationTest extends WalkerTest { @Test public void testSimpleGeliPileup() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B eval,Variants," + validationDataLocation + "ROD_validation/chr1.geli", 1, - Arrays.asList("536567b13ea4b8786badd96c879df245")); + baseTestString() + " -B eval,GeliText," + validationDataLocation + "ROD_validation/chr1.geli", 1, + Arrays.asList("0d338375f724dd2b5481a8606892c772")); executeTest("testVCFSelect1", spec); } }