Adding GeliText tribble track as the first enabled Tribble track. This mean 'Variants' is no longer valid for a ROD type, use GeliText instead. I've updated all the references in the codebase.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3271 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-04-29 22:50:17 +00:00
parent 7fbfd34315
commit cbed0b1ade
12 changed files with 164 additions and 423 deletions

View File

@ -2,14 +2,19 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.QueryableTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import java.io.IOException;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
/**
* User: hanna
@ -36,16 +41,16 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
/**
* A pool of iterators for navigating through the genome.
*/
private ReferenceOrderedDataPool iteratorPool = null;
private final ReferenceOrderedDataPool iteratorPool;
/**
* Create a new reference-ordered data source.
* @param rod
* @param rod the reference ordered data
*/
public ReferenceOrderedDataSource( Walker walker, RMDTrack rod) {
this.rod = rod;
// if (!rod.supportsQuery()) // TODO: Aaron turn on to enable Tribble searches
this.iteratorPool = new ReferenceOrderedDataPool( walker, rod );
if (rod.supportsQuery()) iteratorPool = null;
else iteratorPool = new ReferenceOrderedDataPool( walker, rod );
}
/**
@ -70,6 +75,8 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* @return Iterator through the data.
*/
public LocationAwareSeekableRODIterator seek( Shard shard ) {
if (iteratorPool == null) // use query
return getQuery(shard.getGenomeLocs());
DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream();
LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
return RODIterator;
@ -83,18 +90,28 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* @return Iterator through the data.
*/
public LocationAwareSeekableRODIterator seek(GenomeLoc loc) {
if (iteratorPool == null) // use query
return getQuery(Arrays.asList(loc));
DataStreamSegment dataStreamSegment = loc != null ? new MappedStreamSegment(loc) : new EntireStream();
LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
return RODIterator;
}
/**
* assuming the ROD is a queryable ROD, use that interface to get an iterator to the selected region
* @param loc the region to query for
* @return a LocationAwareSeekableRODIterator over the selected region
*/
private LocationAwareSeekableRODIterator getQuery(List<GenomeLoc> loc) {
return new StitchingLocationAwareSeekableRODIterator(loc,(QueryableTrack)rod);
}
/**
* Close the specified iterator, returning it to the pool.
* @param iterator Iterator to close.
*/
public void close( LocationAwareSeekableRODIterator iterator ) {
this.iteratorPool.release(iterator);
if (iteratorPool != null) iteratorPool.release(iterator);
}
}
@ -169,4 +186,80 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
}
}
/**
* stitch together the multiple calls to seek (since shards can have multiple intervals now)
* on the underlying Tribble track into one seamless iteration
*/
class StitchingLocationAwareSeekableRODIterator implements LocationAwareSeekableRODIterator {
// the list of intervals we're iterating over
private final LinkedList<GenomeLoc> locationList;
// The reference-ordered data itself.
private final QueryableTrack rod;
// the current iterator
private SeekableRODIterator iterator;
StitchingLocationAwareSeekableRODIterator(List<GenomeLoc> list, QueryableTrack rmd) {
rod = rmd;
locationList = new LinkedList<GenomeLoc>();
locationList.addAll(list);
fetchNextInterval();
}
@Override
public GenomeLoc peekNextLocation() {
if (iterator == null) return null;
return iterator.peekNextLocation();
}
@Override
public GenomeLoc position() {
if (iterator == null) return null;
return iterator.position();
}
@Override
public RODRecordList seekForward(GenomeLoc interval) {
RODRecordList list = iterator.seekForward(interval);
if (list == null) { // we were unable to seek the current interval to the location
fetchNextInterval();
list = iterator.seekForward(interval);
}
return list;
}
@Override
public boolean hasNext() {
if (iterator == null) return false;
return iterator.hasNext();
}
@Override
public RODRecordList next() {
if (!hasNext()) throw new IllegalStateException("StitchingLocationAwareSeekableRODIterator: We do not have a next");
RODRecordList list = iterator.next();
if (!iterator.hasNext()) fetchNextInterval();
return list;
}
@Override
public void remove() {
throw new UnsupportedOperationException("\"Thou shall not remove()!\" - Software Engineering Team");
}
private void fetchNextInterval() {
if (locationList != null && locationList.size() > 0) {
GenomeLoc loc = locationList.getFirst();
locationList.removeFirst();
try {
iterator = new SeekableRODIterator(rod.query(loc));
} catch (IOException e) {
throw new StingException("Unable to query iterator with location " + loc + " and rod name of " + ((RMDTrack)rod).getName());
}
}
}
}

View File

@ -1,359 +0,0 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class RodGeliText extends BasicReferenceOrderedDatum {
public enum Genotype_Strings {
AA, AC, AG, AT, CC, CG, CT, GG, GT, TT
}
public GenomeLoc loc;
public char refBase = 'N';
public int depth;
public int maxMappingQuality;
public String bestGenotype = "NN";
public double lodBtr;
public double lodBtnb;
public double[] genotypePosteriors = new double[10];
public RodGeliText(final String name) {
super(name);
}
public String delimiterRegex() {
return "\\s+";
}
public boolean parseLine(Object header, String[] parts) throws IOException {
if (parts.length < 18)
throw new IOException("Invalid rodVariant row found -- too few elements. Expected 18+, got " + parts.length);
if (!parts[0].startsWith("#")) {
loc = GenomeLocParser.createGenomeLoc(parts[0], Long.valueOf(parts[1]));
refBase = Character.toUpperCase(parts[2].charAt(0));
depth = Integer.valueOf(parts[3]);
maxMappingQuality = Integer.valueOf(parts[4]);
// UPPER case and sort
char[] x = parts[5].toUpperCase().toCharArray();
Arrays.sort(x);
bestGenotype = new String(x);
lodBtr = Double.valueOf(parts[6]);
lodBtnb = Double.valueOf(parts[7]);
for (int pieceIndex = 8, offset = 0; pieceIndex < 18; pieceIndex++, offset++) {
genotypePosteriors[offset] = Double.valueOf(parts[pieceIndex]);
}
return true;
}
return false;
}
public String toString() {
return String.format("%s\t%d\t%c\t%d\t%d\t%s\t%4.4f\t%4.4f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f",
loc.getContig(),
loc.getStart(),
refBase,
depth,
maxMappingQuality,
bestGenotype,
lodBtr,
lodBtnb,
genotypePosteriors[0],
genotypePosteriors[1],
genotypePosteriors[2],
genotypePosteriors[3],
genotypePosteriors[4],
genotypePosteriors[5],
genotypePosteriors[6],
genotypePosteriors[7],
genotypePosteriors[8],
genotypePosteriors[9]
);
}
public GenomeLoc getLocation() {
return loc;
}
/**
* get the reference base(s) at this position
*
* @return the reference base or bases, as a string
*/
public String getReference() {
return String.valueOf(this.refBase);
}
/**
* get the -1 * (log 10 of the error value)
*
* @return the log based error estimate
*/
public double getNegLog10PError() {
return Math.abs(lodBtr);
}
/**
* gets the alternate alleles. This method should return all the alleles present at the location,
* NOT including the reference base. This is returned as a string list with no guarantee ordering
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
* frequency).
*
* @return an alternate allele list
*/
public List<String> getAlternateAlleleList() {
List<String> list = new ArrayList<String>();
for (char base : bestGenotype.toCharArray())
if (base != refBase)
list.add(String.valueOf(base));
return list;
}
/**
* gets the alleles. This method should return all the alleles present at the location,
* including the reference base. The first allele should always be the reference allele, followed
* by an unordered list of alternate alleles.
*
* @return an alternate allele list
*/
public List<String> getAlleleList() {
List<String> list = new ArrayList<String>();
if (this.bestGenotype.contains(getReference())) list.add(getReference());
for (char c : this.bestGenotype.toCharArray())
if (c != Utils.stringToChar(getReference()))
list.add(String.valueOf(c));
return list;
}
public String getRefBasesFWD() {
return String.format("%c", getRefSnpFWD());
}
public char getRefSnpFWD() throws IllegalStateException {
return refBase;
}
public String getAltBasesFWD() {
return String.format("%c", getAltSnpFWD());
}
public char getAltSnpFWD() throws IllegalStateException {
// both ref and bestGenotype have been uppercased, so it's safe to use ==
char c = (bestGenotype.charAt(0) == refBase) ? bestGenotype.charAt(1) : bestGenotype.charAt(0);
//System.out.printf("%s : %c and %c%n", bestGenotype, refBase, c);
return c;
}
public boolean isReference() {
return refBase == bestGenotype.charAt(0) && refBase == bestGenotype.charAt(1);
}
/**
* get the frequency of this variant
*
* @return VariantFrequency with the stored frequency
*/
public double getNonRefAlleleFrequency() {
return 1.0;
}
public boolean isSNP() {
if (this.getReference().length() == 1)
return (this.refBase != this.bestGenotype.charAt(0) || this.refBase != this.bestGenotype.charAt(1));
return false;
}
public boolean isInsertion() {
return false;
}
public boolean isDeletion() {
return false;
}
public boolean isIndel() {
return false;
}
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
* of
*
* @return a char, representing the alternate base
*/
public char getAlternativeBaseForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
// we know that if we're a SNP, the alt is a single base
if (this.bestGenotype.toString().charAt(0) == getReference().charAt(0))
return this.bestGenotype.toString().charAt(1);
return this.bestGenotype.toString().charAt(0);
}
/**
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
*
* @return a char, representing the alternate base
*/
public char getReferenceForSNP() {
if (!isSNP()) throw new IllegalStateException("This site is not a SNP");
// we know that if we're a SNP, the reference is a single base
if (bestGenotype.toString().charAt(0) != getReference().charAt(0))
return bestGenotype.toString().charAt(1);
else
return bestGenotype.toString().charAt(0);
}
public double getMAF() {
return 0;
}
public double getHeterozygosity() {
return 0;
}
public boolean isGenotype() {
return true;
}
public double getVariationConfidence() {
return lodBtr;
}
public double getConsensusConfidence() {
return lodBtnb;
}
public List<String> getGenotype() throws IllegalStateException {
return Arrays.asList(getBestGenotype());
}
public int getPloidy() throws IllegalStateException {
return 2;
}
public boolean isBiallelic() {
return true;
}
public int length() {
return 1;
}
public char getReferenceBase() {
return refBase;
}
public int getPileupDepth() {
return depth;
}
public int getMaxMappingQuality() {
return maxMappingQuality;
}
public String getBestGenotype() {
return bestGenotype;
}
public double getLodBtr() {
return lodBtr;
}
public double getLodBtnb() {
return lodBtnb;
}
public double[] getGenotypePosteriors() {
return genotypePosteriors;
}
public void adjustLikelihoods(double[] likelihoods) {
for (int likelihoodIndex = 0; likelihoodIndex < likelihoods.length; likelihoodIndex++) {
genotypePosteriors[likelihoodIndex] += likelihoods[likelihoodIndex];
}
String bestGenotype = "NN";
double bestLikelihood = Double.NEGATIVE_INFINITY;
double nextBestLikelihood = Double.NEGATIVE_INFINITY;
double refLikelihood = Double.NEGATIVE_INFINITY;
for (int likelihoodIndex = 0; likelihoodIndex < likelihoods.length; likelihoodIndex++) {
if (genotypePosteriors[likelihoodIndex] > bestLikelihood) {
bestLikelihood = genotypePosteriors[likelihoodIndex];
bestGenotype = Genotype_Strings.values()[likelihoodIndex].toString();
}
}
for (int likelihoodIndex = 0; likelihoodIndex < likelihoods.length; likelihoodIndex++) {
if (genotypePosteriors[likelihoodIndex] > nextBestLikelihood && genotypePosteriors[likelihoodIndex] < bestLikelihood) {
nextBestLikelihood = genotypePosteriors[likelihoodIndex];
}
}
for (int likelihoodIndex = 0; likelihoodIndex < likelihoods.length; likelihoodIndex++) {
if (refBase == Genotype_Strings.values()[likelihoodIndex].toString().charAt(0) &&
refBase == Genotype_Strings.values()[likelihoodIndex].toString().charAt(1)) {
refLikelihood = genotypePosteriors[likelihoodIndex];
}
}
this.bestGenotype = bestGenotype;
this.lodBtr = (bestLikelihood - refLikelihood);
this.lodBtnb = (bestLikelihood - nextBestLikelihood);
}
public boolean equals(RodGeliText other) {
if (genotypePosteriors.length != genotypePosteriors.length) return false;
for (int x = 0; x < genotypePosteriors.length; x++)
if (MathUtils.compareDoubles(genotypePosteriors[x],other.genotypePosteriors[x],1e-4)!=0) return false;
return (loc.equals(other.getLocation()) &&
refBase == other.refBase &&
depth == other.depth &&
maxMappingQuality == other.maxMappingQuality &&
bestGenotype.equals(other.bestGenotype) &&
MathUtils.compareDoubles(lodBtr,other.lodBtr,1e-4) == 0&& // 1e-4 is about the precision the
MathUtils.compareDoubles(lodBtnb,other.lodBtnb,1e-4) == 0); // output file seems to cooperate with
}
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.refdata;
import edu.mit.broad.picard.genotype.DiploidGenotype;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import org.broad.tribble.gelitext.GeliTextFeature;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype;
@ -48,7 +49,7 @@ public class VariantContextAdaptors {
adaptors.put(PlinkRod.class, new PlinkRodAdaptor());
adaptors.put(HapMapROD.class, new HapMapAdaptor());
adaptors.put(RodGLF.class, new GLFAdaptor());
adaptors.put(RodGeliText.class, new GeliTextAdaptor());
adaptors.put(GeliTextFeature.class, new GeliTextAdaptor());
adaptors.put(rodGELI.class, new GeliAdaptor());
}
@ -615,39 +616,42 @@ public class VariantContextAdaptors {
* @return a VariantContext object
*/
VariantContext convert(String name, Object input, ReferenceContext ref) {
RodGeliText geli = (RodGeliText)input;
if ( ! Allele.acceptableAlleleBases(geli.getReference()) )
GeliTextFeature geli = (GeliTextFeature)input;
if ( ! Allele.acceptableAlleleBases(String.valueOf(geli.getRefBase())) )
return null;
Allele refAllele = new Allele(geli.getReference(), true);
Allele refAllele = new Allele(String.valueOf(geli.getRefBase()), true);
// make sure we can convert it
if ( geli.isSNP() || geli.isIndel()) {
if ( geli.getGenotype().isHet() || !geli.getGenotype().containsBase(geli.getRefBase())) {
// add the reference allele
List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele);
List<Allele> genotypeAlleles = new ArrayList<Allele>();
// add all of the alt alleles
for ( String alt : geli.getAlternateAlleleList() ) {
if ( ! Allele.acceptableAlleleBases(alt) ) {
for ( char alt : geli.getGenotype().toString().toCharArray() ) {
if ( ! Allele.acceptableAlleleBases(String.valueOf(alt)) ) {
return null;
}
Allele allele = new Allele(alt, false);
if (!alleles.contains(allele)) alleles.add(allele);
}
Allele allele = new Allele(String.valueOf(alt), false);
if (!alleles.contains(allele) && !refAllele.basesMatch(allele.getBases())) alleles.add(allele);
// add the allele, first checking if it's reference or not
if (!refAllele.basesMatch(allele.getBases())) genotypeAlleles.add(allele);
else genotypeAlleles.add(refAllele);
}
Map<String, String> attributes = new HashMap<String, String>();
Collection<Genotype> genotypes = new ArrayList<Genotype>();
MutableGenotype call = new MutableGenotype(name, alleles);
MutableGenotype call = new MutableGenotype(name, genotypeAlleles);
// set the likelihoods, depth, and RMS mapping quality values
call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY,geli.genotypePosteriors);
call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY,geli.getMaxMappingQuality());
call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY,geli.depth);
call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY,geli.getLikelihoods());
call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY,geli.getMaximumMappingQual());
call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY,geli.getDepthOfCoverage());
// add the call to the genotype list, and then use this list to create a VariantContext
genotypes.add(call);
VariantContext vc = new VariantContext(name, geli.getLocation(), alleles, genotypes, geli.getNegLog10PError(), null, attributes);
alleles.add(refAllele);
VariantContext vc = new VariantContext(name, GenomeLocParser.createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes);
vc.validate();
return vc;
} else

View File

@ -101,4 +101,14 @@ public class FeatureReaderTrack extends RMDTrack implements QueryableTrack {
public Iterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop, contained),this.getName());
}
@Override
public void close() {
try {
reader.close();
} catch (IOException e) {
throw new StingException("Unable to close reader " + reader.toString(),e);
}
reader = null;
}
}

View File

@ -41,4 +41,5 @@ public interface QueryableTrack {
public Iterator<GATKFeature> query(final GenomeLoc interval, final boolean contained) throws IOException;
public Iterator<GATKFeature> query(final String contig, final int start, final int stop) throws IOException;
public Iterator<GATKFeature> query(final String contig, final int start, final int stop, final boolean contained) throws IOException;
public void close();
}

View File

@ -96,9 +96,10 @@ public class RMDTrackManager extends PluginManager<RMDTrackBuilder> {
// create a track builder instance for each track builder, and find out what tracks we can make
for (String builderName : this.pluginsByName.keySet()) {
RMDTrackBuilder builder = this.createByName(builderName);
for (String name : builder.getAvailableTrackNamesAndTypes().keySet()) {
Map<String, Class> mapping = builder.getAvailableTrackNamesAndTypes();
for (String name : mapping.keySet()) {
availableTracks.put(name.toUpperCase(), builder);
availableTrackClasses.put(name.toUpperCase(), builder.getAvailableTrackNamesAndTypes().get(name));
availableTrackClasses.put(name.toUpperCase(), mapping.get(name));
}
}
}

View File

@ -66,7 +66,6 @@ public class RODTrackBuilder implements RMDTrackBuilder {
Types.put("PointIndel", PointIndelROD.class);
Types.put("HapMap", HapMapROD.class);
Types.put("Intervals", IntervalRod.class);
Types.put("Variants", RodGeliText.class);
Types.put("GLF", RodGLF.class);
Types.put("VCF", RodVCF.class);
Types.put("PicardDbSNP", rodPicardDbSNP.class);

View File

@ -66,8 +66,9 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
@Override
public Map<String, Class> getAvailableTrackNamesAndTypes() {
Map<String, Class> classes = new HashMap<String, Class>();
//for (String c : this.pluginsByName.keySet()) // TODO: Aaron uncomment these two lines when Tribble is live
// if (!c.contains("SNP")) classes.put(c,this.pluginsByName.get(c));
for (String c : this.pluginsByName.keySet())
// we're excluding dbSNP and VCF right now
if (!c.contains("SNP") && !c.contains("VCF")) classes.put(c,this.pluginsByName.get(c));
return classes;
}

View File

@ -4,6 +4,8 @@ import edu.mit.broad.picard.genotype.geli.GeliFileReader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.gelitext.GeliTextCodec;
import org.broad.tribble.gelitext.GeliTextFeature;
import org.broad.tribble.util.AsciiLineReader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
@ -131,10 +133,10 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
GenotypeWriter gw = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GELI,tempFile);
((GeliTextWriter)gw).writeHeader(null); // the write header command ignores the parameter
RodGeliText geliText = new RodGeliText("myROD"); // now cycle the input file to the output file
GeliTextFeature geliText;
GeliTextCodec codec = new GeliTextCodec();
// buffer the records we see
List<RodGeliText> records = new ArrayList<RodGeliText>();
List<GeliTextFeature> records = new ArrayList<GeliTextFeature>();
// a little more complicated than the GLF example, we have to read the file in
AsciiLineReader reader = createReader(knownFile);
@ -144,20 +146,17 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
// while we have records, make a Variant Context and output it to a GLF file
while (line != null && line != "") {
parseGeliText(geliText, line);
geliText = (GeliTextFeature)codec.decode(line);
records.add(geliText); // we know they're all single calls in the reference file
VariantContext vc = VariantContextAdaptors.toVariantContext("Geli",geliText);
gw.addCall(vc,null);
if (vc != null) gw.addCall(vc,null);
line = readLine(reader);
}
gw.close(); // close the file
reader.close();
// now reopen the file with the temp GLF file and read it back in, compare against what we first stored
geliText = new RodGeliText("myROD");
// buffer the new records we see
List<RodGeliText> records2 = new ArrayList<RodGeliText>();
List<GeliTextFeature> records2 = new ArrayList<GeliTextFeature>();
reader = createReader(tempFile);
// get the first real line (non-header)
@ -165,18 +164,21 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
// while we have records, make a Variant Context and output it to a GLF file
while (line != null && line != "") {
parseGeliText(geliText,line);
geliText = (GeliTextFeature)codec.decode(line);
records2.add(geliText); // we know they're all single calls in the reference file
line = readLine(reader);
line = readLine(reader);
}
gw.close(); // close the file
reader.close();
// compare sizes
Assert.assertEquals("The input GLF file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size());
Assert.assertEquals("The input Geli file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size());
// now compare each record
for (int x = 0; x < records.size(); x++)
Assert.assertTrue("GLF Records were not preserved when cycling them to and from disc", records.get(x).equals(records2.get(x)));
if(!records.get(x).equals(records2.get(x))) {
System.err.println("Record 1 :" + records.get(x).toString());
System.err.println("Record 2 :" + records2.get(x).toString());
Assert.fail("Geli Text Records were not preserved when cycling them to and from disc");
}
}
/**
@ -231,24 +233,13 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
Assert.assertEquals("The input GeliBinary file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size());
// now compare each record
for (int x = 0; x < records.size(); x++) {
Assert.assertTrue("GeliBinary Records were not preserved when cycling them to and from disc", records.get(x).getGeliRecord().equals(records2.get(x).getGeliRecord()));
}
}
if(!records.get(x).getGeliRecord().equals(records2.get(x).getGeliRecord())) {
Assert.fail("GeliBinary Records were not preserved when cycling them to and from disc");
System.err.println("Record 1 :" + records.get(x).toString());
System.err.println("Record 2 :" + records2.get(x).toString());
/**
* parse the geli given a line representation
* @param geliText the object to parse into
* @param line the line to parse with
*/
private void parseGeliText(RodGeliText geliText, String line) {
boolean parsed = false;
try {
parsed = geliText.parseLine(null,line.split(TabularROD.DEFAULT_DELIMITER_REGEX));
} catch (IOException e) {
Assert.fail("IOException: " + e.getMessage());
}
}
if (!parsed) Assert.fail("Unable to parse line" + line);
}
/**

View File

@ -20,11 +20,11 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testVariantsToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>();
md5.add("211be63cf93cddf021e5b0eb9341b386");
md5.add("4828a31b10b90698723328829ae4ecd3");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
" -B variant,Variants," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" +
" -B variant,GeliText," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" +
" -T VariantsToVCF" +
" -L 1:10,000,000-11,000,000" +
" -sample NA123AB" +
@ -37,11 +37,11 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>();
md5.add("b31446fa91b8ed82ad73a5a5a72700a7");
md5.add("1f55df5c40f2325847bc35522aba1d70");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
" -B variant,Variants," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" +
" -B variant,GeliText," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" +
" -T VariantsToVCF" +
" -L 1:10,000,000-11,000,000" +
" -sample NA123AB" +

View File

@ -22,7 +22,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
executeTest("testFastaAlternateReferenceIndels", spec2);
WalkerTestSpec spec4 = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + oneKGLocation + "reference/human_b36_both.fasta -B snps,Variants," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.geli.calls -B snpmask,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -L 1:10,023,400-10,023,500;1:10,029,200-10,029,500 -o %s",
"-T FastaAlternateReferenceMaker -R " + oneKGLocation + "reference/human_b36_both.fasta -B snps,GeliText," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.geli.calls -B snpmask,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -L 1:10,023,400-10,023,500;1:10,029,200-10,029,500 -o %s",
1,
Arrays.asList("82705a88f6fc25880dd2331183531d9a"));
executeTest("testFastaAlternateReferenceSnps", spec4);

View File

@ -18,8 +18,8 @@ public class RodSystemValidationIntegrationTest extends WalkerTest {
@Test
public void testSimpleGeliPileup() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B eval,Variants," + validationDataLocation + "ROD_validation/chr1.geli", 1,
Arrays.asList("536567b13ea4b8786badd96c879df245"));
baseTestString() + " -B eval,GeliText," + validationDataLocation + "ROD_validation/chr1.geli", 1,
Arrays.asList("0d338375f724dd2b5481a8606892c772"));
executeTest("testVCFSelect1", spec);
}
}