Adding GeliText tribble track as the first enabled Tribble track. This mean 'Variants' is no longer valid for a ROD type, use GeliText instead. I've updated all the references in the codebase.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3271 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7fbfd34315
commit
cbed0b1ade
|
|
@ -2,14 +2,19 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
|||
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.QueryableTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
/**
|
||||
* User: hanna
|
||||
|
|
@ -36,16 +41,16 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
/**
|
||||
* A pool of iterators for navigating through the genome.
|
||||
*/
|
||||
private ReferenceOrderedDataPool iteratorPool = null;
|
||||
private final ReferenceOrderedDataPool iteratorPool;
|
||||
|
||||
/**
|
||||
* Create a new reference-ordered data source.
|
||||
* @param rod
|
||||
* @param rod the reference ordered data
|
||||
*/
|
||||
public ReferenceOrderedDataSource( Walker walker, RMDTrack rod) {
|
||||
this.rod = rod;
|
||||
// if (!rod.supportsQuery()) // TODO: Aaron turn on to enable Tribble searches
|
||||
this.iteratorPool = new ReferenceOrderedDataPool( walker, rod );
|
||||
if (rod.supportsQuery()) iteratorPool = null;
|
||||
else iteratorPool = new ReferenceOrderedDataPool( walker, rod );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -70,6 +75,8 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
* @return Iterator through the data.
|
||||
*/
|
||||
public LocationAwareSeekableRODIterator seek( Shard shard ) {
|
||||
if (iteratorPool == null) // use query
|
||||
return getQuery(shard.getGenomeLocs());
|
||||
DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream();
|
||||
LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
|
||||
return RODIterator;
|
||||
|
|
@ -83,18 +90,28 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
* @return Iterator through the data.
|
||||
*/
|
||||
public LocationAwareSeekableRODIterator seek(GenomeLoc loc) {
|
||||
if (iteratorPool == null) // use query
|
||||
return getQuery(Arrays.asList(loc));
|
||||
DataStreamSegment dataStreamSegment = loc != null ? new MappedStreamSegment(loc) : new EntireStream();
|
||||
LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
|
||||
return RODIterator;
|
||||
}
|
||||
|
||||
/**
|
||||
* assuming the ROD is a queryable ROD, use that interface to get an iterator to the selected region
|
||||
* @param loc the region to query for
|
||||
* @return a LocationAwareSeekableRODIterator over the selected region
|
||||
*/
|
||||
private LocationAwareSeekableRODIterator getQuery(List<GenomeLoc> loc) {
|
||||
return new StitchingLocationAwareSeekableRODIterator(loc,(QueryableTrack)rod);
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the specified iterator, returning it to the pool.
|
||||
* @param iterator Iterator to close.
|
||||
*/
|
||||
public void close( LocationAwareSeekableRODIterator iterator ) {
|
||||
this.iteratorPool.release(iterator);
|
||||
if (iteratorPool != null) iteratorPool.release(iterator);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -169,4 +186,80 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* stitch together the multiple calls to seek (since shards can have multiple intervals now)
|
||||
* on the underlying Tribble track into one seamless iteration
|
||||
*/
|
||||
class StitchingLocationAwareSeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||
|
||||
// the list of intervals we're iterating over
|
||||
private final LinkedList<GenomeLoc> locationList;
|
||||
|
||||
// The reference-ordered data itself.
|
||||
private final QueryableTrack rod;
|
||||
|
||||
// the current iterator
|
||||
private SeekableRODIterator iterator;
|
||||
|
||||
StitchingLocationAwareSeekableRODIterator(List<GenomeLoc> list, QueryableTrack rmd) {
|
||||
rod = rmd;
|
||||
locationList = new LinkedList<GenomeLoc>();
|
||||
locationList.addAll(list);
|
||||
fetchNextInterval();
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc peekNextLocation() {
|
||||
if (iterator == null) return null;
|
||||
return iterator.peekNextLocation();
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc position() {
|
||||
if (iterator == null) return null;
|
||||
return iterator.position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public RODRecordList seekForward(GenomeLoc interval) {
|
||||
RODRecordList list = iterator.seekForward(interval);
|
||||
if (list == null) { // we were unable to seek the current interval to the location
|
||||
fetchNextInterval();
|
||||
list = iterator.seekForward(interval);
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (iterator == null) return false;
|
||||
return iterator.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public RODRecordList next() {
|
||||
if (!hasNext()) throw new IllegalStateException("StitchingLocationAwareSeekableRODIterator: We do not have a next");
|
||||
RODRecordList list = iterator.next();
|
||||
if (!iterator.hasNext()) fetchNextInterval();
|
||||
return list;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("\"Thou shall not remove()!\" - Software Engineering Team");
|
||||
}
|
||||
|
||||
private void fetchNextInterval() {
|
||||
if (locationList != null && locationList.size() > 0) {
|
||||
GenomeLoc loc = locationList.getFirst();
|
||||
locationList.removeFirst();
|
||||
try {
|
||||
iterator = new SeekableRODIterator(rod.query(loc));
|
||||
} catch (IOException e) {
|
||||
throw new StingException("Unable to query iterator with location " + loc + " and rod name of " + ((RMDTrack)rod).getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,359 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class RodGeliText extends BasicReferenceOrderedDatum {
|
||||
public enum Genotype_Strings {
|
||||
AA, AC, AG, AT, CC, CG, CT, GG, GT, TT
|
||||
}
|
||||
|
||||
public GenomeLoc loc;
|
||||
public char refBase = 'N';
|
||||
public int depth;
|
||||
public int maxMappingQuality;
|
||||
public String bestGenotype = "NN";
|
||||
public double lodBtr;
|
||||
public double lodBtnb;
|
||||
public double[] genotypePosteriors = new double[10];
|
||||
|
||||
public RodGeliText(final String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
public String delimiterRegex() {
|
||||
return "\\s+";
|
||||
}
|
||||
|
||||
public boolean parseLine(Object header, String[] parts) throws IOException {
|
||||
if (parts.length < 18)
|
||||
throw new IOException("Invalid rodVariant row found -- too few elements. Expected 18+, got " + parts.length);
|
||||
if (!parts[0].startsWith("#")) {
|
||||
loc = GenomeLocParser.createGenomeLoc(parts[0], Long.valueOf(parts[1]));
|
||||
refBase = Character.toUpperCase(parts[2].charAt(0));
|
||||
depth = Integer.valueOf(parts[3]);
|
||||
maxMappingQuality = Integer.valueOf(parts[4]);
|
||||
|
||||
// UPPER case and sort
|
||||
char[] x = parts[5].toUpperCase().toCharArray();
|
||||
Arrays.sort(x);
|
||||
bestGenotype = new String(x);
|
||||
|
||||
lodBtr = Double.valueOf(parts[6]);
|
||||
lodBtnb = Double.valueOf(parts[7]);
|
||||
|
||||
for (int pieceIndex = 8, offset = 0; pieceIndex < 18; pieceIndex++, offset++) {
|
||||
genotypePosteriors[offset] = Double.valueOf(parts[pieceIndex]);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s\t%d\t%c\t%d\t%d\t%s\t%4.4f\t%4.4f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f",
|
||||
loc.getContig(),
|
||||
loc.getStart(),
|
||||
refBase,
|
||||
depth,
|
||||
maxMappingQuality,
|
||||
bestGenotype,
|
||||
lodBtr,
|
||||
lodBtnb,
|
||||
genotypePosteriors[0],
|
||||
genotypePosteriors[1],
|
||||
genotypePosteriors[2],
|
||||
genotypePosteriors[3],
|
||||
genotypePosteriors[4],
|
||||
genotypePosteriors[5],
|
||||
genotypePosteriors[6],
|
||||
genotypePosteriors[7],
|
||||
genotypePosteriors[8],
|
||||
genotypePosteriors[9]
|
||||
);
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
return loc;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference base(s) at this position
|
||||
*
|
||||
* @return the reference base or bases, as a string
|
||||
*/
|
||||
public String getReference() {
|
||||
return String.valueOf(this.refBase);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
*
|
||||
* @return the log based error estimate
|
||||
*/
|
||||
public double getNegLog10PError() {
|
||||
return Math.abs(lodBtr);
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public List<String> getAlternateAlleleList() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
for (char base : bestGenotype.toCharArray())
|
||||
if (base != refBase)
|
||||
list.add(String.valueOf(base));
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public List<String> getAlleleList() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
if (this.bestGenotype.contains(getReference())) list.add(getReference());
|
||||
for (char c : this.bestGenotype.toCharArray())
|
||||
if (c != Utils.stringToChar(getReference()))
|
||||
list.add(String.valueOf(c));
|
||||
return list;
|
||||
}
|
||||
|
||||
public String getRefBasesFWD() {
|
||||
return String.format("%c", getRefSnpFWD());
|
||||
}
|
||||
|
||||
public char getRefSnpFWD() throws IllegalStateException {
|
||||
return refBase;
|
||||
}
|
||||
|
||||
public String getAltBasesFWD() {
|
||||
return String.format("%c", getAltSnpFWD());
|
||||
}
|
||||
|
||||
public char getAltSnpFWD() throws IllegalStateException {
|
||||
// both ref and bestGenotype have been uppercased, so it's safe to use ==
|
||||
char c = (bestGenotype.charAt(0) == refBase) ? bestGenotype.charAt(1) : bestGenotype.charAt(0);
|
||||
//System.out.printf("%s : %c and %c%n", bestGenotype, refBase, c);
|
||||
return c;
|
||||
}
|
||||
|
||||
public boolean isReference() {
|
||||
return refBase == bestGenotype.charAt(0) && refBase == bestGenotype.charAt(1);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the frequency of this variant
|
||||
*
|
||||
* @return VariantFrequency with the stored frequency
|
||||
*/
|
||||
public double getNonRefAlleleFrequency() {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
public boolean isSNP() {
|
||||
if (this.getReference().length() == 1)
|
||||
return (this.refBase != this.bestGenotype.charAt(0) || this.refBase != this.bestGenotype.charAt(1));
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isInsertion() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isDeletion() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isIndel() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||
* of
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
public char getAlternativeBaseForSNP() {
|
||||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
// we know that if we're a SNP, the alt is a single base
|
||||
if (this.bestGenotype.toString().charAt(0) == getReference().charAt(0))
|
||||
return this.bestGenotype.toString().charAt(1);
|
||||
return this.bestGenotype.toString().charAt(0);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
public char getReferenceForSNP() {
|
||||
if (!isSNP()) throw new IllegalStateException("This site is not a SNP");
|
||||
// we know that if we're a SNP, the reference is a single base
|
||||
if (bestGenotype.toString().charAt(0) != getReference().charAt(0))
|
||||
return bestGenotype.toString().charAt(1);
|
||||
else
|
||||
return bestGenotype.toString().charAt(0);
|
||||
}
|
||||
|
||||
public double getMAF() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public double getHeterozygosity() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public boolean isGenotype() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public double getVariationConfidence() {
|
||||
return lodBtr;
|
||||
}
|
||||
|
||||
public double getConsensusConfidence() {
|
||||
return lodBtnb;
|
||||
}
|
||||
|
||||
public List<String> getGenotype() throws IllegalStateException {
|
||||
return Arrays.asList(getBestGenotype());
|
||||
}
|
||||
|
||||
public int getPloidy() throws IllegalStateException {
|
||||
return 2;
|
||||
}
|
||||
|
||||
public boolean isBiallelic() {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
public int length() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
public char getReferenceBase() {
|
||||
return refBase;
|
||||
}
|
||||
|
||||
public int getPileupDepth() {
|
||||
return depth;
|
||||
}
|
||||
|
||||
public int getMaxMappingQuality() {
|
||||
return maxMappingQuality;
|
||||
}
|
||||
|
||||
public String getBestGenotype() {
|
||||
return bestGenotype;
|
||||
}
|
||||
|
||||
public double getLodBtr() {
|
||||
return lodBtr;
|
||||
}
|
||||
|
||||
public double getLodBtnb() {
|
||||
return lodBtnb;
|
||||
}
|
||||
|
||||
public double[] getGenotypePosteriors() {
|
||||
return genotypePosteriors;
|
||||
}
|
||||
|
||||
public void adjustLikelihoods(double[] likelihoods) {
|
||||
for (int likelihoodIndex = 0; likelihoodIndex < likelihoods.length; likelihoodIndex++) {
|
||||
genotypePosteriors[likelihoodIndex] += likelihoods[likelihoodIndex];
|
||||
}
|
||||
|
||||
String bestGenotype = "NN";
|
||||
double bestLikelihood = Double.NEGATIVE_INFINITY;
|
||||
double nextBestLikelihood = Double.NEGATIVE_INFINITY;
|
||||
double refLikelihood = Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (int likelihoodIndex = 0; likelihoodIndex < likelihoods.length; likelihoodIndex++) {
|
||||
if (genotypePosteriors[likelihoodIndex] > bestLikelihood) {
|
||||
bestLikelihood = genotypePosteriors[likelihoodIndex];
|
||||
|
||||
bestGenotype = Genotype_Strings.values()[likelihoodIndex].toString();
|
||||
}
|
||||
}
|
||||
|
||||
for (int likelihoodIndex = 0; likelihoodIndex < likelihoods.length; likelihoodIndex++) {
|
||||
if (genotypePosteriors[likelihoodIndex] > nextBestLikelihood && genotypePosteriors[likelihoodIndex] < bestLikelihood) {
|
||||
nextBestLikelihood = genotypePosteriors[likelihoodIndex];
|
||||
}
|
||||
}
|
||||
|
||||
for (int likelihoodIndex = 0; likelihoodIndex < likelihoods.length; likelihoodIndex++) {
|
||||
if (refBase == Genotype_Strings.values()[likelihoodIndex].toString().charAt(0) &&
|
||||
refBase == Genotype_Strings.values()[likelihoodIndex].toString().charAt(1)) {
|
||||
refLikelihood = genotypePosteriors[likelihoodIndex];
|
||||
}
|
||||
}
|
||||
|
||||
this.bestGenotype = bestGenotype;
|
||||
this.lodBtr = (bestLikelihood - refLikelihood);
|
||||
this.lodBtnb = (bestLikelihood - nextBestLikelihood);
|
||||
}
|
||||
|
||||
public boolean equals(RodGeliText other) {
|
||||
if (genotypePosteriors.length != genotypePosteriors.length) return false;
|
||||
for (int x = 0; x < genotypePosteriors.length; x++)
|
||||
if (MathUtils.compareDoubles(genotypePosteriors[x],other.genotypePosteriors[x],1e-4)!=0) return false;
|
||||
return (loc.equals(other.getLocation()) &&
|
||||
refBase == other.refBase &&
|
||||
depth == other.depth &&
|
||||
maxMappingQuality == other.maxMappingQuality &&
|
||||
bestGenotype.equals(other.bestGenotype) &&
|
||||
MathUtils.compareDoubles(lodBtr,other.lodBtr,1e-4) == 0&& // 1e-4 is about the precision the
|
||||
MathUtils.compareDoubles(lodBtnb,other.lodBtnb,1e-4) == 0); // output file seems to cooperate with
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.refdata;
|
|||
|
||||
import edu.mit.broad.picard.genotype.DiploidGenotype;
|
||||
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
||||
import org.broad.tribble.gelitext.GeliTextFeature;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype;
|
||||
|
|
@ -48,7 +49,7 @@ public class VariantContextAdaptors {
|
|||
adaptors.put(PlinkRod.class, new PlinkRodAdaptor());
|
||||
adaptors.put(HapMapROD.class, new HapMapAdaptor());
|
||||
adaptors.put(RodGLF.class, new GLFAdaptor());
|
||||
adaptors.put(RodGeliText.class, new GeliTextAdaptor());
|
||||
adaptors.put(GeliTextFeature.class, new GeliTextAdaptor());
|
||||
adaptors.put(rodGELI.class, new GeliAdaptor());
|
||||
}
|
||||
|
||||
|
|
@ -615,39 +616,42 @@ public class VariantContextAdaptors {
|
|||
* @return a VariantContext object
|
||||
*/
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
RodGeliText geli = (RodGeliText)input;
|
||||
if ( ! Allele.acceptableAlleleBases(geli.getReference()) )
|
||||
GeliTextFeature geli = (GeliTextFeature)input;
|
||||
if ( ! Allele.acceptableAlleleBases(String.valueOf(geli.getRefBase())) )
|
||||
return null;
|
||||
Allele refAllele = new Allele(geli.getReference(), true);
|
||||
Allele refAllele = new Allele(String.valueOf(geli.getRefBase()), true);
|
||||
|
||||
// make sure we can convert it
|
||||
if ( geli.isSNP() || geli.isIndel()) {
|
||||
if ( geli.getGenotype().isHet() || !geli.getGenotype().containsBase(geli.getRefBase())) {
|
||||
// add the reference allele
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(refAllele);
|
||||
|
||||
List<Allele> genotypeAlleles = new ArrayList<Allele>();
|
||||
// add all of the alt alleles
|
||||
for ( String alt : geli.getAlternateAlleleList() ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||
for ( char alt : geli.getGenotype().toString().toCharArray() ) {
|
||||
if ( ! Allele.acceptableAlleleBases(String.valueOf(alt)) ) {
|
||||
return null;
|
||||
}
|
||||
Allele allele = new Allele(alt, false);
|
||||
if (!alleles.contains(allele)) alleles.add(allele);
|
||||
}
|
||||
Allele allele = new Allele(String.valueOf(alt), false);
|
||||
if (!alleles.contains(allele) && !refAllele.basesMatch(allele.getBases())) alleles.add(allele);
|
||||
|
||||
// add the allele, first checking if it's reference or not
|
||||
if (!refAllele.basesMatch(allele.getBases())) genotypeAlleles.add(allele);
|
||||
else genotypeAlleles.add(refAllele);
|
||||
}
|
||||
|
||||
Map<String, String> attributes = new HashMap<String, String>();
|
||||
Collection<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
MutableGenotype call = new MutableGenotype(name, alleles);
|
||||
MutableGenotype call = new MutableGenotype(name, genotypeAlleles);
|
||||
|
||||
// set the likelihoods, depth, and RMS mapping quality values
|
||||
call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY,geli.genotypePosteriors);
|
||||
call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY,geli.getMaxMappingQuality());
|
||||
call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY,geli.depth);
|
||||
call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY,geli.getLikelihoods());
|
||||
call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY,geli.getMaximumMappingQual());
|
||||
call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY,geli.getDepthOfCoverage());
|
||||
|
||||
// add the call to the genotype list, and then use this list to create a VariantContext
|
||||
genotypes.add(call);
|
||||
VariantContext vc = new VariantContext(name, geli.getLocation(), alleles, genotypes, geli.getNegLog10PError(), null, attributes);
|
||||
alleles.add(refAllele);
|
||||
VariantContext vc = new VariantContext(name, GenomeLocParser.createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes);
|
||||
vc.validate();
|
||||
return vc;
|
||||
} else
|
||||
|
|
|
|||
|
|
@ -101,4 +101,14 @@ public class FeatureReaderTrack extends RMDTrack implements QueryableTrack {
|
|||
public Iterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
|
||||
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop, contained),this.getName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
try {
|
||||
reader.close();
|
||||
} catch (IOException e) {
|
||||
throw new StingException("Unable to close reader " + reader.toString(),e);
|
||||
}
|
||||
reader = null;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,4 +41,5 @@ public interface QueryableTrack {
|
|||
public Iterator<GATKFeature> query(final GenomeLoc interval, final boolean contained) throws IOException;
|
||||
public Iterator<GATKFeature> query(final String contig, final int start, final int stop) throws IOException;
|
||||
public Iterator<GATKFeature> query(final String contig, final int start, final int stop, final boolean contained) throws IOException;
|
||||
public void close();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -96,9 +96,10 @@ public class RMDTrackManager extends PluginManager<RMDTrackBuilder> {
|
|||
// create a track builder instance for each track builder, and find out what tracks we can make
|
||||
for (String builderName : this.pluginsByName.keySet()) {
|
||||
RMDTrackBuilder builder = this.createByName(builderName);
|
||||
for (String name : builder.getAvailableTrackNamesAndTypes().keySet()) {
|
||||
Map<String, Class> mapping = builder.getAvailableTrackNamesAndTypes();
|
||||
for (String name : mapping.keySet()) {
|
||||
availableTracks.put(name.toUpperCase(), builder);
|
||||
availableTrackClasses.put(name.toUpperCase(), builder.getAvailableTrackNamesAndTypes().get(name));
|
||||
availableTrackClasses.put(name.toUpperCase(), mapping.get(name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -66,7 +66,6 @@ public class RODTrackBuilder implements RMDTrackBuilder {
|
|||
Types.put("PointIndel", PointIndelROD.class);
|
||||
Types.put("HapMap", HapMapROD.class);
|
||||
Types.put("Intervals", IntervalRod.class);
|
||||
Types.put("Variants", RodGeliText.class);
|
||||
Types.put("GLF", RodGLF.class);
|
||||
Types.put("VCF", RodVCF.class);
|
||||
Types.put("PicardDbSNP", rodPicardDbSNP.class);
|
||||
|
|
|
|||
|
|
@ -66,8 +66,9 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
|||
@Override
|
||||
public Map<String, Class> getAvailableTrackNamesAndTypes() {
|
||||
Map<String, Class> classes = new HashMap<String, Class>();
|
||||
//for (String c : this.pluginsByName.keySet()) // TODO: Aaron uncomment these two lines when Tribble is live
|
||||
// if (!c.contains("SNP")) classes.put(c,this.pluginsByName.get(c));
|
||||
for (String c : this.pluginsByName.keySet())
|
||||
// we're excluding dbSNP and VCF right now
|
||||
if (!c.contains("SNP") && !c.contains("VCF")) classes.put(c,this.pluginsByName.get(c));
|
||||
return classes;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ import edu.mit.broad.picard.genotype.geli.GeliFileReader;
|
|||
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broad.tribble.gelitext.GeliTextCodec;
|
||||
import org.broad.tribble.gelitext.GeliTextFeature;
|
||||
import org.broad.tribble.util.AsciiLineReader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
|
|
@ -131,10 +133,10 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
|
|||
GenotypeWriter gw = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GELI,tempFile);
|
||||
((GeliTextWriter)gw).writeHeader(null); // the write header command ignores the parameter
|
||||
|
||||
RodGeliText geliText = new RodGeliText("myROD"); // now cycle the input file to the output file
|
||||
|
||||
GeliTextFeature geliText;
|
||||
GeliTextCodec codec = new GeliTextCodec();
|
||||
// buffer the records we see
|
||||
List<RodGeliText> records = new ArrayList<RodGeliText>();
|
||||
List<GeliTextFeature> records = new ArrayList<GeliTextFeature>();
|
||||
|
||||
// a little more complicated than the GLF example, we have to read the file in
|
||||
AsciiLineReader reader = createReader(knownFile);
|
||||
|
|
@ -144,20 +146,17 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
|
|||
|
||||
// while we have records, make a Variant Context and output it to a GLF file
|
||||
while (line != null && line != "") {
|
||||
parseGeliText(geliText, line);
|
||||
geliText = (GeliTextFeature)codec.decode(line);
|
||||
records.add(geliText); // we know they're all single calls in the reference file
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext("Geli",geliText);
|
||||
gw.addCall(vc,null);
|
||||
if (vc != null) gw.addCall(vc,null);
|
||||
line = readLine(reader);
|
||||
}
|
||||
gw.close(); // close the file
|
||||
reader.close();
|
||||
|
||||
// now reopen the file with the temp GLF file and read it back in, compare against what we first stored
|
||||
geliText = new RodGeliText("myROD");
|
||||
|
||||
// buffer the new records we see
|
||||
List<RodGeliText> records2 = new ArrayList<RodGeliText>();
|
||||
List<GeliTextFeature> records2 = new ArrayList<GeliTextFeature>();
|
||||
|
||||
reader = createReader(tempFile);
|
||||
// get the first real line (non-header)
|
||||
|
|
@ -165,18 +164,21 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
|
|||
|
||||
// while we have records, make a Variant Context and output it to a GLF file
|
||||
while (line != null && line != "") {
|
||||
parseGeliText(geliText,line);
|
||||
geliText = (GeliTextFeature)codec.decode(line);
|
||||
records2.add(geliText); // we know they're all single calls in the reference file
|
||||
line = readLine(reader);
|
||||
|
||||
line = readLine(reader);
|
||||
}
|
||||
gw.close(); // close the file
|
||||
reader.close();
|
||||
// compare sizes
|
||||
Assert.assertEquals("The input GLF file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size());
|
||||
Assert.assertEquals("The input Geli file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size());
|
||||
// now compare each record
|
||||
for (int x = 0; x < records.size(); x++)
|
||||
Assert.assertTrue("GLF Records were not preserved when cycling them to and from disc", records.get(x).equals(records2.get(x)));
|
||||
if(!records.get(x).equals(records2.get(x))) {
|
||||
System.err.println("Record 1 :" + records.get(x).toString());
|
||||
System.err.println("Record 2 :" + records2.get(x).toString());
|
||||
Assert.fail("Geli Text Records were not preserved when cycling them to and from disc");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -231,24 +233,13 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
|
|||
Assert.assertEquals("The input GeliBinary file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size());
|
||||
// now compare each record
|
||||
for (int x = 0; x < records.size(); x++) {
|
||||
Assert.assertTrue("GeliBinary Records were not preserved when cycling them to and from disc", records.get(x).getGeliRecord().equals(records2.get(x).getGeliRecord()));
|
||||
}
|
||||
}
|
||||
if(!records.get(x).getGeliRecord().equals(records2.get(x).getGeliRecord())) {
|
||||
Assert.fail("GeliBinary Records were not preserved when cycling them to and from disc");
|
||||
System.err.println("Record 1 :" + records.get(x).toString());
|
||||
System.err.println("Record 2 :" + records2.get(x).toString());
|
||||
|
||||
|
||||
/**
|
||||
* parse the geli given a line representation
|
||||
* @param geliText the object to parse into
|
||||
* @param line the line to parse with
|
||||
*/
|
||||
private void parseGeliText(RodGeliText geliText, String line) {
|
||||
boolean parsed = false;
|
||||
try {
|
||||
parsed = geliText.parseLine(null,line.split(TabularROD.DEFAULT_DELIMITER_REGEX));
|
||||
} catch (IOException e) {
|
||||
Assert.fail("IOException: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
if (!parsed) Assert.fail("Unable to parse line" + line);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -20,11 +20,11 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testVariantsToVCFUsingGeliInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("211be63cf93cddf021e5b0eb9341b386");
|
||||
md5.add("4828a31b10b90698723328829ae4ecd3");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
|
||||
" -B variant,Variants," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" +
|
||||
" -B variant,GeliText," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" +
|
||||
" -T VariantsToVCF" +
|
||||
" -L 1:10,000,000-11,000,000" +
|
||||
" -sample NA123AB" +
|
||||
|
|
@ -37,11 +37,11 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testGenotypesToVCFUsingGeliInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("b31446fa91b8ed82ad73a5a5a72700a7");
|
||||
md5.add("1f55df5c40f2325847bc35522aba1d70");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
|
||||
" -B variant,Variants," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" +
|
||||
" -B variant,GeliText," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" +
|
||||
" -T VariantsToVCF" +
|
||||
" -L 1:10,000,000-11,000,000" +
|
||||
" -sample NA123AB" +
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
|
|||
executeTest("testFastaAlternateReferenceIndels", spec2);
|
||||
|
||||
WalkerTestSpec spec4 = new WalkerTestSpec(
|
||||
"-T FastaAlternateReferenceMaker -R " + oneKGLocation + "reference/human_b36_both.fasta -B snps,Variants," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.geli.calls -B snpmask,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -L 1:10,023,400-10,023,500;1:10,029,200-10,029,500 -o %s",
|
||||
"-T FastaAlternateReferenceMaker -R " + oneKGLocation + "reference/human_b36_both.fasta -B snps,GeliText," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.geli.calls -B snpmask,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -L 1:10,023,400-10,023,500;1:10,029,200-10,029,500 -o %s",
|
||||
1,
|
||||
Arrays.asList("82705a88f6fc25880dd2331183531d9a"));
|
||||
executeTest("testFastaAlternateReferenceSnps", spec4);
|
||||
|
|
|
|||
|
|
@ -18,8 +18,8 @@ public class RodSystemValidationIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testSimpleGeliPileup() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B eval,Variants," + validationDataLocation + "ROD_validation/chr1.geli", 1,
|
||||
Arrays.asList("536567b13ea4b8786badd96c879df245"));
|
||||
baseTestString() + " -B eval,GeliText," + validationDataLocation + "ROD_validation/chr1.geli", 1,
|
||||
Arrays.asList("0d338375f724dd2b5481a8606892c772"));
|
||||
executeTest("testVCFSelect1", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue