DbSNP moved over to tribble
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3288 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
447081583a
commit
f497213933
|
|
@ -30,6 +30,7 @@ import net.sf.picard.reference.ReferenceSequenceFile;
|
|||
import net.sf.samtools.*;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
|
|
@ -47,7 +48,6 @@ import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
|||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackManager;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.commandline.ArgumentException;
|
||||
|
|
@ -331,7 +331,7 @@ public class GenomeAnalysisEngine {
|
|||
//
|
||||
// please don't use these in the future, use the new syntax <- if we're not using these please remove them
|
||||
//
|
||||
if (argCollection.DBSNPFile != null) bindConvenienceRods(rodDbSNP.STANDARD_DBSNP_TRACK_NAME, "dbsnp", argCollection.DBSNPFile);
|
||||
if (argCollection.DBSNPFile != null) bindConvenienceRods(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, "dbsnp", argCollection.DBSNPFile);
|
||||
if (argCollection.HAPMAPFile != null)
|
||||
bindConvenienceRods("hapmap", "HapMapAlleleFrequencies", argCollection.HAPMAPFile);
|
||||
if (argCollection.HAPMAPChipFile != null)
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
*/
|
||||
public LocationAwareSeekableRODIterator seek( Shard shard ) {
|
||||
if (iteratorPool == null) // use query
|
||||
return getQuery(shard.getGenomeLocs());
|
||||
return getQuery(shard.getGenomeLocs() == null || shard.getGenomeLocs().size() == 0 ? null : shard.getGenomeLocs());
|
||||
DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream();
|
||||
LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
|
||||
return RODIterator;
|
||||
|
|
@ -91,7 +91,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
*/
|
||||
public LocationAwareSeekableRODIterator seek(GenomeLoc loc) {
|
||||
if (iteratorPool == null) // use query
|
||||
return getQuery(Arrays.asList(loc));
|
||||
return getQuery(loc == null ? null : Arrays.asList(loc));
|
||||
DataStreamSegment dataStreamSegment = loc != null ? new MappedStreamSegment(loc) : new EntireStream();
|
||||
LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment);
|
||||
return RODIterator;
|
||||
|
|
@ -103,6 +103,8 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
* @return a LocationAwareSeekableRODIterator over the selected region
|
||||
*/
|
||||
private LocationAwareSeekableRODIterator getQuery(List<GenomeLoc> loc) {
|
||||
if (loc == null) // for the mono shard case
|
||||
return new SeekableRODIterator(rod.getIterator());
|
||||
return new StitchingLocationAwareSeekableRODIterator(loc,(QueryableTrack)rod);
|
||||
}
|
||||
|
||||
|
|
@ -253,6 +255,7 @@ class StitchingLocationAwareSeekableRODIterator implements LocationAwareSeekable
|
|||
if (locationList != null && locationList.size() > 0) {
|
||||
GenomeLoc loc = locationList.getFirst();
|
||||
locationList.removeFirst();
|
||||
if (rod == null) throw new StingException("Unable to query(), target rod is null, next location = " + ((locationList != null) ? locationList.getFirst() : "null"));
|
||||
try {
|
||||
iterator = new SeekableRODIterator(rod.query(loc));
|
||||
} catch (IOException e) {
|
||||
|
|
|
|||
|
|
@ -2,12 +2,14 @@ package org.broadinstitute.sting.gatk.refdata;
|
|||
|
||||
import edu.mit.broad.picard.genotype.DiploidGenotype;
|
||||
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.gelitext.GeliTextFeature;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
|
|
@ -43,7 +45,7 @@ public class VariantContextAdaptors {
|
|||
private static Map<Class, VCAdaptor> adaptors = new HashMap<Class, VCAdaptor>();
|
||||
|
||||
static {
|
||||
adaptors.put(rodDbSNP.class, new RodDBSnpAdaptor());
|
||||
adaptors.put(DbSNPFeature.class, new DBSnpAdaptor());
|
||||
adaptors.put(RodVCF.class, new RodVCFAdaptor());
|
||||
adaptors.put(VCFRecord.class, new VCFRecordAdaptor());
|
||||
adaptors.put(PlinkRod.class, new PlinkRodAdaptor());
|
||||
|
|
@ -94,24 +96,24 @@ public class VariantContextAdaptors {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private static class RodDBSnpAdaptor extends VCAdaptor {
|
||||
private static class DBSnpAdaptor extends VCAdaptor {
|
||||
VariantContext convert(String name, Object input) {
|
||||
return convert(name, input, null);
|
||||
}
|
||||
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
rodDbSNP dbsnp = (rodDbSNP)input;
|
||||
if ( ! Allele.acceptableAlleleBases(dbsnp.getReference()) )
|
||||
DbSNPFeature dbsnp = (DbSNPFeature)input;
|
||||
if ( ! Allele.acceptableAlleleBases(DbSNPHelper.getReference(dbsnp)) )
|
||||
return null;
|
||||
Allele refAllele = new Allele(dbsnp.getReference(), true);
|
||||
Allele refAllele = new Allele(DbSNPHelper.getReference(dbsnp), true);
|
||||
|
||||
if ( dbsnp.isSNP() || dbsnp.isIndel() || dbsnp.varType.contains("mixed") ) {
|
||||
if ( DbSNPHelper.isSNP(dbsnp) || DbSNPHelper.isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
|
||||
// add the reference allele
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(refAllele);
|
||||
|
||||
// add all of the alt alleles
|
||||
for ( String alt : dbsnp.getAlternateAlleleList() ) {
|
||||
for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
||||
return null;
|
||||
|
|
@ -120,9 +122,9 @@ public class VariantContextAdaptors {
|
|||
}
|
||||
|
||||
Map<String, String> attributes = new HashMap<String, String>();
|
||||
attributes.put("ID", dbsnp.getRS_ID());
|
||||
attributes.put("ID", dbsnp.getRsID());
|
||||
Collection<Genotype> genotypes = null;
|
||||
VariantContext vc = new VariantContext(name, dbsnp.getLocation(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes);
|
||||
VariantContext vc = new VariantContext(name, GenomeLocParser.createGenomeLoc(dbsnp.getChr(),dbsnp.getStart(),dbsnp.getEnd()), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes);
|
||||
vc.validate();
|
||||
return vc;
|
||||
} else
|
||||
|
|
|
|||
|
|
@ -1,295 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import net.sf.samtools.util.SequenceUtil;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Example format:
|
||||
* 585 chr1 433 433 rs56289060 0 + - - -/C genomic insertion unknown 0 0 unknown between 1
|
||||
* 585 chr1 491 492 rs55998931 0 + C C C/T genomic single unknown 0 0 unknown exact 1
|
||||
* <p/>
|
||||
* User: mdepristo
|
||||
* Date: Feb 27, 2009
|
||||
* Time: 10:47:14 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class rodDbSNP extends BasicReferenceOrderedDatum {
|
||||
|
||||
public static final String STANDARD_DBSNP_TRACK_NAME = "dbsnp";
|
||||
|
||||
public GenomeLoc loc; // genome location of SNP
|
||||
// Reference sequence chromosome or scaffold
|
||||
// Start and stop positions in chrom
|
||||
|
||||
public String RS_ID; // Reference SNP identifier or Affy SNP name
|
||||
public String strand; // Which DNA strand contains the observed alleles
|
||||
|
||||
public String refBases; // the reference base according to NCBI, in the dbSNP file
|
||||
public String observed; // The sequences of the observed alleles from rs-fasta files
|
||||
|
||||
public String molType; // Sample type from exemplar ss
|
||||
public String varType; // The class of variant (simple, insertion, deletion, range, etc.)
|
||||
// Can be 'unknown','single','in-del','het','microsatellite','named','mixed','mnp','insertion','deletion'
|
||||
public String validationStatus; // The validation status of the SNP
|
||||
// one of set('unknown','by-cluster','by-frequency','by-submitter','by-2hit-2allele','by-hapmap')
|
||||
|
||||
public double avHet; // The average heterozygosity from all observations
|
||||
public double avHetSE; // The Standard Error for the average heterozygosity
|
||||
|
||||
public String func; // The functional category of the SNP (coding-synon, coding-nonsynon, intron, etc.)
|
||||
// set('unknown','coding-synon','intron','cds-reference','near-gene-3','near-gene-5',
|
||||
// 'nonsense','missense','frameshift','untranslated-3','untranslated-5','splice-3','splice-5')
|
||||
public String locType; // How the variant affects the reference sequence
|
||||
// enum('range','exact','between','rangeInsertion','rangeSubstitution','rangeDeletion')
|
||||
|
||||
public int weight; // The quality of the alignment
|
||||
|
||||
// cache the allele list so it doesn't need to get recomputed each time
|
||||
private List<String> alleleList = null;
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Constructors
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public rodDbSNP(final String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// manipulating the SNP information
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public GenomeLoc getLocation() {
|
||||
return loc;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference base(s) at this position
|
||||
*
|
||||
* @return the reference base or bases, as a string
|
||||
*/
|
||||
public String getReference() {
|
||||
return refBases;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
*
|
||||
* @return the log based error estimate
|
||||
*/
|
||||
public double getNegLog10PError() {
|
||||
return 4; // -log10(0.0001)
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public List<String> getAlternateAlleleList() {
|
||||
List<String> ret = new ArrayList<String>();
|
||||
for (String allele : getAlleleList())
|
||||
if (!allele.equals(getReference())) ret.add(allele);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public List<String> getAlleleList() {
|
||||
if ( alleleList == null ) {
|
||||
// add ref first
|
||||
if ( onFwdStrand() )
|
||||
alleleList = Arrays.asList(observed.split("/"));
|
||||
else
|
||||
alleleList = Arrays.asList(SequenceUtil.reverseComplement(observed).split("/"));
|
||||
if ( alleleList.size() > 0 && alleleList.contains(getReference()) && !alleleList.get(0).equals(this.getReference()) )
|
||||
Collections.swap(alleleList, alleleList.indexOf(getReference()), 0);
|
||||
}
|
||||
|
||||
return alleleList;
|
||||
}
|
||||
|
||||
public boolean onFwdStrand() {
|
||||
return strand.equals("+");
|
||||
}
|
||||
|
||||
/**
|
||||
* get the frequency of this variant
|
||||
*
|
||||
* @return VariantFrequency with the stored frequency
|
||||
*/
|
||||
public double getNonRefAlleleFrequency() {
|
||||
return 0; // dbSNP doesn't know the allele frequency
|
||||
}
|
||||
|
||||
//
|
||||
// What kind of variant are we?
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public boolean isSNP() {
|
||||
return varType.contains("single") && locType.contains("exact");
|
||||
}
|
||||
|
||||
public boolean isInsertion() {
|
||||
return varType.contains("insertion");
|
||||
}
|
||||
|
||||
public boolean isDeletion() {
|
||||
return varType.contains("deletion");
|
||||
}
|
||||
|
||||
public boolean isIndel() {
|
||||
return isInsertion() || isDeletion() || varType.contains("in-del");
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws a StingException when we can't parse out a
|
||||
* single base to return, the site isn't a snp, or the site isn't biallelic
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
public char getAlternativeBaseForSNP() {
|
||||
if (!isSNP()) throw new StingException("We're not a SNP; called in DbSNP rod at position " + this.loc);
|
||||
if (!isBiallelic()) throw new StingException("We're not biallelic; at position " + this.loc);
|
||||
List<String> ret = this.getAlternateAlleleList();
|
||||
if (ret.size() == 1 && ret.get(0).length() == 1)
|
||||
return ret.get(0).charAt(0);
|
||||
throw new StingException("getAlternativeBaseForSNP failed for DbSNP rod " + this.loc);
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the reference base is the case of a SNP. Throws a StingException if we're not a SNP.
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
public char getReferenceForSNP() {
|
||||
if (!isSNP()) throw new StingException("We're not a SNP; called in DbSNP rod at position " + this.loc);
|
||||
if (refBases.length() != 1) throw new StingException("The reference base in DbSNP must be zero, at position " + this.loc + " was " + refBases);
|
||||
return refBases.charAt(0); // we know it's length 1, this is ok
|
||||
}
|
||||
|
||||
public boolean isReference() {
|
||||
return false; // snp locations are never "reference", there's always a variant
|
||||
}
|
||||
|
||||
public boolean isHapmap() {
|
||||
return validationStatus.contains("by-hapmap");
|
||||
}
|
||||
|
||||
public boolean is2Hit2Allele() {
|
||||
return validationStatus.contains("by-2hit-2allele");
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// formatting
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
public String getRS_ID() { return RS_ID; }
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%s\t%s\t%d",
|
||||
getLocation().getContig(), getLocation().getStart(), getLocation().getStop() + 1,
|
||||
RS_ID, strand, refBases, observed, molType,
|
||||
varType, validationStatus, avHet, avHetSE, func, locType, weight);
|
||||
}
|
||||
|
||||
public String toSimpleString() {
|
||||
return String.format("%s:%s:%s", RS_ID, observed, strand);
|
||||
}
|
||||
|
||||
public String toMediumString() {
|
||||
String s = String.format("%s:%s:%s", getLocation().toString(), RS_ID, Utils.join("",this.getAlleleList()));
|
||||
if (isSNP()) s += ":SNP";
|
||||
if (isIndel()) s += ":Indel";
|
||||
if (isHapmap()) s += ":Hapmap";
|
||||
if (is2Hit2Allele()) s += ":2Hit";
|
||||
return s;
|
||||
}
|
||||
|
||||
public String repl() {
|
||||
return String.format("%d\t%s\t%d\t%d\t%s\t0\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%s\t%s\t%d",
|
||||
585, getLocation().getContig(), getLocation().getStart() - 1, getLocation().getStop(),
|
||||
RS_ID, strand, refBases, refBases, observed, molType,
|
||||
varType, validationStatus, avHet, avHetSE, func, locType, weight);
|
||||
}
|
||||
|
||||
public boolean parseLine(final Object header, final String[] parts) {
|
||||
try {
|
||||
String contig = parts[1];
|
||||
long start = Long.parseLong(parts[2]) + 1; // The final is 0 based
|
||||
long stop = Long.parseLong(parts[3]) + 1; // The final is 0 based
|
||||
loc = GenomeLocParser.parseGenomeLoc(contig, start, Math.max(start, stop - 1));
|
||||
|
||||
RS_ID = parts[4];
|
||||
strand = parts[6];
|
||||
refBases = parts[7];
|
||||
//if (strand.equals("-")) // this is just wrong lol
|
||||
// refBases = BaseUtils.simpleReverseComplement(refBases);
|
||||
observed = parts[9];
|
||||
molType = parts[10];
|
||||
varType = parts[11];
|
||||
validationStatus = parts[12];
|
||||
avHet = Double.parseDouble(parts[13]);
|
||||
avHetSE = Double.parseDouble(parts[14]);
|
||||
func = parts[15];
|
||||
locType = parts[16];
|
||||
weight = Integer.parseInt(parts[17]);
|
||||
//System.out.printf("Parsed %s%n", toString());
|
||||
return true;
|
||||
} catch (MalformedGenomeLocException ex) {
|
||||
// Just rethrow malformed genome locs; the ROD system itself will deal with these.
|
||||
throw ex;
|
||||
} catch (ArrayIndexOutOfBoundsException ex) {
|
||||
// Just rethrow malformed genome locs; the ROD system itself will deal with these.
|
||||
throw new RuntimeException("Badly formed dbSNP line: " + ex);
|
||||
} catch (RuntimeException e) {
|
||||
System.out.printf(" Exception caught during parsing DBSNP line %s%n", Utils.join(" <=> ", parts));
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
public double getHeterozygosity() {
|
||||
return avHet;
|
||||
}
|
||||
|
||||
public int getPloidy() throws IllegalStateException {
|
||||
return 2; // our DbSNP assumes a diploid human
|
||||
}
|
||||
|
||||
public boolean isBiallelic() {
|
||||
return getAlternateAlleleList().size() == 1;
|
||||
}
|
||||
|
||||
public static rodDbSNP getFirstRealSNP(List<Object> dbsnpList) {
|
||||
if (dbsnpList == null)
|
||||
return null;
|
||||
|
||||
rodDbSNP dbsnp = null;
|
||||
for (Object d : dbsnpList) {
|
||||
if (d instanceof rodDbSNP && ((rodDbSNP) d).isSNP()) {
|
||||
dbsnp = (rodDbSNP) d;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return dbsnp;
|
||||
}
|
||||
}
|
||||
|
|
@ -55,7 +55,6 @@ public class RODTrackBuilder implements RMDTrackBuilder {
|
|||
static {
|
||||
// All known ROD types
|
||||
Types.put("GFF", RodGenotypeChipAsGFF.class);
|
||||
Types.put("dbSNP", rodDbSNP.class);
|
||||
Types.put("SAMPileup", rodSAMPileup.class);
|
||||
Types.put("GELI", rodGELI.class);
|
||||
Types.put("RefSeq", rodRefSeq.class);
|
||||
|
|
|
|||
|
|
@ -67,8 +67,8 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
|||
public Map<String, Class> getAvailableTrackNamesAndTypes() {
|
||||
Map<String, Class> classes = new HashMap<String, Class>();
|
||||
for (String c : this.pluginsByName.keySet())
|
||||
// we're excluding dbSNP and VCF right now
|
||||
if (!c.contains("SNP") && !c.contains("VCF")) classes.put(c,this.pluginsByName.get(c));
|
||||
// we're excluding VCF right now
|
||||
if (!c.contains("VCF")) classes.put(c,this.pluginsByName.get(c));
|
||||
return classes;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,130 @@
|
|||
package org.broadinstitute.sting.gatk.refdata.utils.helpers;
|
||||
|
||||
import net.sf.samtools.util.SequenceUtil;
|
||||
import org.broad.tribble.annotation.Strand;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* this class contains static helper methods for DbSNP
|
||||
*/
|
||||
public class DbSNPHelper {
|
||||
public static final String STANDARD_DBSNP_TRACK_NAME = "dbsnp";
|
||||
|
||||
private DbSNPHelper() {} // don't make a DbSNPHelper
|
||||
|
||||
public static DbSNPFeature getFirstRealSNP(List<Object> dbsnpList) {
|
||||
if (dbsnpList == null)
|
||||
return null;
|
||||
|
||||
DbSNPFeature dbsnp = null;
|
||||
for (Object d : dbsnpList) {
|
||||
if (d instanceof DbSNPFeature && DbSNPHelper.isSNP((DbSNPFeature)d)) {
|
||||
dbsnp = (DbSNPFeature) d;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return dbsnp;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
*
|
||||
* @return the log based error estimate
|
||||
*/
|
||||
public static double getNegLog10PError(DbSNPFeature feature) {
|
||||
return 4; // -log10(0.0001)
|
||||
}
|
||||
|
||||
//
|
||||
// What kind of variant are we?
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public static boolean isSNP(DbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact");
|
||||
}
|
||||
|
||||
public static String toMediumString(DbSNPFeature feature) {
|
||||
String s = String.format("%s:%d:%s:%s", feature.getChr(), feature.getStart(), feature.getRsID(), Utils.join("",feature.getObserved()));
|
||||
if (isSNP(feature)) s += ":SNP";
|
||||
if (isIndel(feature)) s += ":Indel";
|
||||
if (isHapmap(feature)) s += ":Hapmap";
|
||||
if (is2Hit2Allele(feature)) s += ":2Hit";
|
||||
return s;
|
||||
}
|
||||
|
||||
public static boolean isInsertion(DbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("insertion");
|
||||
}
|
||||
|
||||
public static boolean isDeletion(DbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("deletion");
|
||||
}
|
||||
|
||||
public static boolean isIndel(DbSNPFeature feature) {
|
||||
return DbSNPHelper.isInsertion(feature) || DbSNPHelper.isDeletion(feature) || feature.getVariantType().contains("in-del");
|
||||
}
|
||||
|
||||
|
||||
public static boolean isHapmap(DbSNPFeature feature) {
|
||||
return feature.getValidationStatus().contains("by-hapmap");
|
||||
}
|
||||
|
||||
public static boolean is2Hit2Allele(DbSNPFeature feature) {
|
||||
return feature.getValidationStatus().contains("by-2hit-2allele");
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public static List<String> getAlternateAlleleList(DbSNPFeature feature) {
|
||||
List<String> ret = new ArrayList<String>();
|
||||
for (String allele : getAlleleList(feature))
|
||||
if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele);
|
||||
return ret;
|
||||
}
|
||||
|
||||
public static boolean onFwdStrand(DbSNPFeature feature) {
|
||||
return feature.getStrand() == Strand.POSITIVE;
|
||||
}
|
||||
|
||||
public static String getReference(DbSNPFeature feature) {
|
||||
return feature.getNCBIRefBase();
|
||||
}
|
||||
|
||||
public static String toSimpleString(DbSNPFeature feature) {
|
||||
return String.format("%s:%s:%s", feature.getRsID(), feature.getObserved(), (feature.getStrand() == Strand.POSITIVE) ? "+" : "-");
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public static List<String> getAlleleList(DbSNPFeature feature) {
|
||||
List<String> alleleList = new ArrayList<String>();
|
||||
// add ref first
|
||||
if ( onFwdStrand(feature) )
|
||||
alleleList = Arrays.asList(feature.getObserved());
|
||||
else
|
||||
for (String str : feature.getObserved())
|
||||
alleleList.add(SequenceUtil.reverseComplement(str));
|
||||
if ( alleleList.size() > 0 && alleleList.contains(getReference(feature)) && !alleleList.get(0).equals(getReference(feature)) )
|
||||
Collections.swap(alleleList, alleleList.indexOf(getReference(feature)), 0);
|
||||
|
||||
return alleleList;
|
||||
}
|
||||
}
|
||||
|
|
@ -25,12 +25,13 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
|
@ -143,16 +144,16 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
|||
private String getReferenceOrderedData( RefMetaDataTracker tracker ) {
|
||||
ArrayList<String> rodStrings = new ArrayList<String>();
|
||||
for ( GATKFeature datum : tracker.getAllRods() ) {
|
||||
if ( datum != null && ! (datum.getUnderlyingObject() instanceof rodDbSNP)) {
|
||||
rodStrings.add(((ReferenceOrderedDatum)datum.getUnderlyingObject()).toSimpleString()); // TODO: Aaron figure out what to do with this line, it's bad form
|
||||
if ( datum != null && ! (datum.getUnderlyingObject() instanceof DbSNPFeature)) {
|
||||
rodStrings.add(((ReferenceOrderedDatum)datum.getUnderlyingObject()).toSimpleString()); // TODO: Aaron: this line still survives, try to remove it
|
||||
}
|
||||
}
|
||||
String rodString = Utils.join(", ", rodStrings);
|
||||
|
||||
rodDbSNP dbsnp = tracker.lookup(rodDbSNP.STANDARD_DBSNP_TRACK_NAME,rodDbSNP.class);
|
||||
DbSNPFeature dbsnp = tracker.lookup(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, DbSNPFeature.class);
|
||||
|
||||
if ( dbsnp != null)
|
||||
rodString += dbsnp.toMediumString();
|
||||
rodString += DbSNPHelper.toMediumString(dbsnp);
|
||||
|
||||
if ( !rodString.equals("") )
|
||||
rodString = "[ROD: " + rodString + "]";
|
||||
|
|
|
|||
|
|
@ -25,11 +25,13 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
|
|
@ -58,14 +60,14 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) )
|
||||
return 0;
|
||||
|
||||
rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getReferenceMetaData(rodDbSNP.STANDARD_DBSNP_TRACK_NAME));
|
||||
DbSNPFeature dbsnp = DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
|
||||
|
||||
Collection<VariantContext> contexts = tracker.getVariantContexts(ref, INPUT_ROD_NAME, ALLOWED_VARIANT_CONTEXT_TYPES, context.getLocation(), true, false);
|
||||
|
||||
for ( VariantContext vc : contexts ) {
|
||||
VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), Arrays.asList(ALLOWED_FORMAT_FIELDS), false, false);
|
||||
if ( dbsnp != null )
|
||||
vcf.setID(dbsnp.getRS_ID());
|
||||
vcf.setID(dbsnp.getRsID());
|
||||
// set the appropriate sample name if necessary
|
||||
if ( sampleName != null && vcf.hasGenotypeData() && vcf.getGenotype(INPUT_ROD_NAME) != null )
|
||||
vcf.getGenotype(INPUT_ROD_NAME).setSampleName(sampleName);
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
|
|
@ -32,8 +33,8 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
|||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
|
|
@ -145,7 +146,7 @@ public class VariantAnnotatorEngine {
|
|||
List<ReferenceOrderedDataSource> dataSources = engine.getRodDataSources();
|
||||
for ( ReferenceOrderedDataSource source : dataSources ) {
|
||||
RMDTrack rod = source.getReferenceOrderedData();
|
||||
if ( rod.getName().equals(rodDbSNP.STANDARD_DBSNP_TRACK_NAME) ) {
|
||||
if ( rod.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {
|
||||
annotateDbsnp = true;
|
||||
}
|
||||
if ( rod.getName().equals("hapmap2") ) {
|
||||
|
|
@ -181,11 +182,11 @@ public class VariantAnnotatorEngine {
|
|||
|
||||
// annotate dbsnp occurrence
|
||||
if ( annotateDbsnp ) {
|
||||
rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getReferenceMetaData(rodDbSNP.STANDARD_DBSNP_TRACK_NAME));
|
||||
DbSNPFeature dbsnp = DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
|
||||
infoAnnotations.put(VCFRecord.DBSNP_KEY, dbsnp == null ? "0" : "1");
|
||||
// annotate dbsnp id if available and not already there
|
||||
if ( dbsnp != null && !vc.hasAttribute("ID") )
|
||||
infoAnnotations.put("ID", dbsnp.getRS_ID());
|
||||
infoAnnotations.put("ID", dbsnp.getRsID());
|
||||
}
|
||||
|
||||
if ( annotateHapmap2 ) {
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||
|
||||
|
|
@ -96,7 +97,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
|||
*
|
||||
* @return the dbsnp rod if there is one at this position
|
||||
*/
|
||||
public static rodDbSNP getDbSNP(RefMetaDataTracker tracker) {
|
||||
return rodDbSNP.getFirstRealSNP(tracker.getReferenceMetaData(rodDbSNP.STANDARD_DBSNP_TRACK_NAME));
|
||||
public static DbSNPFeature getDbSNP(RefMetaDataTracker tracker) {
|
||||
return DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
|
||||
}
|
||||
}
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
|
||||
|
|
@ -388,9 +388,9 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
if ( bestAFguess != 0 )
|
||||
attributes.put(VCFRecord.ALLELE_FREQUENCY_KEY, new Double((double)bestAFguess / (double)(frequencyEstimationPoints-1)));
|
||||
|
||||
rodDbSNP dbsnp = getDbSNP(tracker);
|
||||
DbSNPFeature dbsnp = getDbSNP(tracker);
|
||||
if ( dbsnp != null )
|
||||
attributes.put("ID", dbsnp.getRS_ID());
|
||||
attributes.put("ID", dbsnp.getRsID());
|
||||
|
||||
if ( !UAC.NO_SLOD ) {
|
||||
// the overall lod
|
||||
|
|
|
|||
|
|
@ -33,8 +33,8 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
|
@ -121,7 +121,7 @@ public class UnifiedGenotyperEngine {
|
|||
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
|
||||
for ( ReferenceOrderedDataSource source : dataSources ) {
|
||||
RMDTrack rod = source.getReferenceOrderedData();
|
||||
if ( rod.getName().equals(rodDbSNP.STANDARD_DBSNP_TRACK_NAME) ) {
|
||||
if ( rod.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {
|
||||
this.annotateDbsnp = true;
|
||||
}
|
||||
if ( rod.getName().equals("hapmap2") ) {
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ import net.sf.samtools.util.SequenceUtil;
|
|||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
|
|
@ -153,7 +153,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
// Warn the user if a dbSNP file was specified since it isn't being used here
|
||||
boolean foundDBSNP = false;
|
||||
for( ReferenceOrderedDataSource rod : this.getToolkit().getRodDataSources() ) {
|
||||
if( rod.getName().equalsIgnoreCase(rodDbSNP.STANDARD_DBSNP_TRACK_NAME) ) {
|
||||
if( rod.getName().equalsIgnoreCase(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {
|
||||
foundDBSNP = true;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,14 +25,17 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.sequenom;
|
||||
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -68,13 +71,17 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
|||
if ( SNP_MASK != null ) {
|
||||
logger.info("Loading SNP mask... ");
|
||||
ReferenceOrderedData snp_mask;
|
||||
if ( SNP_MASK.contains(rodDbSNP.STANDARD_DBSNP_TRACK_NAME)) {
|
||||
snp_mask = new ReferenceOrderedData<rodDbSNP>("snp_mask",new java.io.File(SNP_MASK),rodDbSNP.class);
|
||||
if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) {
|
||||
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
|
||||
Iterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPFeature.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator();
|
||||
snpMaskIterator = new SeekableRODIterator(iter);
|
||||
|
||||
} else {
|
||||
snp_mask = new ReferenceOrderedData<TabularROD>("snp_mask",
|
||||
new java.io.File(SNP_MASK), TabularROD.class);
|
||||
snpMaskIterator = new SeekableRODIterator(new GATKFeatureIterator(snp_mask.iterator()));
|
||||
}
|
||||
snpMaskIterator = new SeekableRODIterator(new GATKFeatureIterator(snp_mask.iterator()));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils
|
|||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.playground.utils.report.ReportMarshaller;
|
||||
import org.broadinstitute.sting.playground.utils.report.VE2ReportFactory;
|
||||
|
|
@ -117,7 +117,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
protected String[] SELECT_NAMES = {};
|
||||
|
||||
@Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false)
|
||||
protected String[] KNOWN_NAMES = {rodDbSNP.STANDARD_DBSNP_TRACK_NAME};
|
||||
protected String[] KNOWN_NAMES = {DbSNPHelper.STANDARD_DBSNP_TRACK_NAME};
|
||||
|
||||
@Argument(shortName="sample", doc="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context", required=false)
|
||||
protected String[] SAMPLES = {};
|
||||
|
|
@ -270,7 +270,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
evalNames.add(d.getName());
|
||||
} else if ( d.getName().startsWith("comp") ) {
|
||||
compNames.add(d.getName());
|
||||
} else if ( d.getName().startsWith(rodDbSNP.STANDARD_DBSNP_TRACK_NAME) || d.getName().startsWith("hapmap") ) {
|
||||
} else if ( d.getName().startsWith(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) || d.getName().startsWith("hapmap") ) {
|
||||
compNames.add(d.getName());
|
||||
} else {
|
||||
logger.info("Not evaluating ROD binding " + d.getName());
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
|
@ -904,7 +904,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
|||
MultiSampleCallResult MultiSampleCall(RefMetaDataTracker tracker, char ref, AlignmentContext context, List<String> sample_names)
|
||||
{
|
||||
String in_dbsnp;
|
||||
if (tracker.getReferenceMetaData(rodDbSNP.STANDARD_DBSNP_TRACK_NAME).size() > 0) { in_dbsnp = "known"; } else { in_dbsnp = "novel"; }
|
||||
if (tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME).size() > 0) { in_dbsnp = "known"; } else { in_dbsnp = "novel"; }
|
||||
|
||||
AlignmentContext[] contexts = filterAlignmentContext(context, sample_names, 0);
|
||||
glCache.clear(); // reset the cache
|
||||
|
|
|
|||
|
|
@ -1,142 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
*
|
||||
* Class ValidateDbSNPConversion
|
||||
*
|
||||
* a quick walker to validate the dbSNP conversion.
|
||||
*/
|
||||
public class ValidateDbSNPConversion extends RefWalker<Pair<Matrix.BASE, Matrix.BASE>, Matrix> {
|
||||
@Override
|
||||
public Pair<Matrix.BASE, Matrix.BASE> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (!tracker.hasROD(rodDbSNP.STANDARD_DBSNP_TRACK_NAME)) return null;
|
||||
rodDbSNP rod = tracker.lookup(rodDbSNP.STANDARD_DBSNP_TRACK_NAME,rodDbSNP.class);
|
||||
if (rod != null && rod.isSNP() && rod.isBiallelic()) {
|
||||
return new Pair<Matrix.BASE, Matrix.BASE>(Matrix.BASE.toBase((byte) ref.getBase()), Matrix.BASE.toBase((byte) rod.getAlternativeBaseForSNP()));
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Provide an initial value for reduce computations.
|
||||
*
|
||||
* @return Initial value of reduce.
|
||||
*/
|
||||
@Override
|
||||
public Matrix reduceInit() {
|
||||
return new Matrix();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||
*
|
||||
* @param value result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
*
|
||||
* @return accumulator with result of the map taken into account.
|
||||
*/
|
||||
@Override
|
||||
public Matrix reduce(Pair<Matrix.BASE, Matrix.BASE> value, Matrix sum) {
|
||||
if (value != null) sum.addValue(value.first,value.second);
|
||||
return sum;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Matrix result) {
|
||||
result.printStats(out);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
class Matrix {
|
||||
public enum BASE {
|
||||
A((byte) 65), C((byte) 67), G((byte) 71), T((byte) 84), N((byte)78);
|
||||
private byte val = 65;
|
||||
|
||||
BASE(byte val) {
|
||||
this.val = val;
|
||||
}
|
||||
|
||||
byte getVal() {
|
||||
return val;
|
||||
}
|
||||
|
||||
static BASE toBase(byte b) {
|
||||
if (b > 96) b =- 32;
|
||||
if (b == A.val) return A;
|
||||
if (b == C.val) return C;
|
||||
if (b == G.val) return G;
|
||||
if (b == T.val) return T;
|
||||
// we don't really care, let return N instead of: throw new UnsupportedOperationException("Unknown base " + b);
|
||||
return N;
|
||||
}
|
||||
}
|
||||
|
||||
private int[][] mat = new int[BASE.values().length][BASE.values().length];
|
||||
|
||||
public void addValue(BASE ref, BASE alt) {
|
||||
mat[ref.ordinal()][alt.ordinal()] += 1;
|
||||
}
|
||||
|
||||
public void printStats(PrintStream stream) {
|
||||
for (BASE a : BASE.values()) {
|
||||
stream.print("ref " + a.toString() + ":");
|
||||
for (BASE b : BASE.values()) {
|
||||
stream.print(" " + mat[a.ordinal()][b.ordinal()]);
|
||||
}
|
||||
stream.println();
|
||||
}
|
||||
}
|
||||
|
||||
public void addMatrix(Matrix mt) {
|
||||
for (BASE a : BASE.values()) {
|
||||
for (BASE b : BASE.values()) {
|
||||
mat[a.ordinal()][b.ordinal()] += mt.mat[a.ordinal()][b.ordinal()];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Matrix() {
|
||||
for (int x = 0; x < BASE.values().length; x++) {
|
||||
for (int y = 0; y < BASE.values().length; y++) {
|
||||
mat[x][y] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,87 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class rodDbSNPUnitTest
|
||||
* <p/>
|
||||
* A descriptions should go here. Blame aaron if it's missing.
|
||||
*/
|
||||
public class rodDbSNPUnitTest extends BaseTest {
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeTests() {
|
||||
try {
|
||||
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("unable to load the sequence dictionary");
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
|
||||
}
|
||||
|
||||
public BufferedReader openFile(String filename) {
|
||||
try {
|
||||
return new BufferedReader(new FileReader(filename));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Couldn't open file " + filename);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
// count the number of SNP's between 10M and 11M on chr1 in dbSNP
|
||||
public void testDBSNPInput() {
|
||||
BufferedReader stream = openFile("/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod");
|
||||
int snpCount = 0;
|
||||
int indelCount = 0;
|
||||
try {
|
||||
String line = stream.readLine();
|
||||
rodDbSNP rod = new rodDbSNP("db");
|
||||
boolean stop = false;
|
||||
while (line != null && !stop) {
|
||||
rod.parseLine(null,line.split("\t"));
|
||||
rodDbSNP var = (rodDbSNP)rod;
|
||||
if (rod.isSNP()) {
|
||||
// quick check, if we're not triallelic, make sure the ref is right
|
||||
if (var.getReferenceForSNP() == var.refBases.charAt(0) || var.getAlternativeBaseForSNP() == var.refBases.charAt(0))
|
||||
// also make sure the ref is a single character
|
||||
if (var.refBases.length() == 1)
|
||||
Assert.assertTrue(var.refBases.charAt(0)==var.getReferenceForSNP());
|
||||
if (var.getLocation().getContig().equals("1") &&
|
||||
var.getLocation().getStart() >= 10000000 &&
|
||||
var.getLocation().getStart() <= 11000000) {
|
||||
if (var.isSNP()) {
|
||||
snpCount++;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
if (rod.isIndel())
|
||||
indelCount++;
|
||||
|
||||
stop = (var.getLocation().getContig().equals("1") && var.getLocation().getStart() > 11000000);
|
||||
line = stream.readLine();
|
||||
}
|
||||
Assert.assertEquals(3615,snpCount);
|
||||
Assert.assertEquals(9902,indelCount);
|
||||
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue