diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index ef51a65be..65d044664 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -30,6 +30,7 @@ import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.*; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; @@ -47,7 +48,6 @@ import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackManager; import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.commandline.ArgumentException; @@ -331,7 +331,7 @@ public class GenomeAnalysisEngine { // // please don't use these in the future, use the new syntax <- if we're not using these please remove them // - if (argCollection.DBSNPFile != null) bindConvenienceRods(rodDbSNP.STANDARD_DBSNP_TRACK_NAME, "dbsnp", argCollection.DBSNPFile); + if (argCollection.DBSNPFile != null) bindConvenienceRods(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, "dbsnp", argCollection.DBSNPFile); if (argCollection.HAPMAPFile != null) bindConvenienceRods("hapmap", "HapMapAlleleFrequencies", argCollection.HAPMAPFile); if (argCollection.HAPMAPChipFile != null) diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java index 5a96ce838..eba717cd0 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java @@ -76,7 +76,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { */ public LocationAwareSeekableRODIterator seek( Shard shard ) { if (iteratorPool == null) // use query - return getQuery(shard.getGenomeLocs()); + return getQuery(shard.getGenomeLocs() == null || shard.getGenomeLocs().size() == 0 ? null : shard.getGenomeLocs()); DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs().get(0)) : new EntireStream(); LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment); return RODIterator; @@ -91,7 +91,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { */ public LocationAwareSeekableRODIterator seek(GenomeLoc loc) { if (iteratorPool == null) // use query - return getQuery(Arrays.asList(loc)); + return getQuery(loc == null ? null : Arrays.asList(loc)); DataStreamSegment dataStreamSegment = loc != null ? new MappedStreamSegment(loc) : new EntireStream(); LocationAwareSeekableRODIterator RODIterator = iteratorPool.iterator(dataStreamSegment); return RODIterator; @@ -103,6 +103,8 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * @return a LocationAwareSeekableRODIterator over the selected region */ private LocationAwareSeekableRODIterator getQuery(List loc) { + if (loc == null) // for the mono shard case + return new SeekableRODIterator(rod.getIterator()); return new StitchingLocationAwareSeekableRODIterator(loc,(QueryableTrack)rod); } @@ -253,6 +255,7 @@ class StitchingLocationAwareSeekableRODIterator implements LocationAwareSeekable if (locationList != null && locationList.size() > 0) { GenomeLoc loc = locationList.getFirst(); locationList.removeFirst(); + if (rod == null) throw new StingException("Unable to query(), target rod is null, next location = " + ((locationList != null) ? locationList.getFirst() : "null")); try { iterator = new SeekableRODIterator(rod.query(loc)); } catch (IOException e) { diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index ab835b6b4..4556c7907 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -2,12 +2,14 @@ package org.broadinstitute.sting.gatk.refdata; import edu.mit.broad.picard.genotype.DiploidGenotype; import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; +import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.gelitext.GeliTextFeature; import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.CalledGenotype; import org.broadinstitute.sting.utils.genotype.LikelihoodObject; @@ -43,7 +45,7 @@ public class VariantContextAdaptors { private static Map adaptors = new HashMap(); static { - adaptors.put(rodDbSNP.class, new RodDBSnpAdaptor()); + adaptors.put(DbSNPFeature.class, new DBSnpAdaptor()); adaptors.put(RodVCF.class, new RodVCFAdaptor()); adaptors.put(VCFRecord.class, new VCFRecordAdaptor()); adaptors.put(PlinkRod.class, new PlinkRodAdaptor()); @@ -94,24 +96,24 @@ public class VariantContextAdaptors { // // -------------------------------------------------------------------------------------------------------------- - private static class RodDBSnpAdaptor extends VCAdaptor { + private static class DBSnpAdaptor extends VCAdaptor { VariantContext convert(String name, Object input) { return convert(name, input, null); } VariantContext convert(String name, Object input, ReferenceContext ref) { - rodDbSNP dbsnp = (rodDbSNP)input; - if ( ! Allele.acceptableAlleleBases(dbsnp.getReference()) ) + DbSNPFeature dbsnp = (DbSNPFeature)input; + if ( ! Allele.acceptableAlleleBases(DbSNPHelper.getReference(dbsnp)) ) return null; - Allele refAllele = new Allele(dbsnp.getReference(), true); + Allele refAllele = new Allele(DbSNPHelper.getReference(dbsnp), true); - if ( dbsnp.isSNP() || dbsnp.isIndel() || dbsnp.varType.contains("mixed") ) { + if ( DbSNPHelper.isSNP(dbsnp) || DbSNPHelper.isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") ) { // add the reference allele List alleles = new ArrayList(); alleles.add(refAllele); // add all of the alt alleles - for ( String alt : dbsnp.getAlternateAlleleList() ) { + for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) { if ( ! Allele.acceptableAlleleBases(alt) ) { //System.out.printf("Excluding dbsnp record %s%n", dbsnp); return null; @@ -120,9 +122,9 @@ public class VariantContextAdaptors { } Map attributes = new HashMap(); - attributes.put("ID", dbsnp.getRS_ID()); + attributes.put("ID", dbsnp.getRsID()); Collection genotypes = null; - VariantContext vc = new VariantContext(name, dbsnp.getLocation(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes); + VariantContext vc = new VariantContext(name, GenomeLocParser.createGenomeLoc(dbsnp.getChr(),dbsnp.getStart(),dbsnp.getEnd()), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes); vc.validate(); return vc; } else diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java deleted file mode 100644 index a20cc1ecb..000000000 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java +++ /dev/null @@ -1,295 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import net.sf.samtools.util.SequenceUtil; -import org.broadinstitute.sting.utils.*; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -/** - * Example format: - * 585 chr1 433 433 rs56289060 0 + - - -/C genomic insertion unknown 0 0 unknown between 1 - * 585 chr1 491 492 rs55998931 0 + C C C/T genomic single unknown 0 0 unknown exact 1 - *

- * User: mdepristo - * Date: Feb 27, 2009 - * Time: 10:47:14 AM - * To change this template use File | Settings | File Templates. - */ -public class rodDbSNP extends BasicReferenceOrderedDatum { - - public static final String STANDARD_DBSNP_TRACK_NAME = "dbsnp"; - - public GenomeLoc loc; // genome location of SNP - // Reference sequence chromosome or scaffold - // Start and stop positions in chrom - - public String RS_ID; // Reference SNP identifier or Affy SNP name - public String strand; // Which DNA strand contains the observed alleles - - public String refBases; // the reference base according to NCBI, in the dbSNP file - public String observed; // The sequences of the observed alleles from rs-fasta files - - public String molType; // Sample type from exemplar ss - public String varType; // The class of variant (simple, insertion, deletion, range, etc.) - // Can be 'unknown','single','in-del','het','microsatellite','named','mixed','mnp','insertion','deletion' - public String validationStatus; // The validation status of the SNP - // one of set('unknown','by-cluster','by-frequency','by-submitter','by-2hit-2allele','by-hapmap') - - public double avHet; // The average heterozygosity from all observations - public double avHetSE; // The Standard Error for the average heterozygosity - - public String func; // The functional category of the SNP (coding-synon, coding-nonsynon, intron, etc.) - // set('unknown','coding-synon','intron','cds-reference','near-gene-3','near-gene-5', - // 'nonsense','missense','frameshift','untranslated-3','untranslated-5','splice-3','splice-5') - public String locType; // How the variant affects the reference sequence - // enum('range','exact','between','rangeInsertion','rangeSubstitution','rangeDeletion') - - public int weight; // The quality of the alignment - - // cache the allele list so it doesn't need to get recomputed each time - private List alleleList = null; - - - // ---------------------------------------------------------------------- - // - // Constructors - // - // ---------------------------------------------------------------------- - public rodDbSNP(final String name) { - super(name); - } - - // ---------------------------------------------------------------------- - // - // manipulating the SNP information - // - // ---------------------------------------------------------------------- - public GenomeLoc getLocation() { - return loc; - } - - /** - * get the reference base(s) at this position - * - * @return the reference base or bases, as a string - */ - public String getReference() { - return refBases; - } - - /** - * get the -1 * (log 10 of the error value) - * - * @return the log based error estimate - */ - public double getNegLog10PError() { - return 4; // -log10(0.0001) - } - - /** - * gets the alternate alleles. This method should return all the alleles present at the location, - * NOT including the reference base. This is returned as a string list with no guarantee ordering - * of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest - * frequency). - * - * @return an alternate allele list - */ - public List getAlternateAlleleList() { - List ret = new ArrayList(); - for (String allele : getAlleleList()) - if (!allele.equals(getReference())) ret.add(allele); - return ret; - } - - /** - * gets the alleles. This method should return all the alleles present at the location, - * including the reference base. The first allele should always be the reference allele, followed - * by an unordered list of alternate alleles. - * - * @return an alternate allele list - */ - public List getAlleleList() { - if ( alleleList == null ) { - // add ref first - if ( onFwdStrand() ) - alleleList = Arrays.asList(observed.split("/")); - else - alleleList = Arrays.asList(SequenceUtil.reverseComplement(observed).split("/")); - if ( alleleList.size() > 0 && alleleList.contains(getReference()) && !alleleList.get(0).equals(this.getReference()) ) - Collections.swap(alleleList, alleleList.indexOf(getReference()), 0); - } - - return alleleList; - } - - public boolean onFwdStrand() { - return strand.equals("+"); - } - - /** - * get the frequency of this variant - * - * @return VariantFrequency with the stored frequency - */ - public double getNonRefAlleleFrequency() { - return 0; // dbSNP doesn't know the allele frequency - } - - // - // What kind of variant are we? - // - // ---------------------------------------------------------------------- - public boolean isSNP() { - return varType.contains("single") && locType.contains("exact"); - } - - public boolean isInsertion() { - return varType.contains("insertion"); - } - - public boolean isDeletion() { - return varType.contains("deletion"); - } - - public boolean isIndel() { - return isInsertion() || isDeletion() || varType.contains("in-del"); - } - - /** - * gets the alternate base is the case of a SNP. Throws a StingException when we can't parse out a - * single base to return, the site isn't a snp, or the site isn't biallelic - * - * @return a char, representing the alternate base - */ - public char getAlternativeBaseForSNP() { - if (!isSNP()) throw new StingException("We're not a SNP; called in DbSNP rod at position " + this.loc); - if (!isBiallelic()) throw new StingException("We're not biallelic; at position " + this.loc); - List ret = this.getAlternateAlleleList(); - if (ret.size() == 1 && ret.get(0).length() == 1) - return ret.get(0).charAt(0); - throw new StingException("getAlternativeBaseForSNP failed for DbSNP rod " + this.loc); - } - - /** - * gets the reference base is the case of a SNP. Throws a StingException if we're not a SNP. - * - * @return a char, representing the alternate base - */ - public char getReferenceForSNP() { - if (!isSNP()) throw new StingException("We're not a SNP; called in DbSNP rod at position " + this.loc); - if (refBases.length() != 1) throw new StingException("The reference base in DbSNP must be zero, at position " + this.loc + " was " + refBases); - return refBases.charAt(0); // we know it's length 1, this is ok - } - - public boolean isReference() { - return false; // snp locations are never "reference", there's always a variant - } - - public boolean isHapmap() { - return validationStatus.contains("by-hapmap"); - } - - public boolean is2Hit2Allele() { - return validationStatus.contains("by-2hit-2allele"); - } - - // ---------------------------------------------------------------------- - // - // formatting - // - // ---------------------------------------------------------------------- - - public String getRS_ID() { return RS_ID; } - - public String toString() { - return String.format("%s\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%s\t%s\t%d", - getLocation().getContig(), getLocation().getStart(), getLocation().getStop() + 1, - RS_ID, strand, refBases, observed, molType, - varType, validationStatus, avHet, avHetSE, func, locType, weight); - } - - public String toSimpleString() { - return String.format("%s:%s:%s", RS_ID, observed, strand); - } - - public String toMediumString() { - String s = String.format("%s:%s:%s", getLocation().toString(), RS_ID, Utils.join("",this.getAlleleList())); - if (isSNP()) s += ":SNP"; - if (isIndel()) s += ":Indel"; - if (isHapmap()) s += ":Hapmap"; - if (is2Hit2Allele()) s += ":2Hit"; - return s; - } - - public String repl() { - return String.format("%d\t%s\t%d\t%d\t%s\t0\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%s\t%s\t%d", - 585, getLocation().getContig(), getLocation().getStart() - 1, getLocation().getStop(), - RS_ID, strand, refBases, refBases, observed, molType, - varType, validationStatus, avHet, avHetSE, func, locType, weight); - } - - public boolean parseLine(final Object header, final String[] parts) { - try { - String contig = parts[1]; - long start = Long.parseLong(parts[2]) + 1; // The final is 0 based - long stop = Long.parseLong(parts[3]) + 1; // The final is 0 based - loc = GenomeLocParser.parseGenomeLoc(contig, start, Math.max(start, stop - 1)); - - RS_ID = parts[4]; - strand = parts[6]; - refBases = parts[7]; - //if (strand.equals("-")) // this is just wrong lol - // refBases = BaseUtils.simpleReverseComplement(refBases); - observed = parts[9]; - molType = parts[10]; - varType = parts[11]; - validationStatus = parts[12]; - avHet = Double.parseDouble(parts[13]); - avHetSE = Double.parseDouble(parts[14]); - func = parts[15]; - locType = parts[16]; - weight = Integer.parseInt(parts[17]); - //System.out.printf("Parsed %s%n", toString()); - return true; - } catch (MalformedGenomeLocException ex) { - // Just rethrow malformed genome locs; the ROD system itself will deal with these. - throw ex; - } catch (ArrayIndexOutOfBoundsException ex) { - // Just rethrow malformed genome locs; the ROD system itself will deal with these. - throw new RuntimeException("Badly formed dbSNP line: " + ex); - } catch (RuntimeException e) { - System.out.printf(" Exception caught during parsing DBSNP line %s%n", Utils.join(" <=> ", parts)); - throw e; - } - } - - public double getHeterozygosity() { - return avHet; - } - - public int getPloidy() throws IllegalStateException { - return 2; // our DbSNP assumes a diploid human - } - - public boolean isBiallelic() { - return getAlternateAlleleList().size() == 1; - } - - public static rodDbSNP getFirstRealSNP(List dbsnpList) { - if (dbsnpList == null) - return null; - - rodDbSNP dbsnp = null; - for (Object d : dbsnpList) { - if (d instanceof rodDbSNP && ((rodDbSNP) d).isSNP()) { - dbsnp = (rodDbSNP) d; - break; - } - } - - return dbsnp; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java index f189e4228..206caea03 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java @@ -55,7 +55,6 @@ public class RODTrackBuilder implements RMDTrackBuilder { static { // All known ROD types Types.put("GFF", RodGenotypeChipAsGFF.class); - Types.put("dbSNP", rodDbSNP.class); Types.put("SAMPileup", rodSAMPileup.class); Types.put("GELI", rodGELI.class); Types.put("RefSeq", rodRefSeq.class); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java index 822f51192..269e7e589 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java @@ -67,8 +67,8 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen public Map getAvailableTrackNamesAndTypes() { Map classes = new HashMap(); for (String c : this.pluginsByName.keySet()) - // we're excluding dbSNP and VCF right now - if (!c.contains("SNP") && !c.contains("VCF")) classes.put(c,this.pluginsByName.get(c)); + // we're excluding VCF right now + if (!c.contains("VCF")) classes.put(c,this.pluginsByName.get(c)); return classes; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java new file mode 100644 index 000000000..e96e9a5b4 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java @@ -0,0 +1,130 @@ +package org.broadinstitute.sting.gatk.refdata.utils.helpers; + +import net.sf.samtools.util.SequenceUtil; +import org.broad.tribble.annotation.Strand; +import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broadinstitute.sting.utils.Utils; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * this class contains static helper methods for DbSNP + */ +public class DbSNPHelper { + public static final String STANDARD_DBSNP_TRACK_NAME = "dbsnp"; + + private DbSNPHelper() {} // don't make a DbSNPHelper + + public static DbSNPFeature getFirstRealSNP(List dbsnpList) { + if (dbsnpList == null) + return null; + + DbSNPFeature dbsnp = null; + for (Object d : dbsnpList) { + if (d instanceof DbSNPFeature && DbSNPHelper.isSNP((DbSNPFeature)d)) { + dbsnp = (DbSNPFeature) d; + break; + } + } + + return dbsnp; + } + + /** + * get the -1 * (log 10 of the error value) + * + * @return the log based error estimate + */ + public static double getNegLog10PError(DbSNPFeature feature) { + return 4; // -log10(0.0001) + } + + // + // What kind of variant are we? + // + // ---------------------------------------------------------------------- + public static boolean isSNP(DbSNPFeature feature) { + return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact"); + } + + public static String toMediumString(DbSNPFeature feature) { + String s = String.format("%s:%d:%s:%s", feature.getChr(), feature.getStart(), feature.getRsID(), Utils.join("",feature.getObserved())); + if (isSNP(feature)) s += ":SNP"; + if (isIndel(feature)) s += ":Indel"; + if (isHapmap(feature)) s += ":Hapmap"; + if (is2Hit2Allele(feature)) s += ":2Hit"; + return s; + } + + public static boolean isInsertion(DbSNPFeature feature) { + return feature.getVariantType().contains("insertion"); + } + + public static boolean isDeletion(DbSNPFeature feature) { + return feature.getVariantType().contains("deletion"); + } + + public static boolean isIndel(DbSNPFeature feature) { + return DbSNPHelper.isInsertion(feature) || DbSNPHelper.isDeletion(feature) || feature.getVariantType().contains("in-del"); + } + + + public static boolean isHapmap(DbSNPFeature feature) { + return feature.getValidationStatus().contains("by-hapmap"); + } + + public static boolean is2Hit2Allele(DbSNPFeature feature) { + return feature.getValidationStatus().contains("by-2hit-2allele"); + } + + /** + * gets the alternate alleles. This method should return all the alleles present at the location, + * NOT including the reference base. This is returned as a string list with no guarantee ordering + * of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest + * frequency). + * + * @return an alternate allele list + */ + public static List getAlternateAlleleList(DbSNPFeature feature) { + List ret = new ArrayList(); + for (String allele : getAlleleList(feature)) + if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele); + return ret; + } + + public static boolean onFwdStrand(DbSNPFeature feature) { + return feature.getStrand() == Strand.POSITIVE; + } + + public static String getReference(DbSNPFeature feature) { + return feature.getNCBIRefBase(); + } + + public static String toSimpleString(DbSNPFeature feature) { + return String.format("%s:%s:%s", feature.getRsID(), feature.getObserved(), (feature.getStrand() == Strand.POSITIVE) ? "+" : "-"); + } + + /** + * gets the alleles. This method should return all the alleles present at the location, + * including the reference base. The first allele should always be the reference allele, followed + * by an unordered list of alternate alleles. + * + * @return an alternate allele list + */ + public static List getAlleleList(DbSNPFeature feature) { + List alleleList = new ArrayList(); + // add ref first + if ( onFwdStrand(feature) ) + alleleList = Arrays.asList(feature.getObserved()); + else + for (String str : feature.getObserved()) + alleleList.add(SequenceUtil.reverseComplement(str)); + if ( alleleList.size() > 0 && alleleList.contains(getReference(feature)) && !alleleList.get(0).equals(getReference(feature)) ) + Collections.swap(alleleList, alleleList.indexOf(getReference(feature)), 0); + + return alleleList; + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index d5a384949..4c78fdacf 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -25,12 +25,13 @@ package org.broadinstitute.sting.gatk.walkers; +import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.commandline.Argument; @@ -143,16 +144,16 @@ public class PileupWalker extends LocusWalker implements TreeR private String getReferenceOrderedData( RefMetaDataTracker tracker ) { ArrayList rodStrings = new ArrayList(); for ( GATKFeature datum : tracker.getAllRods() ) { - if ( datum != null && ! (datum.getUnderlyingObject() instanceof rodDbSNP)) { - rodStrings.add(((ReferenceOrderedDatum)datum.getUnderlyingObject()).toSimpleString()); // TODO: Aaron figure out what to do with this line, it's bad form + if ( datum != null && ! (datum.getUnderlyingObject() instanceof DbSNPFeature)) { + rodStrings.add(((ReferenceOrderedDatum)datum.getUnderlyingObject()).toSimpleString()); // TODO: Aaron: this line still survives, try to remove it } } String rodString = Utils.join(", ", rodStrings); - rodDbSNP dbsnp = tracker.lookup(rodDbSNP.STANDARD_DBSNP_TRACK_NAME,rodDbSNP.class); + DbSNPFeature dbsnp = tracker.lookup(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, DbSNPFeature.class); if ( dbsnp != null) - rodString += dbsnp.toMediumString(); + rodString += DbSNPHelper.toMediumString(dbsnp); if ( !rodString.equals("") ) rodString = "[ROD: " + rodString + "]"; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java index 2f686809f..cbeb9a297 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java @@ -25,11 +25,13 @@ package org.broadinstitute.sting.gatk.walkers; +import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.BaseUtils; @@ -58,14 +60,14 @@ public class VariantsToVCF extends RodWalker { if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) ) return 0; - rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getReferenceMetaData(rodDbSNP.STANDARD_DBSNP_TRACK_NAME)); + DbSNPFeature dbsnp = DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); Collection contexts = tracker.getVariantContexts(ref, INPUT_ROD_NAME, ALLOWED_VARIANT_CONTEXT_TYPES, context.getLocation(), true, false); for ( VariantContext vc : contexts ) { VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), Arrays.asList(ALLOWED_FORMAT_FIELDS), false, false); if ( dbsnp != null ) - vcf.setID(dbsnp.getRS_ID()); + vcf.setID(dbsnp.getRsID()); // set the appropriate sample name if necessary if ( sampleName != null && vcf.hasGenotypeData() && vcf.getGenotype(INPUT_ROD_NAME) != null ) vcf.getGenotype(INPUT_ROD_NAME).setSampleName(sampleName); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index dfd76bcc5..1b7cbc5bb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; @@ -32,8 +33,8 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; +import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; @@ -145,7 +146,7 @@ public class VariantAnnotatorEngine { List dataSources = engine.getRodDataSources(); for ( ReferenceOrderedDataSource source : dataSources ) { RMDTrack rod = source.getReferenceOrderedData(); - if ( rod.getName().equals(rodDbSNP.STANDARD_DBSNP_TRACK_NAME) ) { + if ( rod.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { annotateDbsnp = true; } if ( rod.getName().equals("hapmap2") ) { @@ -181,11 +182,11 @@ public class VariantAnnotatorEngine { // annotate dbsnp occurrence if ( annotateDbsnp ) { - rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getReferenceMetaData(rodDbSNP.STANDARD_DBSNP_TRACK_NAME)); + DbSNPFeature dbsnp = DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); infoAnnotations.put(VCFRecord.DBSNP_KEY, dbsnp == null ? "0" : "1"); // annotate dbsnp id if available and not already there if ( dbsnp != null && !vc.hasAttribute("ID") ) - infoAnnotations.put("ID", dbsnp.getRS_ID()); + infoAnnotations.put("ID", dbsnp.getRsID()); } if ( annotateHapmap2 ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index 485ad3793..995ceb174 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -1,9 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; +import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; +import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; @@ -96,7 +97,7 @@ public abstract class GenotypeCalculationModel implements Cloneable { * * @return the dbsnp rod if there is one at this position */ - public static rodDbSNP getDbSNP(RefMetaDataTracker tracker) { - return rodDbSNP.getFirstRealSNP(tracker.getReferenceMetaData(rodDbSNP.STANDARD_DBSNP_TRACK_NAME)); + public static DbSNPFeature getDbSNP(RefMetaDataTracker tracker) { + return DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index 3241fa85f..8c1771d8a 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.*; @@ -388,9 +388,9 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc if ( bestAFguess != 0 ) attributes.put(VCFRecord.ALLELE_FREQUENCY_KEY, new Double((double)bestAFguess / (double)(frequencyEstimationPoints-1))); - rodDbSNP dbsnp = getDbSNP(tracker); + DbSNPFeature dbsnp = getDbSNP(tracker); if ( dbsnp != null ) - attributes.put("ID", dbsnp.getRS_ID()); + attributes.put("ID", dbsnp.getRsID()); if ( !UAC.NO_SLOD ) { // the overall lod diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 68417917a..dcc8fa16d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -33,8 +33,8 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; +import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.BaseUtils; @@ -121,7 +121,7 @@ public class UnifiedGenotyperEngine { List dataSources = toolkit.getRodDataSources(); for ( ReferenceOrderedDataSource source : dataSources ) { RMDTrack rod = source.getReferenceOrderedData(); - if ( rod.getName().equals(rodDbSNP.STANDARD_DBSNP_TRACK_NAME) ) { + if ( rod.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { this.annotateDbsnp = true; } if ( rod.getName().equals("hapmap2") ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 06619bb80..737abce66 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -44,7 +44,7 @@ import net.sf.samtools.util.SequenceUtil; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; +import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Requires; @@ -153,7 +153,7 @@ public class TableRecalibrationWalker extends ReadWalker { if ( SNP_MASK != null ) { logger.info("Loading SNP mask... "); ReferenceOrderedData snp_mask; - if ( SNP_MASK.contains(rodDbSNP.STANDARD_DBSNP_TRACK_NAME)) { - snp_mask = new ReferenceOrderedData("snp_mask",new java.io.File(SNP_MASK),rodDbSNP.class); + if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) { + TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder(); + Iterator iter = builder.createInstanceOfTrack(DbSNPFeature.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator(); + snpMaskIterator = new SeekableRODIterator(iter); + } else { snp_mask = new ReferenceOrderedData("snp_mask", new java.io.File(SNP_MASK), TabularROD.class); + snpMaskIterator = new SeekableRODIterator(new GATKFeatureIterator(snp_mask.iterator())); } - snpMaskIterator = new SeekableRODIterator(new GATKFeatureIterator(snp_mask.iterator())); + } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index f14c4e8c7..67e3ef9a9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; +import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.playground.utils.report.ReportMarshaller; import org.broadinstitute.sting.playground.utils.report.VE2ReportFactory; @@ -117,7 +117,7 @@ public class VariantEvalWalker extends RodWalker { protected String[] SELECT_NAMES = {}; @Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false) - protected String[] KNOWN_NAMES = {rodDbSNP.STANDARD_DBSNP_TRACK_NAME}; + protected String[] KNOWN_NAMES = {DbSNPHelper.STANDARD_DBSNP_TRACK_NAME}; @Argument(shortName="sample", doc="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context", required=false) protected String[] SAMPLES = {}; @@ -270,7 +270,7 @@ public class VariantEvalWalker extends RodWalker { evalNames.add(d.getName()); } else if ( d.getName().startsWith("comp") ) { compNames.add(d.getName()); - } else if ( d.getName().startsWith(rodDbSNP.STANDARD_DBSNP_TRACK_NAME) || d.getName().startsWith("hapmap") ) { + } else if ( d.getName().startsWith(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) || d.getName().startsWith("hapmap") ) { compNames.add(d.getName()); } else { logger.info("Not evaluating ROD binding " + d.getName()); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/multisamplecaller/MultiSampleCaller.java b/java/src/org/broadinstitute/sting/oneoffprojects/multisamplecaller/MultiSampleCaller.java index 5024162b4..d5168fdbb 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/multisamplecaller/MultiSampleCaller.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/multisamplecaller/MultiSampleCaller.java @@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; +import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.commandline.Argument; @@ -904,7 +904,7 @@ public class MultiSampleCaller extends LocusWalker sample_names) { String in_dbsnp; - if (tracker.getReferenceMetaData(rodDbSNP.STANDARD_DBSNP_TRACK_NAME).size() > 0) { in_dbsnp = "known"; } else { in_dbsnp = "novel"; } + if (tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME).size() > 0) { in_dbsnp = "known"; } else { in_dbsnp = "novel"; } AlignmentContext[] contexts = filterAlignmentContext(context, sample_names, 0); glCache.clear(); // reset the cache diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateDbSNPConversion.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateDbSNPConversion.java deleted file mode 100644 index a2d178f57..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateDbSNPConversion.java +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.oneoffprojects.walkers; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; -import org.broadinstitute.sting.gatk.walkers.RefWalker; -import org.broadinstitute.sting.utils.collections.Pair; - -import java.io.PrintStream; - - -/** - * @author aaron - * - * Class ValidateDbSNPConversion - * - * a quick walker to validate the dbSNP conversion. - */ -public class ValidateDbSNPConversion extends RefWalker, Matrix> { - @Override - public Pair map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (!tracker.hasROD(rodDbSNP.STANDARD_DBSNP_TRACK_NAME)) return null; - rodDbSNP rod = tracker.lookup(rodDbSNP.STANDARD_DBSNP_TRACK_NAME,rodDbSNP.class); - if (rod != null && rod.isSNP() && rod.isBiallelic()) { - return new Pair(Matrix.BASE.toBase((byte) ref.getBase()), Matrix.BASE.toBase((byte) rod.getAlternativeBaseForSNP())); - } - return null; - } - - /** - * Provide an initial value for reduce computations. - * - * @return Initial value of reduce. - */ - @Override - public Matrix reduceInit() { - return new Matrix(); - } - - /** - * Reduces a single map with the accumulator provided as the ReduceType. - * - * @param value result of the map. - * @param sum accumulator for the reduce. - * - * @return accumulator with result of the map taken into account. - */ - @Override - public Matrix reduce(Pair value, Matrix sum) { - if (value != null) sum.addValue(value.first,value.second); - return sum; - } - - public void onTraversalDone(Matrix result) { - result.printStats(out); - } - -} - - -class Matrix { - public enum BASE { - A((byte) 65), C((byte) 67), G((byte) 71), T((byte) 84), N((byte)78); - private byte val = 65; - - BASE(byte val) { - this.val = val; - } - - byte getVal() { - return val; - } - - static BASE toBase(byte b) { - if (b > 96) b =- 32; - if (b == A.val) return A; - if (b == C.val) return C; - if (b == G.val) return G; - if (b == T.val) return T; - // we don't really care, let return N instead of: throw new UnsupportedOperationException("Unknown base " + b); - return N; - } - } - - private int[][] mat = new int[BASE.values().length][BASE.values().length]; - - public void addValue(BASE ref, BASE alt) { - mat[ref.ordinal()][alt.ordinal()] += 1; - } - - public void printStats(PrintStream stream) { - for (BASE a : BASE.values()) { - stream.print("ref " + a.toString() + ":"); - for (BASE b : BASE.values()) { - stream.print(" " + mat[a.ordinal()][b.ordinal()]); - } - stream.println(); - } - } - - public void addMatrix(Matrix mt) { - for (BASE a : BASE.values()) { - for (BASE b : BASE.values()) { - mat[a.ordinal()][b.ordinal()] += mt.mat[a.ordinal()][b.ordinal()]; - } - } - } - - public Matrix() { - for (int x = 0; x < BASE.values().length; x++) { - for (int y = 0; y < BASE.values().length; y++) { - mat[x][y] = 0; - } - } - } -} \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/rodDbSNPUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/rodDbSNPUnitTest.java deleted file mode 100644 index d370da3e8..000000000 --- a/java/test/org/broadinstitute/sting/gatk/refdata/rodDbSNPUnitTest.java +++ /dev/null @@ -1,87 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; - -import java.io.*; - - -/** - * @author aaron - *

- * Class rodDbSNPUnitTest - *

- * A descriptions should go here. Blame aaron if it's missing. - */ -public class rodDbSNPUnitTest extends BaseTest { - private static IndexedFastaSequenceFile seq; - - @BeforeClass - public static void beforeTests() { - try { - seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta")); - } catch (FileNotFoundException e) { - throw new StingException("unable to load the sequence dictionary"); - } - GenomeLocParser.setupRefContigOrdering(seq); - - } - - public BufferedReader openFile(String filename) { - try { - return new BufferedReader(new FileReader(filename)); - } catch (FileNotFoundException e) { - throw new StingException("Couldn't open file " + filename); - } - - } - - @Test - // count the number of SNP's between 10M and 11M on chr1 in dbSNP - public void testDBSNPInput() { - BufferedReader stream = openFile("/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod"); - int snpCount = 0; - int indelCount = 0; - try { - String line = stream.readLine(); - rodDbSNP rod = new rodDbSNP("db"); - boolean stop = false; - while (line != null && !stop) { - rod.parseLine(null,line.split("\t")); - rodDbSNP var = (rodDbSNP)rod; - if (rod.isSNP()) { - // quick check, if we're not triallelic, make sure the ref is right - if (var.getReferenceForSNP() == var.refBases.charAt(0) || var.getAlternativeBaseForSNP() == var.refBases.charAt(0)) - // also make sure the ref is a single character - if (var.refBases.length() == 1) - Assert.assertTrue(var.refBases.charAt(0)==var.getReferenceForSNP()); - if (var.getLocation().getContig().equals("1") && - var.getLocation().getStart() >= 10000000 && - var.getLocation().getStart() <= 11000000) { - if (var.isSNP()) { - snpCount++; - } - - } - } - if (rod.isIndel()) - indelCount++; - - stop = (var.getLocation().getContig().equals("1") && var.getLocation().getStart() > 11000000); - line = stream.readLine(); - } - Assert.assertEquals(3615,snpCount); - Assert.assertEquals(9902,indelCount); - - } catch (IOException e) { - e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. - } - } - - -}