diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/HapmapVCFROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/HapmapVCFROD.java
new file mode 100644
index 000000000..7a6f9d490
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/HapmapVCFROD.java
@@ -0,0 +1,222 @@
+package org.broadinstitute.sting.gatk.refdata;
+
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
+import org.broadinstitute.sting.utils.genotype.Genotype;
+import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
+import org.broadinstitute.sting.utils.genotype.Variation;
+import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
+import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
+import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * A (hopefully temporary) wrapper class for certain VCF files that we want to protect from
+ * utilities that grab genotypes or sample names across all VCF files.
+ *
+ * <p>Every accessor forwards to the wrapped {@link RodVCF}. An instance built with
+ * {@link #HapmapVCFROD(String)} wraps nothing until {@link #initialize(File)} has been called.
+ *
+ * @author chartl
+ * @since Jan 29, 2010
+ */
+public class HapmapVCFROD extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype, Iterator {
+
+    /** The ROD every call is delegated to; stays null until a constructor or initialize() sets it. */
+    private RodVCF rod;
+
+    /** @return the wrapped ROD's reader */
+    public VCFReader getReader() {
+        return rod.getReader();
+    }
+
+    /** @return the wrapped ROD's record */
+    public VCFRecord getRecord() {
+        return rod.getRecord();
+    }
+
+    /**
+     * Creates an empty wrapper; call {@link #initialize(File)} before using any delegating method.
+     *
+     * @param name the name passed on to the base datum
+     */
+    public HapmapVCFROD(String name) {
+        super(name);
+    }
+
+    /**
+     * Creates a wrapper around a new {@link RodVCF} built from the given record and reader.
+     *
+     * @param name          the name given to both this datum and the wrapped ROD
+     * @param currentRecord the record handed to the wrapped ROD
+     * @param reader        the reader handed to the wrapped ROD
+     */
+    public HapmapVCFROD(String name, VCFRecord currentRecord, VCFReader reader) {
+        super(name);
+        rod = new RodVCF(name, currentRecord, reader);
+    }
+
+    /**
+     * Creates a wrapper around an existing ROD.
+     *
+     * @param name the name of this datum
+     * @param rod  the ROD to delegate to
+     */
+    public HapmapVCFROD(String name, RodVCF rod) {
+        super(name);
+        this.rod = rod;
+    }
+
+    /**
+     * Replaces the wrapped ROD with a fresh one and initializes it from the given file.
+     *
+     * @param source the file handed to the wrapped ROD's initialize method
+     * @return the header of the wrapped ROD
+     * @throws FileNotFoundException if the wrapped ROD's initialization throws it
+     */
+    public Object initialize(final File source) throws FileNotFoundException {
+        rod = new RodVCF(name);
+        rod.initialize(source);
+        return rod.getHeader();
+    }
+
+    /**
+     * Delegates line parsing to the wrapped ROD.
+     *
+     * @param obj  passed through unchanged to the wrapped ROD
+     * @param args passed through unchanged to the wrapped ROD
+     * @return whatever the wrapped ROD's parseLine returns
+     * @throws UnsupportedOperationException wrapping any exception raised while delegating
+     */
+    public boolean parseLine(Object obj, String[] args) {
+        try {
+            return rod.parseLine(obj, args);
+        } catch (Exception e) {
+            // Any failure is reported as unsupported, with the original exception kept as the cause.
+            throw new UnsupportedOperationException("Parse line not supported", e);
+        }
+    }
+
+    // ---- Variation / genotype accessors: each one simply forwards to the wrapped ROD. ----
+
+    public double getNegLog10PError() {
+        return rod.getNegLog10PError();
+    }
+
+    public List getGenotypes() {
+        return rod.getGenotypes();
+    }
+
+    public String getReference() {
+        return rod.getReference();
+    }
+
+    public String toString() {
+        return rod.toString();
+    }
+
+    public List getAlternateAlleleList() {
+        return rod.getAlternateAlleleList();
+    }
+
+    public boolean isDeletion() {
+        return rod.isDeletion();
+    }
+
+    public GenomeLoc getLocation() {
+        return rod.getLocation();
+    }
+
+    public boolean isBiallelic() {
+        return rod.isBiallelic();
+    }
+
+    public boolean isIndel() {
+        return rod.isIndel();
+    }
+
+    public Variation.VARIANT_TYPE getType() {
+        return rod.getType();
+    }
+
+    public boolean isSNP() {
+        return rod.isSNP();
+    }
+
+    public boolean isReference() {
+        return rod.isReference();
+    }
+
+    public double getNonRefAlleleFrequency() {
+        return rod.getNonRefAlleleFrequency();
+    }
+
+    public char getAlternativeBaseForSNP() {
+        return rod.getAlternativeBaseForSNP();
+    }
+
+    public boolean isInsertion() {
+        return rod.isInsertion();
+    }
+
+    public List getAlleleList() {
+        return rod.getAlleleList();
+    }
+
+    public Genotype getCalledGenotype() {
+        return rod.getCalledGenotype();
+    }
+
+    public char getReferenceForSNP() {
+        return rod.getReferenceForSNP();
+    }
+
+    public boolean hasGenotype(DiploidGenotype g) {
+        return rod.hasGenotype(g);
+    }
+
+    public VCFHeader getHeader() {
+        return rod.getHeader();
+    }
+
+    // ---- Iterator support: also forwarded to the wrapped ROD. ----
+
+    public boolean hasNext() {
+        return rod.hasNext();
+    }
+
+    /** @return a new wrapper, carrying this datum's name, around the wrapped ROD's next element */
+    public HapmapVCFROD next() {
+        return new HapmapVCFROD(name, rod.next());
+    }
+
+    public void remove() {
+        rod.remove();
+    }
+
+    /**
+     * Builds a wrapper around a {@link RodVCF} initialized from the given file.
+     *
+     * @param name the name given to both the wrapped ROD and the returned wrapper
+     * @param file the file to initialize the wrapped ROD from
+     * @return a wrapper around the initialized ROD
+     * @throws StingException if the file cannot be found; the FileNotFoundException is kept as its cause
+     */
+    public static HapmapVCFROD createIterator(String name, File file) {
+        RodVCF vcf = new RodVCF(name);
+        try {
+            vcf.initialize(file);
+        } catch (FileNotFoundException e) {
+            // Chain the cause so the underlying reason is not lost from the stack trace.
+            throw new StingException("Unable to find file " + file, e);
+        }
+        return new HapmapVCFROD(name, vcf);
+    }
+
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java
index a770a950a..ba7867b24 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java
@@ -173,7 +173,7 @@ public class RODRecordIterator implements Ite
             catch ( Exception e ) {
                 throw new StingException("Failed to parse ROD data ("+type.getName()+") from file "+ file + " at line #"+linenum+
                         "\nOffending line: "+line+
-                        "\nReason ("+e.getClass().getName()+"): "+e.getMessage());
+                        "\nReason ("+e.getClass().getName()+"): "+e.getMessage(),e);
             }
         }
 
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java
index ea7a3d3d4..9c8f143b4 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java
@@ -77,6 +77,7 @@ public class ReferenceOrderedData implements
         addModule("GLF", RodGLF.class);
         addModule("VCF", RodVCF.class);
         addModule("PicardDbSNP", rodPicardDbSNP.class);
+        addModule("HapmapVCF",HapmapVCFROD.class);
     }
 
 
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java
index 3fa89d85e..f986e4356 100755
--- a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java
@@ -56,8 +56,9 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
     }
 
     public Object initialize(final File source) throws FileNotFoundException {
-        if ( mReader == null )
+        if ( mReader == null ) {
             mReader = new VCFReader(source);
+        }
         return mReader.getHeader();
     }
 
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
index 4aeaab0cd..736b5f61c 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
@@ -95,14 +95,15 @@ public class VariantAnnotator extends LocusWalker {
         SampleUtils.getUniquifiedSamplesFromRods(getToolkit(), samples, new HashMap, String>());
 
         // add the non-VCF sample from the command-line, if applicable
-        if ( sampleName != null ) {
+        if ( sampleName != null ) {
             nonVCFsampleName.put(sampleName.toUpperCase(), "variant");
             samples.add(sampleName.toUpperCase());
         }
 
         // if there are no valid samples, warn the user
-        if ( samples.size() == 0 )
+        if ( samples.size() == 0 ) {
             logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired.");
+        }
 
         determineAllAnnotations();
 
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java
index 6afb03371..b9d329aad 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java
@@ -325,7 +325,11 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
     }
 
     public boolean isNovel() {
-        return mID == null && ! isInHapmap();
+        return ( ! isInDBSNP() ) && ( ! isInHapmap() );
+    }
+
+    public boolean isInDBSNP() {
+        return ( mID != null || ( mInfoFields.get(DBSNP_KEY) != null && mInfoFields.get(DBSNP_KEY).equals("1") ) );
     }
 
     public boolean isInHapmap() {