Not nearly as badass as it looks. The problem I mentioned yesterday with samples "bleeding in" comes from VCFUtils and SampleUtils looking for all VCF-class RODs in the tracker and taking the sample names from them. I have introduced a new HapmapVCF-type ROD for use when you want to protect your VCF header from being infected by the samples in a bound hapmap VCF. Changes are as follows:

VCFRecord - minor change to adapt isNovel() to the case where the dbsnp ID field is empty, but the info field has DB=1

HapmapVCFRod - introduced for the reason at the top

RODRecordIterator - was: catch ( Exception e ) { throw new StingException("long ass message") }
                 is now: catch ( Exception e ) { throw new StingException("long ass message",e) }
                    to preserve the original exception as the cause, so the full stack trace is reported.

RodVCF - Now with more brackets!

ReferenceOrderedData - registering HapmapVCF as a bindable string

VariantAnnotator - There's an extra space on a line. And some new brackets.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2733 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-01-29 15:19:50 +00:00
parent 5aaf4e6434
commit d57a86ad41
6 changed files with 182 additions and 5 deletions

View File

@ -0,0 +1,170 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
/**
 * A (hopefully temporary) wrapper around {@link RodVCF} for certain VCF files that we want to
 * protect from utilities that grab genotypes or sample names across all VCF files.
 *
 * Every query is forwarded to the wrapped {@link RodVCF}; this class only changes the ROD's type.
 *
 * User: chartl
 * Date: Jan 29, 2010
 */
public class HapmapVCFROD extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype, Iterator<HapmapVCFROD> {

    /**
     * The wrapped VCF rod that answers every query. It is left unset by the name-only
     * constructor and is assigned by {@link #initialize(File)} or by the other constructors.
     */
    private RodVCF rod;

    /**
     * Creates an empty wrapper. No underlying rod is set until {@link #initialize(File)} is called.
     *
     * @param name the name of this ROD track
     */
    public HapmapVCFROD(String name) {
        super(name);
    }

    /**
     * Creates a wrapper around a new {@link RodVCF} built from the given record and reader.
     *
     * @param name          the name of this ROD track
     * @param currentRecord the VCF record handed to the wrapped rod
     * @param reader        the VCF reader handed to the wrapped rod
     */
    public HapmapVCFROD(String name, VCFRecord currentRecord, VCFReader reader) {
        super(name);
        rod = new RodVCF(name, currentRecord, reader);
    }

    /**
     * Creates a wrapper around an existing {@link RodVCF}.
     *
     * @param name the name of this ROD track
     * @param rod  the rod to delegate to
     */
    public HapmapVCFROD(String name, RodVCF rod) {
        super(name);
        this.rod = rod;
    }

    /**
     * Builds a wrapper whose underlying rod has been initialized from the given file.
     *
     * @param name the name of the ROD track
     * @param file the VCF file to read
     * @return a wrapper around the initialized rod
     * @throws StingException if the file cannot be found; the original
     *                        {@link FileNotFoundException} is attached as the cause
     */
    public static HapmapVCFROD createIterator(String name, File file) {
        RodVCF vcf = new RodVCF(name);
        try {
            vcf.initialize(file);
        } catch (FileNotFoundException e) {
            // keep the cause so the underlying stack trace is not lost
            throw new StingException("Unable to find file " + file, e);
        }
        return new HapmapVCFROD(name, vcf);
    }

    /**
     * Replaces the wrapped rod with a fresh {@link RodVCF} initialized from the given file.
     *
     * @param source the VCF file to read
     * @return the header reported by the newly initialized rod
     * @throws FileNotFoundException if thrown by the wrapped rod's initialization
     */
    public Object initialize(final File source) throws FileNotFoundException {
        rod = new RodVCF(name);
        rod.initialize(source);
        return rod.getHeader();
    }

    /**
     * Forwards line parsing to the wrapped rod.
     *
     * @param obj  passed through unchanged to the wrapped rod
     * @param args passed through unchanged to the wrapped rod
     * @return whatever the wrapped rod returns
     * @throws UnsupportedOperationException if the wrapped rod throws any exception; that
     *                                       exception is attached as the cause
     */
    public boolean parseLine(Object obj, String[] args) {
        try {
            return rod.parseLine(obj, args);
        } catch (Exception e) {
            throw new UnsupportedOperationException("Parse line not supported", e);
        }
    }

    /** @return the reader of the wrapped rod */
    public VCFReader getReader() {
        return rod.getReader();
    }

    /** @return the record of the wrapped rod */
    public VCFRecord getRecord() {
        return rod.getRecord();
    }

    /** @return the header of the wrapped rod */
    public VCFHeader getHeader() {
        return rod.getHeader();
    }

    // ---------------------------------------------------------------------
    // Variation / VariantBackedByGenotype queries: all delegate to the wrapped rod
    // ---------------------------------------------------------------------

    public double getNegLog10PError() {
        return rod.getNegLog10PError();
    }

    public List<Genotype> getGenotypes() {
        return rod.getGenotypes();
    }

    public String getReference() {
        return rod.getReference();
    }

    /** @return the string form of the wrapped rod */
    @Override
    public String toString() {
        return rod.toString();
    }

    public List<String> getAlternateAlleleList() {
        return rod.getAlternateAlleleList();
    }

    public boolean isDeletion() {
        return rod.isDeletion();
    }

    public GenomeLoc getLocation() {
        return rod.getLocation();
    }

    public boolean isBiallelic() {
        return rod.isBiallelic();
    }

    public boolean isIndel() {
        return rod.isIndel();
    }

    public Variation.VARIANT_TYPE getType() {
        return rod.getType();
    }

    public boolean isSNP() {
        return rod.isSNP();
    }

    public boolean isReference() {
        return rod.isReference();
    }

    public double getNonRefAlleleFrequency() {
        return rod.getNonRefAlleleFrequency();
    }

    public char getAlternativeBaseForSNP() {
        return rod.getAlternativeBaseForSNP();
    }

    public boolean isInsertion() {
        return rod.isInsertion();
    }

    public List<String> getAlleleList() {
        return rod.getAlleleList();
    }

    public Genotype getCalledGenotype() {
        return rod.getCalledGenotype();
    }

    public char getReferenceForSNP() {
        return rod.getReferenceForSNP();
    }

    public boolean hasGenotype(DiploidGenotype g) {
        return rod.hasGenotype(g);
    }

    // ---------------------------------------------------------------------
    // Iterator implementation: delegates to the wrapped rod
    // ---------------------------------------------------------------------

    /** @return whether the wrapped rod reports another element */
    public boolean hasNext() {
        return rod.hasNext();
    }

    /**
     * Advances the wrapped rod and re-wraps its next element under this track's name.
     *
     * @return a new wrapper around the wrapped rod's next element
     */
    public HapmapVCFROD next() {
        return new HapmapVCFROD(name, rod.next());
    }

    /** Forwards removal to the wrapped rod. */
    public void remove() {
        rod.remove();
    }
}

View File

@ -173,7 +173,7 @@ public class RODRecordIterator<ROD extends ReferenceOrderedDatum> implements Ite
catch ( Exception e ) {
throw new StingException("Failed to parse ROD data ("+type.getName()+") from file "+ file + " at line #"+linenum+
"\nOffending line: "+line+
"\nReason ("+e.getClass().getName()+"): "+e.getMessage());
"\nReason ("+e.getClass().getName()+"): "+e.getMessage(),e);
}
}

View File

@ -77,6 +77,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
addModule("GLF", RodGLF.class);
addModule("VCF", RodVCF.class);
addModule("PicardDbSNP", rodPicardDbSNP.class);
addModule("HapmapVCF",HapmapVCFROD.class);
}

View File

@ -56,8 +56,9 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
}
public Object initialize(final File source) throws FileNotFoundException {
if ( mReader == null )
if ( mReader == null ) {
mReader = new VCFReader(source);
}
return mReader.getHeader();
}

View File

@ -95,14 +95,15 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
SampleUtils.getUniquifiedSamplesFromRods(getToolkit(), samples, new HashMap<Pair<String, String>, String>());
// add the non-VCF sample from the command-line, if applicable
if ( sampleName != null ) {
if ( sampleName != null ) {
nonVCFsampleName.put(sampleName.toUpperCase(), "variant");
samples.add(sampleName.toUpperCase());
}
// if there are no valid samples, warn the user
if ( samples.size() == 0 )
if ( samples.size() == 0 ) {
logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired.");
}
determineAllAnnotations();

View File

@ -325,7 +325,11 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
}
public boolean isNovel() {
return mID == null && ! isInHapmap();
return ( ! isInDBSNP() ) && ( ! isInHapmap() );
}
/**
 * Reports whether this record is flagged as a dbSNP site.
 *
 * @return true when the ID field (mID) is non-null, or when the info field stored under
 *         DBSNP_KEY is present and equal to "1"
 */
public boolean isInDBSNP() {
    // a non-null ID is sufficient on its own
    if ( mID != null ) {
        return true;
    }
    // otherwise fall back to the info-field flag
    final Object dbsnpFlag = mInfoFields.get(DBSNP_KEY);
    return dbsnpFlag != null && dbsnpFlag.equals("1");
}
public boolean isInHapmap() {