VariantsToVCF is up and running again; integration tests are reenabled (and added one for dbSNP).ant
This commit is contained in:
parent
cb28875c2a
commit
70b3daf689
|
|
@ -112,7 +112,7 @@ public class VariantContextAdaptors {
|
||||||
alleles.add(refAllele);
|
alleles.add(refAllele);
|
||||||
|
|
||||||
// add all of the alt alleles
|
// add all of the alt alleles
|
||||||
boolean sawNullAllele = false;
|
boolean sawNullAllele = refAllele.isNull();
|
||||||
for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) {
|
for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) {
|
||||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||||
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
||||||
|
|
@ -133,7 +133,7 @@ public class VariantContextAdaptors {
|
||||||
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
|
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
|
||||||
|
|
||||||
Map<String, Genotype> genotypes = null;
|
Map<String, Genotype> genotypes = null;
|
||||||
VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes, refBaseForIndel);
|
VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes, refBaseForIndel);
|
||||||
return vc;
|
return vc;
|
||||||
} else
|
} else
|
||||||
return null; // can't handle anything else
|
return null; // can't handle anything else
|
||||||
|
|
@ -163,16 +163,6 @@ public class VariantContextAdaptors {
|
||||||
@Override
|
@Override
|
||||||
public Class<? extends Feature> getAdaptableFeatureType() { return GeliTextFeature.class; }
|
public Class<? extends Feature> getAdaptableFeatureType() { return GeliTextFeature.class; }
|
||||||
|
|
||||||
/**
|
|
||||||
* convert to a Variant Context, given:
|
|
||||||
* @param name the name of the ROD
|
|
||||||
* @param input the Rod object, in this case a RodGeliText
|
|
||||||
* @return a VariantContext object
|
|
||||||
*/
|
|
||||||
// VariantContext convert(String name, Object input) {
|
|
||||||
// return convert(name, input, null);
|
|
||||||
// }
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* convert to a Variant Context, given:
|
* convert to a Variant Context, given:
|
||||||
* @param name the name of the ROD
|
* @param name the name of the ROD
|
||||||
|
|
@ -238,16 +228,6 @@ public class VariantContextAdaptors {
|
||||||
@Override
|
@Override
|
||||||
public Class<? extends Feature> getAdaptableFeatureType() { return HapMapFeature.class; }
|
public Class<? extends Feature> getAdaptableFeatureType() { return HapMapFeature.class; }
|
||||||
|
|
||||||
/**
|
|
||||||
* convert to a Variant Context, given:
|
|
||||||
* @param name the name of the ROD
|
|
||||||
* @param input the Rod object, in this case a RodGeliText
|
|
||||||
* @return a VariantContext object
|
|
||||||
*/
|
|
||||||
// VariantContext convert(String name, Object input) {
|
|
||||||
// return convert(name, input, null);
|
|
||||||
// }
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* convert to a Variant Context, given:
|
* convert to a Variant Context, given:
|
||||||
* @param name the name of the ROD
|
* @param name the name of the ROD
|
||||||
|
|
@ -262,6 +242,11 @@ public class VariantContextAdaptors {
|
||||||
|
|
||||||
HapMapFeature hapmap = (HapMapFeature)input;
|
HapMapFeature hapmap = (HapMapFeature)input;
|
||||||
|
|
||||||
|
int index = hapmap.getStart() - ref.getWindow().getStart();
|
||||||
|
if ( index < 0 )
|
||||||
|
return null; // we weren't given enough reference context to create the VariantContext
|
||||||
|
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
|
||||||
|
|
||||||
HashSet<Allele> alleles = new HashSet<Allele>();
|
HashSet<Allele> alleles = new HashSet<Allele>();
|
||||||
Allele refSNPAllele = Allele.create(ref.getBase(), true);
|
Allele refSNPAllele = Allele.create(ref.getBase(), true);
|
||||||
int deletionLength = -1;
|
int deletionLength = -1;
|
||||||
|
|
@ -320,7 +305,7 @@ public class VariantContextAdaptors {
|
||||||
long end = hapmap.getEnd();
|
long end = hapmap.getEnd();
|
||||||
if ( deletionLength > 0 )
|
if ( deletionLength > 0 )
|
||||||
end += deletionLength;
|
end += deletionLength;
|
||||||
VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs);
|
VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs, refBaseForIndel);
|
||||||
return vc;
|
return vc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -59,7 +59,7 @@ public class DbSNPHelper {
|
||||||
return dbsnp;
|
return dbsnp;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String rsIDOfFirstRealSNP(List<Feature> featureList) {
|
public static String rsIDOfFirstRealSNP(List<Feature> featureList, boolean deleteMe) {
|
||||||
if (featureList == null)
|
if (featureList == null)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
@ -81,6 +81,21 @@ public class DbSNPHelper {
|
||||||
return rsID;
|
return rsID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String rsIDOfFirstRealSNP(List<VariantContext> VCs) {
|
||||||
|
if ( VCs == null )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
String rsID = null;
|
||||||
|
for ( VariantContext vc : VCs ) {
|
||||||
|
if ( vc.isSNP() ) {
|
||||||
|
rsID = vc.getID();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return rsID;
|
||||||
|
}
|
||||||
|
|
||||||
public static String rsIDOfFirstRealIndel(List<Feature> featureList) {
|
public static String rsIDOfFirstRealIndel(List<Feature> featureList) {
|
||||||
if (featureList == null)
|
if (featureList == null)
|
||||||
return null;
|
return null;
|
||||||
|
|
|
||||||
|
|
@ -155,7 +155,7 @@ public class VariantAnnotatorEngine {
|
||||||
String rsID = null;
|
String rsID = null;
|
||||||
|
|
||||||
if (vc.isSNP())
|
if (vc.isSNP())
|
||||||
rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
|
rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME), true);
|
||||||
else if (vc.isIndel())
|
else if (vc.isIndel())
|
||||||
rsID = DbSNPHelper.rsIDOfFirstRealIndel(tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
|
rsID = DbSNPHelper.rsIDOfFirstRealIndel(tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
|
||||||
infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null );
|
infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null );
|
||||||
|
|
|
||||||
|
|
@ -27,15 +27,12 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||||
|
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broad.tribble.dbsnp.DbSNPCodec;
|
|
||||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||||
import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper;
|
import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper;
|
||||||
|
|
@ -43,6 +40,7 @@ import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature;
|
import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
|
|
@ -52,6 +50,7 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -63,10 +62,13 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
@Output(doc="File to which variants should be written",required=true)
|
@Output(doc="File to which variants should be written",required=true)
|
||||||
protected VCFWriter baseWriter = null;
|
protected VCFWriter baseWriter = null;
|
||||||
private SortingVCFWriter vcfwriter; // needed because hapmap indel records move
|
private SortingVCFWriter vcfwriter; // needed because hapmap/dbsnp indel records move
|
||||||
|
|
||||||
@Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true)
|
@Input(fullName="variant", shortName = "V", doc="Input variant file", required=true)
|
||||||
public RodBinding<VariantContext> variants;
|
public RodBinding<Feature> variants;
|
||||||
|
|
||||||
|
@Input(fullName="dbsnp", shortName = "D", doc="dbSNP VCF for populating rsIDs", required=false)
|
||||||
|
public RodBinding<VariantContext> dbsnp;
|
||||||
|
|
||||||
@Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod (for data like GELI with genotypes)", required=false)
|
@Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod (for data like GELI with genotypes)", required=false)
|
||||||
protected String sampleName = null;
|
protected String sampleName = null;
|
||||||
|
|
@ -77,10 +79,6 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
||||||
private Set<String> allowedGenotypeFormatStrings = new HashSet<String>();
|
private Set<String> allowedGenotypeFormatStrings = new HashSet<String>();
|
||||||
private boolean wroteHeader = false;
|
private boolean wroteHeader = false;
|
||||||
|
|
||||||
// Don't allow mixed types for now
|
|
||||||
private EnumSet<VariantContext.Type> ALLOWED_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP,
|
|
||||||
VariantContext.Type.NO_VARIATION, VariantContext.Type.INDEL, VariantContext.Type.MNP);
|
|
||||||
|
|
||||||
// for dealing with indels in hapmap
|
// for dealing with indels in hapmap
|
||||||
CloseableIterator<GATKFeature> dbsnpIterator = null;
|
CloseableIterator<GATKFeature> dbsnpIterator = null;
|
||||||
|
|
||||||
|
|
@ -92,128 +90,108 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
||||||
if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) )
|
if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
String rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
|
String rsID = dbsnp == null ? null : DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(dbsnp, context.getLocation()));
|
||||||
|
|
||||||
Collection<VariantContext> contexts = getVariantContexts(tracker, ref);
|
Collection<VariantContext> contexts = getVariantContexts(tracker, ref);
|
||||||
|
|
||||||
for ( VariantContext vc : contexts ) {
|
for ( VariantContext vc : contexts ) {
|
||||||
if ( ALLOWED_VARIANT_CONTEXT_TYPES.contains(vc.getType()) ) {
|
Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
|
||||||
Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
|
if ( rsID != null && !vc.hasID() ) {
|
||||||
if ( rsID != null && !vc.hasID() ) {
|
attrs.put(VariantContext.ID_KEY, rsID);
|
||||||
attrs.put(VariantContext.ID_KEY, rsID);
|
vc = VariantContext.modifyAttributes(vc, attrs);
|
||||||
vc = VariantContext.modifyAttributes(vc, attrs);
|
|
||||||
}
|
|
||||||
|
|
||||||
// set the appropriate sample name if necessary
|
|
||||||
if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) {
|
|
||||||
Genotype g = Genotype.modifyName(vc.getGenotype(variants.getName()), sampleName);
|
|
||||||
Map<String, Genotype> genotypes = new HashMap<String, Genotype>();
|
|
||||||
genotypes.put(sampleName, g);
|
|
||||||
vc = VariantContext.modifyGenotypes(vc, genotypes);
|
|
||||||
}
|
|
||||||
|
|
||||||
// todo - fix me. This may not be the cleanest way to handle features what need correct indel padding
|
|
||||||
if (fixReferenceBase) {
|
|
||||||
vc = new VariantContext("Variant",vc.getChr(),vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), vc.getFilters(),vc.getAttributes(), ref.getBase());
|
|
||||||
}
|
|
||||||
|
|
||||||
writeRecord(vc, tracker, ref.getBase());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// set the appropriate sample name if necessary
|
||||||
|
if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) {
|
||||||
|
Genotype g = Genotype.modifyName(vc.getGenotype(variants.getName()), sampleName);
|
||||||
|
Map<String, Genotype> genotypes = new HashMap<String, Genotype>();
|
||||||
|
genotypes.put(sampleName, g);
|
||||||
|
vc = VariantContext.modifyGenotypes(vc, genotypes);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( fixReferenceBase ) {
|
||||||
|
vc = VariantContext.modifyReferencePadding(vc, ref.getBase());
|
||||||
|
}
|
||||||
|
|
||||||
|
writeRecord(vc, tracker, ref.getLocus());
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Collection<VariantContext> getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref) {
|
private Collection<VariantContext> getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref) {
|
||||||
// we need to special case the HapMap format because indels aren't handled correctly
|
|
||||||
List<Feature> features = tracker.getValues(Feature.class, variants.getName());
|
|
||||||
if ( features.size() > 0 && features.get(0) instanceof HapMapFeature ) {
|
|
||||||
ArrayList<VariantContext> hapmapVCs = new ArrayList<VariantContext>(features.size());
|
|
||||||
for ( Object feature : features ) {
|
|
||||||
HapMapFeature hapmap = (HapMapFeature)feature;
|
|
||||||
Byte refBase = null;
|
|
||||||
|
|
||||||
// if it's an indel, we need to figure out the alleles
|
List<Feature> features = tracker.getValues(variants, ref.getLocus());
|
||||||
if ( hapmap.getAlleles()[0].equals("-") ) {
|
List<VariantContext> VCs = new ArrayList<VariantContext>(features.size());
|
||||||
Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
|
|
||||||
|
|
||||||
// get the dbsnp object corresponding to this record, so we can learn whether this is an insertion or deletion
|
for ( Feature record : features ) {
|
||||||
DbSNPFeature dbsnp = getDbsnpFeature(hapmap.getName());
|
if ( VariantContextAdaptors.canBeConvertedToVariantContext(record) ) {
|
||||||
if ( dbsnp == null || dbsnp.getVariantType().equalsIgnoreCase("mixed") )
|
// we need to special case the HapMap format because indels aren't handled correctly
|
||||||
continue;
|
if ( record instanceof HapMapFeature) {
|
||||||
|
|
||||||
boolean isInsertion = dbsnp.getVariantType().equalsIgnoreCase("insertion");
|
// is it an indel?
|
||||||
|
HapMapFeature hapmap = (HapMapFeature)record;
|
||||||
|
if ( hapmap.getAlleles()[0].equals(HapMapFeature.NULL_ALLELE_STRING) || hapmap.getAlleles()[1].equals(HapMapFeature.NULL_ALLELE_STRING) ) {
|
||||||
|
// get the dbsnp object corresponding to this record (needed to help us distinguish between insertions and deletions)
|
||||||
|
VariantContext dbsnpVC = getDbsnp(hapmap.getName());
|
||||||
|
if ( dbsnpVC == null || dbsnpVC.isMixed() )
|
||||||
|
continue;
|
||||||
|
|
||||||
alleleMap.put(HapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, isInsertion));
|
Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
|
||||||
alleleMap.put(HapMapFeature.INSERTION, Allele.create(hapmap.getAlleles()[1], !isInsertion));
|
alleleMap.put(HapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isInsertion()));
|
||||||
hapmap.setActualAlleles(alleleMap);
|
alleleMap.put(HapMapFeature.INSERTION, Allele.create(((HapMapFeature)record).getAlleles()[1], !dbsnpVC.isInsertion()));
|
||||||
|
hapmap.setActualAlleles(alleleMap);
|
||||||
|
|
||||||
// also, use the correct positioning for insertions
|
// also, use the correct positioning for insertions
|
||||||
if ( isInsertion )
|
hapmap.updatePosition(dbsnpVC.getStart());
|
||||||
hapmap.updatePosition(dbsnp.getStart());
|
|
||||||
else
|
|
||||||
hapmap.updatePosition(dbsnp.getStart() - 1);
|
|
||||||
|
|
||||||
if ( hapmap.getStart() < ref.getWindow().getStart() ) {
|
if ( hapmap.getStart() < ref.getWindow().getStart() ) {
|
||||||
logger.warn("Hapmap record at " + ref.getLocus() + " represents an indel too large to be converted; skipping...");
|
logger.warn("Hapmap record at " + ref.getLocus() + " represents an indel too large to be converted; skipping...");
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
refBase = ref.getBases()[hapmap.getStart() - ref.getWindow().getStart()];
|
|
||||||
}
|
|
||||||
VariantContext vc = VariantContextAdaptors.toVariantContext(variants.getName(), hapmap, ref);
|
|
||||||
if ( vc != null ) {
|
|
||||||
if ( refBase != null ) {
|
|
||||||
// TODO -- fix me
|
|
||||||
//Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
|
|
||||||
//attrs.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, refBase);
|
|
||||||
//vc = VariantContext.modifyAttributes(vc, attrs);
|
|
||||||
}
|
|
||||||
hapmapVCs.add(vc);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ok, we might actually be able to turn this record in a variant context
|
||||||
|
VariantContext vc = VariantContextAdaptors.toVariantContext(variants.getName(), record, ref);
|
||||||
|
|
||||||
|
if ( vc != null ) // sometimes the track has odd stuff in it that can't be converted
|
||||||
|
VCs.add(vc);
|
||||||
}
|
}
|
||||||
return hapmapVCs;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// for everything else, we can just convert to VariantContext
|
return VCs;
|
||||||
return tracker.getValues(variants, ref.getLocus());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private DbSNPFeature getDbsnpFeature(String rsID) {
|
private VariantContext getDbsnp(String rsID) {
|
||||||
if ( dbsnpIterator == null ) {
|
if ( dbsnpIterator == null ) {
|
||||||
ReferenceOrderedDataSource dbsnpDataSource = null;
|
|
||||||
for ( ReferenceOrderedDataSource ds : getToolkit().getRodDataSources() ) {
|
|
||||||
if ( ds.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {
|
|
||||||
dbsnpDataSource = ds;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( dbsnpDataSource == null )
|
if ( dbsnp == null )
|
||||||
throw new UserException.BadInput("No dbSNP rod was provided, but one is needed to decipher the correct indel alleles from the HapMap records");
|
throw new UserException.BadInput("No dbSNP rod was provided, but one is needed to decipher the correct indel alleles from the HapMap records");
|
||||||
|
|
||||||
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe);
|
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe);
|
||||||
dbsnpIterator = builder.createInstanceOfTrack(DbSNPCodec.class, dbsnpDataSource.getFile()).getIterator();
|
dbsnpIterator = builder.createInstanceOfTrack(VCFCodec.class, new File(dbsnp.getSource())).getIterator();
|
||||||
// Note that we should really use some sort of seekable iterator here so that the search doesn't take forever
|
// Note that we should really use some sort of seekable iterator here so that the search doesn't take forever
|
||||||
// (but it's complicated because the hapmap location doesn't match the dbsnp location, so we don't know where to seek to)
|
// (but it's complicated because the hapmap location doesn't match the dbsnp location, so we don't know where to seek to)
|
||||||
}
|
}
|
||||||
|
|
||||||
while ( dbsnpIterator.hasNext() ) {
|
while ( dbsnpIterator.hasNext() ) {
|
||||||
GATKFeature feature = dbsnpIterator.next();
|
GATKFeature feature = dbsnpIterator.next();
|
||||||
DbSNPFeature dbsnp = (DbSNPFeature)feature.getUnderlyingObject();
|
VariantContext vc = (VariantContext)feature.getUnderlyingObject();
|
||||||
if ( dbsnp.getRsID().equals(rsID) )
|
if ( vc.hasID() && vc.getID().equals(rsID) )
|
||||||
return dbsnp;
|
return vc;
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, byte ref) {
|
private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) {
|
||||||
if ( !wroteHeader ) {
|
if ( !wroteHeader ) {
|
||||||
wroteHeader = true;
|
wroteHeader = true;
|
||||||
|
|
||||||
// setup the header fields
|
// setup the header fields
|
||||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName())));
|
||||||
//hInfo.add(new VCFHeaderLine("source", "VariantsToVCF"));
|
//hInfo.add(new VCFHeaderLine("source", "VariantsToVCF"));
|
||||||
//hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
//hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||||
|
|
||||||
|
|
@ -232,13 +210,13 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
||||||
samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName()));
|
samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName()));
|
||||||
|
|
||||||
if ( samples.isEmpty() ) {
|
if ( samples.isEmpty() ) {
|
||||||
List<Feature> rods = tracker.getValues(Feature.class, variants.getName());
|
List<Feature> features = tracker.getValues(variants, loc);
|
||||||
if ( rods.size() == 0 )
|
if ( features.size() == 0 )
|
||||||
throw new IllegalStateException("No rod data is present");
|
throw new IllegalStateException("No rod data is present, but we just created a VariantContext");
|
||||||
|
|
||||||
Object rod = rods.get(0);
|
Feature f = features.get(0);
|
||||||
if ( rod instanceof HapMapFeature)
|
if ( f instanceof HapMapFeature )
|
||||||
samples.addAll(Arrays.asList(((HapMapFeature)rod).getSampleIDs()));
|
samples.addAll(Arrays.asList(((HapMapFeature)f).getSampleIDs()));
|
||||||
else
|
else
|
||||||
samples.addAll(vc.getSampleNames());
|
samples.addAll(vc.getSampleNames());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,7 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class HapMapFeature implements Feature {
|
public class HapMapFeature implements Feature {
|
||||||
|
|
||||||
|
public static final String NULL_ALLELE_STRING = "-";
|
||||||
public static final String INSERTION = "I";
|
public static final String INSERTION = "I";
|
||||||
public static final String DELETION = "D";
|
public static final String DELETION = "D";
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -396,6 +396,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true);
|
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static VariantContext modifyReferencePadding(VariantContext vc, Byte b) {
|
||||||
|
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true);
|
||||||
|
}
|
||||||
|
|
||||||
public static VariantContext modifyPErrorFiltersAndAttributes(VariantContext vc, double negLog10PError, Set<String> filters, Map<String, Object> attributes) {
|
public static VariantContext modifyPErrorFiltersAndAttributes(VariantContext vc, double negLog10PError, Set<String> filters, Map<String, Object> attributes) {
|
||||||
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true);
|
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -15,73 +15,90 @@ import java.util.ArrayList;
|
||||||
* test(s) for the VariantsToVCF walker.
|
* test(s) for the VariantsToVCF walker.
|
||||||
*/
|
*/
|
||||||
public class VariantsToVCFIntegrationTest extends WalkerTest {
|
public class VariantsToVCFIntegrationTest extends WalkerTest {
|
||||||
// TODO -- eric, fix me
|
|
||||||
// @Test
|
@Test
|
||||||
// public void testVariantsToVCFUsingGeliInput() {
|
public void testVariantsToVCFUsingDbsnpInput() {
|
||||||
// List<String> md5 = new ArrayList<String>();
|
List<String> md5 = new ArrayList<String>();
|
||||||
// md5.add("4accae035d271b35ee2ec58f403c68c6");
|
md5.add("d64942fed2a5b7b407f9537dd2b4832e");
|
||||||
//
|
|
||||||
// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
// "-R " + b36KGReference +
|
"-R " + b36KGReference +
|
||||||
// " -B:variant,GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" +
|
" --variant:dbsnp " + GATKDataLocation + "dbsnp_129_b36.rod" +
|
||||||
// " -T VariantsToVCF" +
|
" -T VariantsToVCF" +
|
||||||
// " -L 1:10,000,000-11,000,000" +
|
" -L 1:1-30,000,000" +
|
||||||
// " -sample NA123AB" +
|
" -o %s" +
|
||||||
// " -o %s" +
|
" -NO_HEADER",
|
||||||
// " -NO_HEADER",
|
1, // just one output file
|
||||||
// 1, // just one output file
|
md5);
|
||||||
// md5);
|
executeTest("testVariantsToVCFUsingDbsnpInput", spec).getFirst();
|
||||||
// executeTest("testVariantsToVCFUsingGeliInput #1", spec).getFirst();
|
}
|
||||||
// }
|
|
||||||
//
|
@Test
|
||||||
// @Test
|
public void testVariantsToVCFUsingGeliInput() {
|
||||||
// public void testGenotypesToVCFUsingGeliInput() {
|
List<String> md5 = new ArrayList<String>();
|
||||||
// List<String> md5 = new ArrayList<String>();
|
md5.add("4accae035d271b35ee2ec58f403c68c6");
|
||||||
// md5.add("71e8c98d7c3a73b6287ecc339086fe03");
|
|
||||||
//
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
"-R " + b36KGReference +
|
||||||
// "-R " + b36KGReference +
|
" --variant:GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" +
|
||||||
// " -B:variant,GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" +
|
" -T VariantsToVCF" +
|
||||||
// " -T VariantsToVCF" +
|
" -L 1:10,000,000-11,000,000" +
|
||||||
// " -L 1:10,000,000-11,000,000" +
|
" -sample NA123AB" +
|
||||||
// " -sample NA123AB" +
|
" -o %s" +
|
||||||
// " -o %s" +
|
" -NO_HEADER",
|
||||||
// " -NO_HEADER",
|
1, // just one output file
|
||||||
// 1, // just one output file
|
md5);
|
||||||
// md5);
|
executeTest("testVariantsToVCFUsingGeliInput - calls", spec).getFirst();
|
||||||
// executeTest("testVariantsToVCFUsingGeliInput #2", spec).getFirst();
|
}
|
||||||
// }
|
|
||||||
//
|
@Test
|
||||||
// @Test
|
public void testGenotypesToVCFUsingGeliInput() {
|
||||||
// public void testGenotypesToVCFUsingHapMapInput() {
|
List<String> md5 = new ArrayList<String>();
|
||||||
// List<String> md5 = new ArrayList<String>();
|
md5.add("2413f036ec4100b8d5db179946159a82");
|
||||||
// md5.add("f343085305e80c7a2493422e4eaad983");
|
|
||||||
//
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
"-R " + b36KGReference +
|
||||||
// "-R " + b36KGReference +
|
" --variant:GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" +
|
||||||
// " -B:variant,HapMap " + validationDataLocation + "rawHapMap.yri.chr1.txt" +
|
" -T VariantsToVCF" +
|
||||||
// " -T VariantsToVCF" +
|
" -L 1:10,100,000-10,200,000" +
|
||||||
// " -L 1:1-1,000,000" +
|
" -sample NA123AB" +
|
||||||
// " -o %s" +
|
" -o %s" +
|
||||||
// " -NO_HEADER",
|
" -NO_HEADER",
|
||||||
// 1, // just one output file
|
1, // just one output file
|
||||||
// md5);
|
md5);
|
||||||
// executeTest("testVariantsToVCFUsingHapMapInput", spec).getFirst();
|
executeTest("testVariantsToVCFUsingGeliInput - genotypes", spec).getFirst();
|
||||||
// }
|
}
|
||||||
//
|
|
||||||
// @Test
|
@Test
|
||||||
// public void testGenotypesToVCFUsingVCFInput() {
|
public void testGenotypesToVCFUsingHapMapInput() {
|
||||||
// List<String> md5 = new ArrayList<String>();
|
List<String> md5 = new ArrayList<String>();
|
||||||
// md5.add("86f02e2e764ba35854cff2aa05a1fdd8");
|
md5.add("f343085305e80c7a2493422e4eaad983");
|
||||||
//
|
|
||||||
// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
// "-R " + b36KGReference +
|
"-R " + b36KGReference +
|
||||||
// " -B:variant,VCF " + validationDataLocation + "complexExample.vcf4" +
|
" --variant:HapMap " + validationDataLocation + "rawHapMap.yri.chr1.txt" +
|
||||||
// " -T VariantsToVCF" +
|
" -T VariantsToVCF" +
|
||||||
// " -o %s" +
|
" -L 1:1-1,000,000" +
|
||||||
// " -NO_HEADER",
|
" -o %s" +
|
||||||
// 1, // just one output file
|
" -NO_HEADER",
|
||||||
// md5);
|
1, // just one output file
|
||||||
// executeTest("testVariantsToVCFUsingVCFInput", spec).getFirst();
|
md5);
|
||||||
// }
|
executeTest("testVariantsToVCFUsingHapMapInput", spec).getFirst();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGenotypesToVCFUsingVCFInput() {
|
||||||
|
List<String> md5 = new ArrayList<String>();
|
||||||
|
md5.add("86f02e2e764ba35854cff2aa05a1fdd8");
|
||||||
|
|
||||||
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
"-R " + b36KGReference +
|
||||||
|
" --variant:VCF " + validationDataLocation + "complexExample.vcf4" +
|
||||||
|
" -T VariantsToVCF" +
|
||||||
|
" -o %s" +
|
||||||
|
" -NO_HEADER",
|
||||||
|
1, // just one output file
|
||||||
|
md5);
|
||||||
|
executeTest("testVariantsToVCFUsingVCFInput", spec).getFirst();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue