Redoing the conversion to VariantContext: instead of walkers passing in a ref allele, they pass in the ref context and the adaptors create the allele. This is the right way of doing it.

Also, adding some more useful integration tests.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3194 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-04-19 05:47:17 +00:00
parent 4eba9bffc1
commit d73c63a99a
13 changed files with 184 additions and 116 deletions

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.gatk.refdata; package org.broadinstitute.sting.gatk.refdata;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
@ -113,7 +113,7 @@ public class RefMetaDataTracker {
* Get all of the RMDs at the current site. The collection is "flattened": for any track that has multiple records * Get all of the RMDs at the current site. The collection is "flattened": for any track that has multiple records
* at the current site, they all will be added to the list as separate elements. * at the current site, they all will be added to the list as separate elements.
* *
* @return * @return collection of all rods
*/ */
public Collection<GATKFeature> getAllRods() { public Collection<GATKFeature> getAllRods() {
List<GATKFeature> l = new ArrayList<GATKFeature>(); List<GATKFeature> l = new ArrayList<GATKFeature>();
@ -129,7 +129,7 @@ public class RefMetaDataTracker {
* Get all of the RMD tracks at the current site. Each track is returned as a single compound * Get all of the RMD tracks at the current site. Each track is returned as a single compound
* object (RODRecordList) that may contain multiple RMD records associated with the current site. * object (RODRecordList) that may contain multiple RMD records associated with the current site.
* *
* @return * @return collection of all tracks
*/ */
public Collection<RODRecordList> getBoundRodTracks() { public Collection<RODRecordList> getBoundRodTracks() {
LinkedList<RODRecordList> bound = new LinkedList<RODRecordList>(); LinkedList<RODRecordList> bound = new LinkedList<RODRecordList>();
@ -176,9 +176,13 @@ public class RefMetaDataTracker {
/** /**
* Converts all possible ROD tracks to VariantContexts objects, of all types, allowing any start and any number * Converts all possible ROD tracks to VariantContexts objects, of all types, allowing any start and any number
* of entries per ROD. * of entries per ROD.
* The name of each VariantContext corresponds to the ROD name.
*
* @param ref reference context
* @return variant context
*/ */
public Collection<VariantContext> getAllVariantContexts() { public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref) {
return getAllVariantContexts(null, null, false, false); return getAllVariantContexts(ref, null, null, false, false);
} }
/** /**
@ -192,17 +196,18 @@ public class RefMetaDataTracker {
* *
* The name of each VariantContext corresponds to the ROD name. * The name of each VariantContext corresponds to the ROD name.
* *
* @param curLocation location * @param ref reference context
* @param allowedTypes allowed types * @param allowedTypes allowed types
* @param curLocation location
* @param requireStartHere do we require the rod to start at this location? * @param requireStartHere do we require the rod to start at this location?
* @param takeFirstOnly do we take the first rod only? * @param takeFirstOnly do we take the first rod only?
* @return variant context * @return variant context
*/ */
public Collection<VariantContext> getAllVariantContexts(EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
List<VariantContext> contexts = new ArrayList<VariantContext>(); List<VariantContext> contexts = new ArrayList<VariantContext>();
for ( RODRecordList rodList : getBoundRodTracks() ) { for ( RODRecordList rodList : getBoundRodTracks() ) {
addVariantContexts(contexts, rodList, allowedTypes, curLocation, null, requireStartHere, takeFirstOnly); addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
} }
return contexts; return contexts;
@ -221,25 +226,25 @@ public class RefMetaDataTracker {
* @return variant context * @return variant context
*/ */
public Collection<VariantContext> getVariantContexts(String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { public Collection<VariantContext> getVariantContexts(String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
return getVariantContexts(Arrays.asList(name), allowedTypes, curLocation, null, requireStartHere, takeFirstOnly); return getVariantContexts(null, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
} }
public Collection<VariantContext> getVariantContexts(String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, Allele ref, boolean requireStartHere, boolean takeFirstOnly ) { public Collection<VariantContext> getVariantContexts(ReferenceContext ref, String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
return getVariantContexts(Arrays.asList(name), allowedTypes, curLocation, ref, requireStartHere, takeFirstOnly); return getVariantContexts(ref, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
} }
public Collection<VariantContext> getVariantContexts(Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { public Collection<VariantContext> getVariantContexts(Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
return getVariantContexts(names, allowedTypes, curLocation, null, requireStartHere, takeFirstOnly); return getVariantContexts(null, names, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
} }
public Collection<VariantContext> getVariantContexts(Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, Allele ref, boolean requireStartHere, boolean takeFirstOnly ) { public Collection<VariantContext> getVariantContexts(ReferenceContext ref, Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
Collection<VariantContext> contexts = new ArrayList<VariantContext>(); Collection<VariantContext> contexts = new ArrayList<VariantContext>();
for ( String name : names ) { for ( String name : names ) {
RODRecordList rodList = getTrackDataByName(name,true); // require that the name is an exact match RODRecordList rodList = getTrackDataByName(name,true); // require that the name is an exact match
if ( rodList != null ) if ( rodList != null )
addVariantContexts(contexts, rodList, allowedTypes, curLocation, ref, requireStartHere, takeFirstOnly ); addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
} }
return contexts; return contexts;
@ -267,15 +272,11 @@ public class RefMetaDataTracker {
} }
private void addVariantContexts(Collection<VariantContext> contexts, RODRecordList rodList, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, Allele ref, boolean requireStartHere, boolean takeFirstOnly ) { private void addVariantContexts(Collection<VariantContext> contexts, RODRecordList rodList, ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
for ( GATKFeature rec : rodList ) { for ( GATKFeature rec : rodList ) {
if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) { if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) {
// ok, we might actually be able to turn this record in a variant context // ok, we might actually be able to turn this record in a variant context
VariantContext vc; VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref);
if ( ref == null )
vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject());
else
vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref);
if ( vc == null ) // sometimes the track has odd stuff in it that can't be converted if ( vc == null ) // sometimes the track has odd stuff in it that can't be converted
continue; continue;
@ -304,8 +305,9 @@ public class RefMetaDataTracker {
* defaultValue.getLocation() may be not equal to what RODRecordList's location would be expected to be otherwise: * defaultValue.getLocation() may be not equal to what RODRecordList's location would be expected to be otherwise:
* for instance, on locus traversal, location is usually expected to be a single base we are currently looking at, * for instance, on locus traversal, location is usually expected to be a single base we are currently looking at,
* regardless of the presence of "extended" RODs overlapping with that location). * regardless of the presence of "extended" RODs overlapping with that location).
* @param name * @param name track name
* @return * @param requireExactMatch do we require an exact match of the rod name?
* @return track data for the given rod
*/ */
private RODRecordList getTrackDataByName(final String name, boolean requireExactMatch) { private RODRecordList getTrackDataByName(final String name, boolean requireExactMatch) {
//logger.debug(String.format("Lookup %s%n", name)); //logger.debug(String.format("Lookup %s%n", name));
@ -335,7 +337,7 @@ public class RefMetaDataTracker {
/** /**
* Returns the canonical name of the rod name (lowercases it) * Returns the canonical name of the rod name (lowercases it)
* @param name the name of the rod * @param name the name of the rod
* @return * @return canonical name of the rod
*/ */
private final String canonicalName(final String name) { private final String canonicalName(final String name) {
return name.toLowerCase(); return name.toLowerCase();

View File

@ -4,9 +4,11 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.CalledGenotype; import org.broadinstitute.sting.utils.genotype.CalledGenotype;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter; import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall; import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall;
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter; import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
@ -56,7 +58,7 @@ public class VariantContextAdaptors {
/** generic superclass */ /** generic superclass */
private static abstract class VCAdaptor { private static abstract class VCAdaptor {
abstract VariantContext convert(String name, Object input); abstract VariantContext convert(String name, Object input);
abstract VariantContext convert(String name, Object input, Allele refAllele); abstract VariantContext convert(String name, Object input, ReferenceContext ref);
} }
public static VariantContext toVariantContext(String name, Object variantContainingObject) { public static VariantContext toVariantContext(String name, Object variantContainingObject) {
@ -67,11 +69,11 @@ public class VariantContextAdaptors {
} }
} }
public static VariantContext toVariantContext(String name, Object variantContainingObject, Allele refAllele) { public static VariantContext toVariantContext(String name, Object variantContainingObject, ReferenceContext ref) {
if ( ! adaptors.containsKey(variantContainingObject.getClass()) ) if ( ! adaptors.containsKey(variantContainingObject.getClass()) )
return null; return null;
else { else {
return adaptors.get(variantContainingObject.getClass()).convert(name, variantContainingObject, refAllele); return adaptors.get(variantContainingObject.getClass()).convert(name, variantContainingObject, ref);
} }
} }
@ -91,14 +93,15 @@ public class VariantContextAdaptors {
private static class RodDBSnpAdaptor extends VCAdaptor { private static class RodDBSnpAdaptor extends VCAdaptor {
VariantContext convert(String name, Object input) { VariantContext convert(String name, Object input) {
if ( ! Allele.acceptableAlleleBases(((rodDbSNP)input).getReference()) ) return convert(name, input, null);
return null;
Allele refAllele = new Allele(((rodDbSNP)input).getReference(), true);
return convert(name, input, refAllele);
} }
VariantContext convert(String name, Object input, Allele refAllele) { VariantContext convert(String name, Object input, ReferenceContext ref) {
rodDbSNP dbsnp = (rodDbSNP)input; rodDbSNP dbsnp = (rodDbSNP)input;
if ( ! Allele.acceptableAlleleBases(dbsnp.getReference()) )
return null;
Allele refAllele = new Allele(dbsnp.getReference(), true);
if ( dbsnp.isSNP() || dbsnp.isIndel() || dbsnp.varType.contains("mixed") ) { if ( dbsnp.isSNP() || dbsnp.isIndel() || dbsnp.varType.contains("mixed") ) {
// add the reference allele // add the reference allele
List<Allele> alleles = new ArrayList<Allele>(); List<Allele> alleles = new ArrayList<Allele>();
@ -132,28 +135,25 @@ public class VariantContextAdaptors {
private static class RodVCFAdaptor extends VCAdaptor { private static class RodVCFAdaptor extends VCAdaptor {
VariantContext convert(String name, Object input) { VariantContext convert(String name, Object input) {
Allele refAllele = new Allele(((RodVCF)input).getReference(), true); return vcfToVariantContext(name, ((RodVCF)input).getRecord(), null);
return vcfToVariantContext(name, ((RodVCF)input).getRecord(), refAllele);
} }
VariantContext convert(String name, Object input, Allele refAllele) { VariantContext convert(String name, Object input, ReferenceContext ref) {
return vcfToVariantContext(name, ((RodVCF)input).getRecord(), refAllele); return vcfToVariantContext(name, ((RodVCF)input).getRecord(), ref);
} }
} }
private static class VCFRecordAdaptor extends VCAdaptor { private static class VCFRecordAdaptor extends VCAdaptor {
VariantContext convert(String name, Object input) { VariantContext convert(String name, Object input) {
Allele refAllele = new Allele(((VCFRecord)input).getReference(), true); return vcfToVariantContext(name, (VCFRecord)input, null);
return vcfToVariantContext(name, (VCFRecord)input, refAllele);
} }
VariantContext convert(String name, Object input, Allele refAllele) { VariantContext convert(String name, Object input, ReferenceContext ref) {
return vcfToVariantContext(name, (VCFRecord)input, refAllele); return vcfToVariantContext(name, (VCFRecord)input, ref);
} }
} }
private static VariantContext vcfToVariantContext(String name, VCFRecord vcf, ReferenceContext ref) {
private static VariantContext vcfToVariantContext(String name, VCFRecord vcf, Allele refAllele) {
if ( vcf.isReference() || vcf.isSNP() || vcf.isIndel() ) { if ( vcf.isReference() || vcf.isSNP() || vcf.isIndel() ) {
// add the reference allele // add the reference allele
if ( ! Allele.acceptableAlleleBases(vcf.getReference()) ) { if ( ! Allele.acceptableAlleleBases(vcf.getReference()) ) {
@ -167,7 +167,9 @@ public class VariantContextAdaptors {
// add all of the alt alleles // add all of the alt alleles
List<Allele> alleles = new ArrayList<Allele>(); List<Allele> alleles = new ArrayList<Allele>();
Allele refAllele = determineRefAllele(vcf, ref);
alleles.add(refAllele); alleles.add(refAllele);
for ( String alt : vcf.getAlternateAlleleList() ) { for ( String alt : vcf.getAlternateAlleleList() ) {
if ( ! Allele.acceptableAlleleBases(alt) ) { if ( ! Allele.acceptableAlleleBases(alt) ) {
//System.out.printf("Excluding vcf record %s%n", vcf); //System.out.printf("Excluding vcf record %s%n", vcf);
@ -184,10 +186,14 @@ public class VariantContextAdaptors {
List<Allele> genotypeAlleles = new ArrayList<Allele>(); List<Allele> genotypeAlleles = new ArrayList<Allele>();
for ( VCFGenotypeEncoding s : vcfG.getAlleles() ) { for ( VCFGenotypeEncoding s : vcfG.getAlleles() ) {
Allele a = Allele.getMatchingAllele(alleles, s.getBases()); Allele a = Allele.getMatchingAllele(alleles, s.getBases());
if ( a == null ) if ( a == null ) {
throw new StingException("Invalid VCF genotype allele " + s + " in VCF " + vcf); if ( vcf.getType() == Variation.VARIANT_TYPE.INSERTION || vcf.getType() == Variation.VARIANT_TYPE.DELETION )
genotypeAlleles.add(refAllele);
genotypeAlleles.add(a); else
throw new StingException("Invalid VCF genotype allele " + s + " in VCF " + vcf);
} else {
genotypeAlleles.add(a);
}
} }
Map<String, String> fields = new HashMap<String, String>(); Map<String, String> fields = new HashMap<String, String>();
@ -207,13 +213,45 @@ public class VariantContextAdaptors {
} }
double qual = vcf.isMissingQual() ? VariantContext.NO_NEG_LOG_10PERROR : vcf.getNegLog10PError(); double qual = vcf.isMissingQual() ? VariantContext.NO_NEG_LOG_10PERROR : vcf.getNegLog10PError();
VariantContext vc = new VariantContext(name, vcf.getLocation(), alleles, genotypes, qual, filters, attributes);
GenomeLoc loc = vcf.getLocation();
if ( vcf.isDeletion() )
loc = GenomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStart()+refAllele.length()-1);
VariantContext vc = new VariantContext(name, loc, alleles, genotypes, qual, filters, attributes);
vc.validate(); vc.validate();
return vc; return vc;
} else } else
return null; // can't handle anything else return null; // can't handle anything else
} }
private static Allele determineRefAllele(VCFRecord vcf, ReferenceContext ref) {
Allele refAllele;
if ( ref == null ) {
refAllele = new Allele(vcf.getReference(), true);
} else if ( !vcf.isIndel() ) {
refAllele = new Allele(Character.toString(ref.getBase()), true);
} else if ( vcf.isInsertion() ) {
refAllele = new Allele(Allele.NULL_ALLELE_STRING, true);
} else if ( vcf.isDeletion() ) {
int start = (int)(ref.getLocus().getStart() - ref.getWindow().getStart() + 1);
int delLength = 0;
for ( VCFGenotypeEncoding enc : vcf.getAlternateAlleles() ) {
if ( enc.getLength() > delLength )
delLength = enc.getLength();
}
if ( delLength > ref.getWindow().getStop() - ref.getLocus().getStop() )
throw new IllegalArgumentException("Length of deletion is larger than reference context provided at " + ref.getLocus());
char[] deletion = new char[delLength];
System.arraycopy(ref.getBases(), start, deletion, 0, delLength);
refAllele = new Allele(String.valueOf(deletion), true);
} else {
throw new UnsupportedOperationException("Conversion of VCF type " + vcf.getType() + " is not supported.");
}
return refAllele;
}
public static VCFHeader createVCFHeader(Set<VCFHeaderLine> hInfo, VariantContext vc) { public static VCFHeader createVCFHeader(Set<VCFHeaderLine> hInfo, VariantContext vc) {
HashSet<String> names = new LinkedHashSet<String>(); HashSet<String> names = new LinkedHashSet<String>();
@ -364,11 +402,15 @@ public class VariantContextAdaptors {
private static List<String> calcVCFGenotypeKeys(VariantContext vc) { private static List<String> calcVCFGenotypeKeys(VariantContext vc) {
Set<String> keys = new HashSet<String>(); Set<String> keys = new HashSet<String>();
boolean sawGoodQual = false;
for ( Genotype g : vc.getGenotypes().values() ) { for ( Genotype g : vc.getGenotypes().values() ) {
keys.addAll(g.getAttributes().keySet()); keys.addAll(g.getAttributes().keySet());
if ( g.hasNegLog10PError() )
sawGoodQual = true;
} }
keys.add(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY); if ( sawGoodQual )
keys.add(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY);
return Utils.sorted(new ArrayList<String>(keys)); return Utils.sorted(new ArrayList<String>(keys));
} }
@ -380,13 +422,17 @@ public class VariantContextAdaptors {
private static class PlinkRodAdaptor extends VCAdaptor { private static class PlinkRodAdaptor extends VCAdaptor {
VariantContext convert(String name, Object input) { VariantContext convert(String name, Object input) {
throw new UnsupportedOperationException("Conversion from Plink to VC requires a reference allele"); return convert(name, input, null);
} }
VariantContext convert(String name, Object input, Allele refAllele) { VariantContext convert(String name, Object input, ReferenceContext ref) {
if ( ref == null )
throw new UnsupportedOperationException("Conversion from Plink to VariantContext requires a reference context");
PlinkRod plink = (PlinkRod)input; PlinkRod plink = (PlinkRod)input;
HashSet<Allele> VCAlleles = new HashSet<Allele>(); HashSet<Allele> VCAlleles = new HashSet<Allele>();
Allele refAllele = determineRefAllele(plink, ref);
VCAlleles.add(refAllele); VCAlleles.add(refAllele);
// mapping from Plink Allele to VC Allele (since the PlinkRod does not annotate any of the Alleles as reference) // mapping from Plink Allele to VC Allele (since the PlinkRod does not annotate any of the Alleles as reference)
@ -440,6 +486,23 @@ public class VariantContextAdaptors {
throw new IllegalArgumentException(e.getMessage() + "; please make sure that e.g. a sample isn't present more than one time in your ped file"); throw new IllegalArgumentException(e.getMessage() + "; please make sure that e.g. a sample isn't present more than one time in your ped file");
} }
} }
private Allele determineRefAllele(PlinkRod plink, ReferenceContext ref) {
Allele refAllele;
if ( !plink.isIndel() ) {
refAllele = new Allele(Character.toString(ref.getBase()), true);
} else if ( plink.isInsertion() ) {
refAllele = new Allele(Allele.NULL_ALLELE_STRING, true);
} else {
long maxLength = ref.getWindow().getStop() - ref.getLocus().getStop();
if ( plink.getLength() > maxLength )
throw new UnsupportedOperationException("Plink conversion currently can only handle indels up to length " + maxLength);
char[] deletion = new char[plink.getLength()];
System.arraycopy(ref.getBases(), 1, deletion, 0, plink.getLength());
refAllele = new Allele(String.valueOf(deletion), true);
}
return refAllele;
}
} }
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@ -455,22 +518,23 @@ public class VariantContextAdaptors {
* @return a VariantContext object * @return a VariantContext object
*/ */
VariantContext convert(String name, Object input) { VariantContext convert(String name, Object input) {
if ( ! Allele.acceptableAlleleBases(((RodGLF)input).getReference()) ) return convert(name, input, null);
return null;
Allele refAllele = new Allele(((RodGLF)input).getReference(), true);
return convert(name, input, refAllele);
} }
/** /**
* convert to a Variant Context, given: * convert to a Variant Context, given:
* @param name the name of the ROD * @param name the name of the ROD
* @param input the Rod object, in this case a RodGLF * @param input the Rod object, in this case a RodGLF
* @param refAllele the reference base as an Allele object * @param ref the reference context
* @return a VariantContext object * @return a VariantContext object
*/ */
VariantContext convert(String name, Object input, Allele refAllele) { VariantContext convert(String name, Object input, ReferenceContext ref) {
RodGLF glf = (RodGLF)input; RodGLF glf = (RodGLF)input;
if ( ! Allele.acceptableAlleleBases(glf.getReference()) )
return null;
Allele refAllele = new Allele(glf.getReference(), true);
// make sure we can convert it // make sure we can convert it
if ( glf.isSNP() || glf.isIndel()) { if ( glf.isSNP() || glf.isIndel()) {
// add the reference allele // add the reference allele
@ -528,21 +592,21 @@ public class VariantContextAdaptors {
* @return a VariantContext object * @return a VariantContext object
*/ */
VariantContext convert(String name, Object input) { VariantContext convert(String name, Object input) {
if ( ! Allele.acceptableAlleleBases(((RodGeliText)input).getReference()) ) return convert(name, input, null);
return null;
Allele refAllele = new Allele(((RodGeliText)input).getReference(), true);
return convert(name, input, refAllele);
} }
/** /**
* convert to a Variant Context, given: * convert to a Variant Context, given:
* @param name the name of the ROD * @param name the name of the ROD
* @param input the Rod object, in this case a RodGeliText * @param input the Rod object, in this case a RodGeliText
* @param refAllele the reference base as an Allele object * @param ref the reference context
* @return a VariantContext object * @return a VariantContext object
*/ */
VariantContext convert(String name, Object input, Allele refAllele) { VariantContext convert(String name, Object input, ReferenceContext ref) {
RodGeliText geli = (RodGeliText)input; RodGeliText geli = (RodGeliText)input;
if ( ! Allele.acceptableAlleleBases(geli.getReference()) )
return null;
Allele refAllele = new Allele(geli.getReference(), true);
// make sure we can convert it // make sure we can convert it
if ( geli.isSNP() || geli.isIndel()) { if ( geli.isSNP() || geli.isIndel()) {
@ -593,21 +657,25 @@ public class VariantContextAdaptors {
* @return a VariantContext object * @return a VariantContext object
*/ */
VariantContext convert(String name, Object input) { VariantContext convert(String name, Object input) {
throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires knowledge of the reference allele"); return convert(name, input, null);
} }
/** /**
* convert to a Variant Context, given: * convert to a Variant Context, given:
* @param name the name of the ROD * @param name the name of the ROD
* @param input the Rod object, in this case a RodGeliText * @param input the Rod object, in this case a RodGeliText
* @param refAllele the reference base as an Allele object * @param ref the reference context
* @return a VariantContext object * @return a VariantContext object
*/ */
VariantContext convert(String name, Object input, Allele refAllele) { VariantContext convert(String name, Object input, ReferenceContext ref) {
if ( ref == null )
throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires a reference context");
HapMapROD hapmap = (HapMapROD)input; HapMapROD hapmap = (HapMapROD)input;
// add the reference allele // add the reference allele
HashSet<Allele> alleles = new HashSet<Allele>(); HashSet<Allele> alleles = new HashSet<Allele>();
Allele refAllele = new Allele(Character.toString(ref.getBase()), true);
alleles.add(refAllele); alleles.add(refAllele);
// make a mapping from sample to genotype // make a mapping from sample to genotype

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.cmdLine.Argument;
@ -15,6 +14,7 @@ import java.util.*;
* Converts variants from other file formats to VCF format. * Converts variants from other file formats to VCF format.
*/ */
@Requires(value={},referenceMetaData=@RMD(name=VariantsToVCF.INPUT_ROD_NAME,type= ReferenceOrderedDatum.class)) @Requires(value={},referenceMetaData=@RMD(name=VariantsToVCF.INPUT_ROD_NAME,type= ReferenceOrderedDatum.class))
@Reference(window=@Window(start=0,stop=40))
public class VariantsToVCF extends RodWalker<Integer, Integer> { public class VariantsToVCF extends RodWalker<Integer, Integer> {
public static final String INPUT_ROD_NAME = "variant"; public static final String INPUT_ROD_NAME = "variant";
@ -27,7 +27,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
// Don't allow mixed types for now // Don't allow mixed types for now
private EnumSet<VariantContext.Type> ALLOWED_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, VariantContext.Type.NO_VARIATION, VariantContext.Type.INDEL); private EnumSet<VariantContext.Type> ALLOWED_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, VariantContext.Type.NO_VARIATION, VariantContext.Type.INDEL);
private String[] ALLOWED_FORMAT_FIELDS = {"GT"}; private String[] ALLOWED_FORMAT_FIELDS = {VCFGenotypeRecord.GENOTYPE_KEY, VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, VCFGenotypeRecord.DEPTH_KEY, VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY };
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) ) if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) )
@ -35,8 +35,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getReferenceMetaData(rodDbSNP.STANDARD_DBSNP_TRACK_NAME)); rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getReferenceMetaData(rodDbSNP.STANDARD_DBSNP_TRACK_NAME));
Allele refAllele = new Allele(Character.toString(ref.getBase()), true); Collection<VariantContext> contexts = tracker.getVariantContexts(ref, INPUT_ROD_NAME, ALLOWED_VARIANT_CONTEXT_TYPES, context.getLocation(), true, false);
Collection<VariantContext> contexts = tracker.getVariantContexts(INPUT_ROD_NAME, ALLOWED_VARIANT_CONTEXT_TYPES, context.getLocation(), refAllele, true, false);
for ( VariantContext vc : contexts ) { for ( VariantContext vc : contexts ) {
VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), Arrays.asList(ALLOWED_FORMAT_FIELDS), false, false); VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), Arrays.asList(ALLOWED_FORMAT_FIELDS), false, false);

View File

@ -73,7 +73,7 @@ public class RealignerTargetCreator extends LocusWalker<RealignerTargetCreator.E
// look at the rods for indels or SNPs // look at the rods for indels or SNPs
if ( tracker != null ) { if ( tracker != null ) {
for ( VariantContext vc : tracker.getAllVariantContexts() ) { for ( VariantContext vc : tracker.getAllVariantContexts(ref) ) {
switch ( vc.getType() ) { switch ( vc.getType() ) {
case INDEL: case INDEL:
hasIndel = true; hasIndel = true;

View File

@ -61,7 +61,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
String refBase = String.valueOf(ref.getBase()); String refBase = String.valueOf(ref.getBase());
Collection<VariantContext> VCs = tracker.getAllVariantContexts(); Collection<VariantContext> VCs = tracker.getAllVariantContexts(ref);
if ( VCs.size() == 0 ) if ( VCs.size() == 0 )
return ""; return "";
@ -111,7 +111,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
else if ( vc.isInsertion() ) else if ( vc.isInsertion() )
assay_sequence = leading_bases + refBase + "[-/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases; assay_sequence = leading_bases + refBase + "[-/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases;
else if ( vc.isDeletion() ) else if ( vc.isDeletion() )
assay_sequence = leading_bases + refBase + "[" + vc.getReference().toString() + "/-]" + trailing_bases.substring(vc.getReference().length()); assay_sequence = leading_bases + refBase + "[" + new String(vc.getReference().getBases()) + "/-]" + trailing_bases.substring(vc.getReference().length());
else else
return ""; return "";

View File

@ -33,9 +33,6 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
// "used for smart Hardy-Weinberg annotation",required = false) // "used for smart Hardy-Weinberg annotation",required = false)
//private File popFile = null; //private File popFile = null;
// max allowable indel size (based on ref window)
private static final int MAX_INDEL_SIZE = 40;
// sample names // sample names
private TreeSet<String> sampleNames = null; private TreeSet<String> sampleNames = null;
@ -74,10 +71,7 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
Object rod = rods.get(0); Object rod = rods.get(0);
// determine the reference allele VariantContext vc = VariantContextAdaptors.toVariantContext("sequenom", rod, ref);
Allele refAllele = determineRefAllele(rod, ref);
VariantContext vc = VariantContextAdaptors.toVariantContext("sequenom", rod, refAllele);
if ( sampleNames == null ) if ( sampleNames == null )
sampleNames = new TreeSet<String>(vc.getSampleNames()); sampleNames = new TreeSet<String>(vc.getSampleNames());
@ -85,32 +79,6 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
return addVariantInformationToCall(ref, vc, rod); return addVariantInformationToCall(ref, vc, rod);
} }
private Allele determineRefAllele(Object rod, ReferenceContext ref) {
Allele refAllele;
// ugly hack to get around the fact that the Plink rod needs
// a very specific determination of the reference allele
if ( rod instanceof PlinkRod ) {
PlinkRod plink = (PlinkRod)rod;
if ( !plink.isIndel() ) {
refAllele = new Allele(Character.toString(ref.getBase()), true);
} else if ( plink.isInsertion() ) {
refAllele = new Allele(PlinkRod.SEQUENOM_NO_BASE, true);
} else {
if ( plink.getLength() > MAX_INDEL_SIZE )
throw new UnsupportedOperationException("PlinkToVCF currently can only handle indels up to length " + MAX_INDEL_SIZE);
char[] deletion = new char[plink.getLength()];
System.arraycopy(ref.getBases(), 1, deletion, 0, plink.getLength());
refAllele = new Allele(new String(deletion), true);
}
} else {
refAllele = new Allele(Character.toString(ref.getBase()), true);
}
return refAllele;
}
public Integer reduce(VCFRecord call, Integer numVariants) { public Integer reduce(VCFRecord call, Integer numVariants) {
if ( call != null ) { if ( call != null ) {
numVariants++; numVariants++;

View File

@ -45,7 +45,7 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
EnumSet<VariantContext.Type> allowedTypes = onlyOfThisType == null ? null : EnumSet.of(onlyOfThisType); EnumSet<VariantContext.Type> allowedTypes = onlyOfThisType == null ? null : EnumSet.of(onlyOfThisType);
int n = 0; int n = 0;
for (VariantContext vc : tracker.getAllVariantContexts(allowedTypes, context.getLocation(), onlyContextsStartinAtCurrentPosition, takeFirstOnly) ) { for (VariantContext vc : tracker.getAllVariantContexts(ref, allowedTypes, context.getLocation(), onlyContextsStartinAtCurrentPosition, takeFirstOnly) ) {
if ( writer != null && n == 0 ) { if ( writer != null && n == 0 ) {
if ( ! wroteHeader ) { if ( ! wroteHeader ) {
writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc)); writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc));

View File

@ -6,9 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodGenotypeChipAsGFF;
import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
@ -18,7 +16,6 @@ import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Collection; import java.util.Collection;
@ -49,12 +46,12 @@ public class SnpCallRateByCoverageWalker extends LocusWalker<List<String>, Strin
return (BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 && return (BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 &&
context.getBasePileup().size() != 0 && context.getBasePileup().size() != 0 &&
tracker != null && tracker != null &&
tracker.getAllVariantContexts() != null tracker.getAllVariantContexts(ref) != null
); );
} }
public List<String> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public List<String> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
Collection<VariantContext> contexts = tracker.getAllVariantContexts(); Collection<VariantContext> contexts = tracker.getAllVariantContexts(ref);
for (VariantContext vc : contexts) { for (VariantContext vc : contexts) {
if (vc.isVariant() && !vc.isFiltered()) { if (vc.isVariant() && !vc.isFiltered()) {

View File

@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.util.HashMap; import java.util.HashMap;
import java.util.Collection;
/* /*
* Copyright (c) 2010 The Broad Institute * Copyright (c) 2010 The Broad Institute
@ -100,11 +101,12 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
if( tracker != null ) { if( tracker != null ) {
Collection<VariantContext> VCs = tracker.getAllVariantContexts(ref);
// First find out if this variant is in the truth sets // First find out if this variant is in the truth sets
boolean isInTruthSet = false; boolean isInTruthSet = false;
boolean isTrueVariant = false; boolean isTrueVariant = false;
for ( VariantContext vc : tracker.getAllVariantContexts() ) { for ( VariantContext vc : VCs ) {
if( vc.getName().toUpperCase().startsWith("TRUTH") ) { if( vc.getName().toUpperCase().startsWith("TRUTH") ) {
isInTruthSet = true; isInTruthSet = true;
if (vc.isVariant()) if (vc.isVariant())
@ -113,7 +115,7 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
} }
// Add each annotation in this VCF Record to the dataManager // Add each annotation in this VCF Record to the dataManager
for ( VariantContext vc : tracker.getAllVariantContexts() ) { for ( VariantContext vc : VCs ) {
if( !vc.getName().toUpperCase().startsWith("TRUTH") ) { if( !vc.getName().toUpperCase().startsWith("TRUTH") ) {
if( vc.isVariant() ) { if( vc.isVariant() ) {
dataManager.addAnnotations( vc, ref.getBase(), SAMPLE_NAME, isInTruthSet, isTrueVariant ); dataManager.addAnnotations( vc, ref.getBase(), SAMPLE_NAME, isInTruthSet, isTrueVariant );

View File

@ -126,7 +126,7 @@ public class VariantConcordanceROCCurveWalker extends RodWalker<ExpandingArrayLi
boolean isInTruthSet = false; boolean isInTruthSet = false;
boolean isTrueVariant = false; boolean isTrueVariant = false;
for( final VariantContext vc : tracker.getAllVariantContexts(null, context.getLocation(), false, false) ) { for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
if( vc != null && vc.getName().toUpperCase().startsWith("TRUTH") ) { if( vc != null && vc.getName().toUpperCase().startsWith("TRUTH") ) {
if( vc.isSNP() && !vc.isFiltered() ) { if( vc.isSNP() && !vc.isFiltered() ) {
if( multiSampleCalls ) { if( multiSampleCalls ) {

View File

@ -116,7 +116,7 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
double annotationValues[] = new double[numAnnotations]; double annotationValues[] = new double[numAnnotations];
for( final VariantContext vc : tracker.getAllVariantContexts(null, context.getLocation(), false, false) ) { for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
if( vc != null && vc.isSNP() ) { if( vc != null && vc.isSNP() ) {
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) { if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {

View File

@ -66,4 +66,19 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
md5); md5);
executeTest("testVariantsToVCFUsingHapMapInput", spec).getFirst(); executeTest("testVariantsToVCFUsingHapMapInput", spec).getFirst();
} }
@Test
public void testGenotypesToVCFUsingVCFInput() {
List<String> md5 = new ArrayList<String>();
md5.add("92d661a3789e55078197666eb9ee7020");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
" -B variant,VCF," + validationDataLocation + "complexExample.vcf" +
" -T VariantsToVCF" +
" -o %s",
1, // just one output file
md5);
executeTest("testVariantsToVCFUsingVCFInput", spec).getFirst();
}
} }

View File

@ -0,0 +1,17 @@
package org.broadinstitute.sting.gatk.walkers.sequenom;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.util.Arrays;
public class PickSequenomProbesIntegrationTest extends WalkerTest {
@Test
public void testProbes() {
String testVCF = validationDataLocation + "complexExample.vcf";
String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T PickSequenomProbes -L 1:10,000,000-11,000,000 -B input,VCF,"+testVCF+" -o %s";
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("6b5409cc78960f1be855536ed89ea9dd"));
executeTest("Test probes", spec);
}
}