Removed the SequenomValidationConvertor and renamed it VariantValidationAssessor since it no longer handles ped/sequenom files (but instead works on vcfs/variantcontexts). Updated all of the wiki docs, including adding instructions on how to convert ped files to vcf, a la Shaun Purcell. We now officially no longer support ped files everyone. Other misc cleanup in the code.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4419 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-10-04 02:11:38 +00:00
parent a15757b8e8
commit 6448753cf7
5 changed files with 21 additions and 66 deletions

View File

@ -1,7 +1,5 @@
package org.broadinstitute.sting.gatk.refdata; package org.broadinstitute.sting.gatk.refdata;
import edu.mit.broad.picard.genotype.DiploidGenotype;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broad.tribble.gelitext.GeliTextFeature; import org.broad.tribble.gelitext.GeliTextFeature;
import org.broad.tribble.hapmap.HapMapFeature; import org.broad.tribble.hapmap.HapMapFeature;
@ -114,15 +112,6 @@ public class VariantContextAdaptors {
} }
} }
private static Allele deletionAllele(ReferenceContext ref, int start, int len) {
byte[] deletion = new byte[len];
System.arraycopy(ref.getBases(), start, deletion, 0, len);
return Allele.create(deletion, true);
}
public static VCFHeader createVCFHeader(Set<VCFHeaderLine> hInfo, VariantContext vc) { public static VCFHeader createVCFHeader(Set<VCFHeaderLine> hInfo, VariantContext vc) {
HashSet<String> names = new LinkedHashSet<String>(); HashSet<String> names = new LinkedHashSet<String>();
for ( Genotype g : vc.getGenotypesSortedByName() ) { for ( Genotype g : vc.getGenotypesSortedByName() ) {

View File

@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers.filters;
import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.*; import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
@ -37,7 +36,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.vcf.VCFUtils;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2010 The Broad Institute * Copyright (c) 2010.
* *
* Permission is hereby granted, free of charge, to any person * Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation * obtaining a copy of this software and associated documentation
@ -23,12 +23,11 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE. * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
package org.broadinstitute.sting.gatk.walkers.sequenom; package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.*; import org.broad.tribble.vcf.*;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
@ -36,6 +35,8 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
@ -46,8 +47,10 @@ import java.util.*;
* Converts Sequenom files to a VCF annotated with QC metrics (HW-equilibrium, % failed probes) * Converts Sequenom files to a VCF annotated with QC metrics (HW-equilibrium, % failed probes)
*/ */
@Reference(window=@Window(start=0,stop=40)) @Reference(window=@Window(start=0,stop=40))
@Requires(value={},referenceMetaData=@RMD(name="sequenom",type= Feature.class)) @Requires(value={},referenceMetaData=@RMD(name=VariantValidationAssessor.INPUT_VARIANT_ROD_BINDING_NAME, type=VariantContext.class))
public class SequenomValidationConverter extends RodWalker<Pair<VariantContext, Byte>,Integer> { public class VariantValidationAssessor extends RodWalker<Pair<VariantContext, Byte>,Integer> {
public static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant";
@Output(doc="File to which variants should be written",required=true) @Output(doc="File to which variants should be written",required=true)
protected VCFWriter vcfwriter = null; protected VCFWriter vcfwriter = null;
@ -66,7 +69,7 @@ public class SequenomValidationConverter extends RodWalker<Pair<VariantContext,
// sample names // sample names
private TreeSet<String> sampleNames = null; private TreeSet<String> sampleNames = null;
// vcf records // variant context records
private ArrayList<Pair<VariantContext, Byte>> records = new ArrayList<Pair<VariantContext, Byte>>(); private ArrayList<Pair<VariantContext, Byte>> records = new ArrayList<Pair<VariantContext, Byte>>();
// statistics // statistics
@ -85,28 +88,26 @@ public class SequenomValidationConverter extends RodWalker<Pair<VariantContext,
} }
public Integer reduceInit() { public Integer reduceInit() {
int numberOfVariantsProcessed = 0; return 0;
return numberOfVariantsProcessed;
} }
public Pair<VariantContext, Byte> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public Pair<VariantContext, Byte> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null ) if ( tracker == null )
return null; return null;
// get the sequenom rod at this locus if there is one List<Object> rods = tracker.getReferenceMetaData(INPUT_VARIANT_ROD_BINDING_NAME);
List<Object> rods = tracker.getReferenceMetaData("sequenom");
// ignore places where we don't have a variant // ignore places where we don't have a variant
if ( rods.size() == 0 ) if ( rods.size() == 0 )
return null; return null;
Object rod = rods.get(0); Object rod = rods.get(0);
VariantContext vc = VariantContextAdaptors.toVariantContext("sequenom", rod, ref); VariantContext vc = VariantContextAdaptors.toVariantContext(INPUT_VARIANT_ROD_BINDING_NAME, rod, ref);
if ( sampleNames == null ) if ( sampleNames == null )
sampleNames = new TreeSet<String>(vc.getSampleNames()); sampleNames = new TreeSet<String>(vc.getSampleNames());
return addVariantInformationToCall(ref, vc, rod); return addVariantInformationToCall(ref, vc);
} }
public Integer reduce(Pair<VariantContext, Byte> call, Integer numVariants) { public Integer reduce(Pair<VariantContext, Byte> call, Integer numVariants) {
@ -118,13 +119,14 @@ public class SequenomValidationConverter extends RodWalker<Pair<VariantContext,
} }
public void onTraversalDone(Integer finalReduce) { public void onTraversalDone(Integer finalReduce) {
if ( sampleNames == null ) final ArrayList<String> inputNames = new ArrayList<String>();
sampleNames = new TreeSet<String>(); inputNames.add( INPUT_VARIANT_ROD_BINDING_NAME );
// setup the header fields
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
// set up the info and filter headers // set up the info and filter headers
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.add(new VCFHeaderLine("source", "SequenomValidationConverter"));
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
hInfo.add(new VCFInfoHeaderLine("NoCallPct", 1, VCFHeaderLineType.Float, "Percent of no-calls")); hInfo.add(new VCFInfoHeaderLine("NoCallPct", 1, VCFHeaderLineType.Float, "Percent of no-calls"));
hInfo.add(new VCFInfoHeaderLine("HomRefPct", 1, VCFHeaderLineType.Float, "Percent of homozygous reference genotypes")); hInfo.add(new VCFInfoHeaderLine("HomRefPct", 1, VCFHeaderLineType.Float, "Percent of homozygous reference genotypes"));
hInfo.add(new VCFInfoHeaderLine("HetPct", 1, VCFHeaderLineType.Float, "Percent of heterozygous genotypes")); hInfo.add(new VCFInfoHeaderLine("HetPct", 1, VCFHeaderLineType.Float, "Percent of heterozygous genotypes"));
@ -157,15 +159,14 @@ public class SequenomValidationConverter extends RodWalker<Pair<VariantContext,
} }
} }
VCFHeader header = new VCFHeader(hInfo, sampleNames); vcfwriter.writeHeader(new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)));
vcfwriter.writeHeader(header);
for ( Pair<VariantContext, Byte> record : records ) for ( Pair<VariantContext, Byte> record : records )
vcfwriter.add(record.first, record.second); vcfwriter.add(record.first, record.second);
} }
private Pair<VariantContext, Byte> addVariantInformationToCall(ReferenceContext ref, VariantContext vContext, Object rod) { private Pair<VariantContext, Byte> addVariantInformationToCall(ReferenceContext ref, VariantContext vContext) {
// check possible filters // check possible filters
double hwPvalue = hardyWeinbergCalculation(vContext); double hwPvalue = hardyWeinbergCalculation(vContext);

View File

@ -1,32 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.sequenom;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.util.Arrays;
public class SequenomValidationConverterIntegrationTest extends WalkerTest {
public void testEmpty() {
System.err.println("Reinstate these tests when plink is back in");
}
//@Test TODO: reinstate the test when the Plink rod is back
public void testSNPs() {
String testPedFile = validationDataLocation + "Sequenom_Test_File.txt";
String testArgs = "-R "+b36KGReference + " -T SequenomValidationConverter -B:sequenom,Plink "+testPedFile+" -o %s";
WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("2dab4630f40b76c0762de83fcbb60d09"));
executeTest("Test SNPs", spec);
}
// @Test
// TODO- need to be reenabled when PED reader tracks gets updated to read indels correctly
public void testIndels() {
String testPedFile = validationDataLocation + "pilot2_indel_validation.renamed.ped";
String testArgs = "-R "+b36KGReference + " -T SequenomValidationConverter -B:sequenom,Plink "+testPedFile+" -o %s";
WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("fad2dd71550dec064d458c4aa83e4de9"));
executeTest("Test Indels", spec);
}
}

View File

@ -29,7 +29,6 @@
<class name="org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker" /> <class name="org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker" />
<class name="org.broadinstitute.sting.gatk.walkers.qc.ValidatingPileupWalker" /> <class name="org.broadinstitute.sting.gatk.walkers.qc.ValidatingPileupWalker" />
<!-- Other misc walkers --> <!-- Other misc walkers -->
<class name="org.broadinstitute.sting.gatk.walkers.sequenom.SequenomValidationConverter" />
<class name="org.broadinstitute.sting.gatk.walkers.variantutils.CombineVariants" /> <class name="org.broadinstitute.sting.gatk.walkers.variantutils.CombineVariants" />
<class name="org.broadinstitute.sting.gatk.walkers.VariantsToVCF" /> <class name="org.broadinstitute.sting.gatk.walkers.VariantsToVCF" />
</dependencies> </dependencies>