Renamed SequenomValidationConverter to VariantValidationAssessor (removing the old class), since it no longer handles ped/sequenom files but instead works on VCFs/VariantContexts. Updated all of the wiki docs, including adding instructions on how to convert ped files to VCF, a la Shaun Purcell. Everyone, please note: we now officially no longer support ped files. Other misc cleanup in the code.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4419 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a15757b8e8
commit
6448753cf7
|
|
@ -1,7 +1,5 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import edu.mit.broad.picard.genotype.DiploidGenotype;
|
||||
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.gelitext.GeliTextFeature;
|
||||
import org.broad.tribble.hapmap.HapMapFeature;
|
||||
|
|
@ -114,15 +112,6 @@ public class VariantContextAdaptors {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
private static Allele deletionAllele(ReferenceContext ref, int start, int len) {
|
||||
byte[] deletion = new byte[len];
|
||||
System.arraycopy(ref.getBases(), start, deletion, 0, len);
|
||||
return Allele.create(deletion, true);
|
||||
}
|
||||
|
||||
|
||||
public static VCFHeader createVCFHeader(Set<VCFHeaderLine> hInfo, VariantContext vc) {
|
||||
HashSet<String> names = new LinkedHashSet<String>();
|
||||
for ( Genotype g : vc.getGenotypesSortedByName() ) {
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers.filters;
|
|||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
|
|
@ -37,7 +36,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.CommandLineUtils;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -23,12 +23,11 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.sequenom;
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
|
|
@ -36,6 +35,8 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
|
|
@ -46,8 +47,10 @@ import java.util.*;
|
|||
* Converts Sequenom files to a VCF annotated with QC metrics (HW-equilibrium, % failed probes)
|
||||
*/
|
||||
@Reference(window=@Window(start=0,stop=40))
|
||||
@Requires(value={},referenceMetaData=@RMD(name="sequenom",type= Feature.class))
|
||||
public class SequenomValidationConverter extends RodWalker<Pair<VariantContext, Byte>,Integer> {
|
||||
@Requires(value={},referenceMetaData=@RMD(name=VariantValidationAssessor.INPUT_VARIANT_ROD_BINDING_NAME, type=VariantContext.class))
|
||||
public class VariantValidationAssessor extends RodWalker<Pair<VariantContext, Byte>,Integer> {
|
||||
|
||||
public static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant";
|
||||
|
||||
@Output(doc="File to which variants should be written",required=true)
|
||||
protected VCFWriter vcfwriter = null;
|
||||
|
|
@ -66,7 +69,7 @@ public class SequenomValidationConverter extends RodWalker<Pair<VariantContext,
|
|||
// sample names
|
||||
private TreeSet<String> sampleNames = null;
|
||||
|
||||
// vcf records
|
||||
// variant context records
|
||||
private ArrayList<Pair<VariantContext, Byte>> records = new ArrayList<Pair<VariantContext, Byte>>();
|
||||
|
||||
// statistics
|
||||
|
|
@ -85,28 +88,26 @@ public class SequenomValidationConverter extends RodWalker<Pair<VariantContext,
|
|||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
int numberOfVariantsProcessed = 0;
|
||||
return numberOfVariantsProcessed;
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Pair<VariantContext, Byte> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null )
|
||||
return null;
|
||||
|
||||
// get the sequenom rod at this locus if there is one
|
||||
List<Object> rods = tracker.getReferenceMetaData("sequenom");
|
||||
List<Object> rods = tracker.getReferenceMetaData(INPUT_VARIANT_ROD_BINDING_NAME);
|
||||
// ignore places where we don't have a variant
|
||||
if ( rods.size() == 0 )
|
||||
return null;
|
||||
|
||||
Object rod = rods.get(0);
|
||||
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext("sequenom", rod, ref);
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext(INPUT_VARIANT_ROD_BINDING_NAME, rod, ref);
|
||||
|
||||
if ( sampleNames == null )
|
||||
sampleNames = new TreeSet<String>(vc.getSampleNames());
|
||||
|
||||
return addVariantInformationToCall(ref, vc, rod);
|
||||
return addVariantInformationToCall(ref, vc);
|
||||
}
|
||||
|
||||
public Integer reduce(Pair<VariantContext, Byte> call, Integer numVariants) {
|
||||
|
|
@ -118,13 +119,14 @@ public class SequenomValidationConverter extends RodWalker<Pair<VariantContext,
|
|||
}
|
||||
|
||||
public void onTraversalDone(Integer finalReduce) {
|
||||
if ( sampleNames == null )
|
||||
sampleNames = new TreeSet<String>();
|
||||
final ArrayList<String> inputNames = new ArrayList<String>();
|
||||
inputNames.add( INPUT_VARIANT_ROD_BINDING_NAME );
|
||||
|
||||
// setup the header fields
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
|
||||
|
||||
// set up the info and filter headers
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.add(new VCFHeaderLine("source", "SequenomValidationConverter"));
|
||||
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
hInfo.add(new VCFInfoHeaderLine("NoCallPct", 1, VCFHeaderLineType.Float, "Percent of no-calls"));
|
||||
hInfo.add(new VCFInfoHeaderLine("HomRefPct", 1, VCFHeaderLineType.Float, "Percent of homozygous reference genotypes"));
|
||||
hInfo.add(new VCFInfoHeaderLine("HetPct", 1, VCFHeaderLineType.Float, "Percent of heterozygous genotypes"));
|
||||
|
|
@ -157,15 +159,14 @@ public class SequenomValidationConverter extends RodWalker<Pair<VariantContext,
|
|||
}
|
||||
}
|
||||
|
||||
VCFHeader header = new VCFHeader(hInfo, sampleNames);
|
||||
vcfwriter.writeHeader(header);
|
||||
vcfwriter.writeHeader(new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)));
|
||||
|
||||
for ( Pair<VariantContext, Byte> record : records )
|
||||
vcfwriter.add(record.first, record.second);
|
||||
}
|
||||
|
||||
|
||||
private Pair<VariantContext, Byte> addVariantInformationToCall(ReferenceContext ref, VariantContext vContext, Object rod) {
|
||||
private Pair<VariantContext, Byte> addVariantInformationToCall(ReferenceContext ref, VariantContext vContext) {
|
||||
|
||||
// check possible filters
|
||||
double hwPvalue = hardyWeinbergCalculation(vContext);
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.sequenom;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class SequenomValidationConverterIntegrationTest extends WalkerTest {
|
||||
|
||||
public void testEmpty() {
|
||||
System.err.println("Reinstate these tests when plink is back in");
|
||||
}
|
||||
|
||||
//@Test TODO: reinstate the test when the Plink rod is back
|
||||
public void testSNPs() {
|
||||
String testPedFile = validationDataLocation + "Sequenom_Test_File.txt";
|
||||
String testArgs = "-R "+b36KGReference + " -T SequenomValidationConverter -B:sequenom,Plink "+testPedFile+" -o %s";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||
Arrays.asList("2dab4630f40b76c0762de83fcbb60d09"));
|
||||
executeTest("Test SNPs", spec);
|
||||
}
|
||||
|
||||
// @Test
|
||||
// TODO- need to be reenabled when PED reader tracks gets updated to read indels correctly
|
||||
public void testIndels() {
|
||||
String testPedFile = validationDataLocation + "pilot2_indel_validation.renamed.ped";
|
||||
String testArgs = "-R "+b36KGReference + " -T SequenomValidationConverter -B:sequenom,Plink "+testPedFile+" -o %s";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||
Arrays.asList("fad2dd71550dec064d458c4aa83e4de9"));
|
||||
executeTest("Test Indels", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -29,7 +29,6 @@
|
|||
<class name="org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.qc.ValidatingPileupWalker" />
|
||||
<!-- Other misc walkers -->
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.sequenom.SequenomValidationConverter" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.variantutils.CombineVariants" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.VariantsToVCF" />
|
||||
</dependencies>
|
||||
|
|
|
|||
Loading…
Reference in New Issue