Merge branch 'master' of ssh://gsa1.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2011-07-02 17:15:56 -04:00
commit 6b8af6afd8
2 changed files with 149 additions and 5 deletions

View File

@ -0,0 +1,77 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.IndelUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: delangel
* Date: 6/29/11
* Time: 3:14 PM
* To change this template use File | Settings | File Templates.
*/
public class TechnologyComposition implements ExperimentalAnnotation,InfoFieldAnnotation {
private String nSLX = "NumSLX";
private String n454 ="Num454";
private String nSolid = "NumSOLiD";
private String nOther = "NumOther";
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;
int readsIllumina = 0;
int readsSolid = 0;
int reads454 = 0;
int readsOther = 0;
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
AlignmentContext context = sample.getValue();
ReadBackedPileup pileup = null;
if (context.hasExtendedEventPileup())
pileup = context.getExtendedEventPileup();
else if (context.hasBasePileup())
pileup = context.getBasePileup();
if (pileup != null) {
for (PileupElement p : pileup ) {
if(ReadUtils.is454Read(p.getRead()))
reads454++;
else if (ReadUtils.isSOLiDRead(p.getRead()))
readsSolid++;
else if (ReadUtils.isSLXRead(p.getRead()))
readsIllumina++;
else
readsOther++;
}
}
}
Map<String, Object> map = new HashMap<String, Object>();
map.put(nSLX, String.format("%d", readsIllumina));
map.put(n454, String.format("%d", reads454));
map.put(nSolid, String.format("%d", readsSolid));
map.put(nOther, String.format("%d", readsOther));
return map;
}
public List<String> getKeyNames() { return Arrays.asList(nSLX,n454,nSolid,nOther); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(nSLX, 1, VCFHeaderLineType.Integer, "Number of SLX reads"),
new VCFInfoHeaderLine(n454, 1, VCFHeaderLineType.Integer, "Number of 454 reads"),
new VCFInfoHeaderLine(nSolid, 1, VCFHeaderLineType.Integer, "Number of SOLiD reads"),
new VCFInfoHeaderLine(nOther, 1, VCFHeaderLineType.Integer, "Number of Other technology reads")); }
}

View File

@ -24,14 +24,13 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -42,9 +41,10 @@ import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.apache.log4j.Logger;
import java.io.File;
import java.lang.annotation.AnnotationFormatError;
import java.util.*;
/**
@ -75,12 +75,24 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="excludeFiltered", shortName="ef", doc="Don't include filtered loci in the analysis.", required=false)
private boolean EXCLUDE_FILTERED = false;
@Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't include filtered loci.", required=false)
private boolean KEEP_ORIGINAL_CHR_COUNTS = false;
@Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called on a ROD comparison track. Use -disc ROD_NAME", required=false)
private String discordanceRodName = "";
@Argument(fullName="concordance", shortName = "conc", doc="Output variants that were also called on a ROD comparison track. Use -conc ROD_NAME", required=false)
private String concordanceRodName = "";
@Hidden
@Argument(fullName="inputAF", shortName = "inputAF", doc="", required=false)
private String inputAFRodName = "";
@Hidden
@Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false)
private boolean KEEP_AF_SPECTRUM = false;
@Argument(fullName="family_structure", shortName="family", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
private String FAMILY_STRUCTURE = "";
@ -178,6 +190,12 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
// Initialize VCF header
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
if (KEEP_ORIGINAL_CHR_COUNTS) {
headerLines.add(new VCFFormatHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC"));
headerLines.add(new VCFFormatHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF"));
headerLines.add(new VCFFormatHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN"));
}
vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
for (int i = 0; i < SELECT_EXPRESSIONS.size(); i++) {
@ -269,6 +287,45 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
else if (!SELECT_RANDOM_FRACTION || GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom) {
vcfWriter.add(sub, ref.getBase());
}
else {
if (SELECT_RANDOM_FRACTION && KEEP_AF_SPECTRUM ) {
Collection<VariantContext> compVCs = tracker.getVariantContexts(ref, inputAFRodName, null, context.getLocation(), true, false);
if (compVCs.isEmpty())
return 0;
// ok we have a comp VC and we need to match the AF spectrum of inputAFRodName.
// We then pick a variant with probablity AF*desiredFraction
for (VariantContext compVC : compVCs) {
if ( compVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) {
String afo = compVC.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY);
double af;
if (afo.contains(",")) {
String[] afs = afo.split(",");
afs[0] = afs[0].substring(1,afs[0].length());
afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1);
double[] afd = new double[afs.length];
for (int k=0; k < afd.length; k++)
afd[k] = Double.valueOf(afs[k]);
af = MathUtils.arrayMax(afd);
//af = Double.valueOf(afs[0]);
}
else
af = Double.valueOf(afo);
//System.out.format("%s .. %4.4f\n",afo.toString(), af);
if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * af)
vcfWriter.add(sub, ref.getBase());
}
break; // do only one vc
}
}
}
}
}
@ -413,6 +470,16 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
}
if (KEEP_ORIGINAL_CHR_COUNTS) {
if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) )
attributes.put("AC_Orig",attributes.get(VCFConstants.ALLELE_COUNT_KEY));
if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) )
attributes.put("AF_Orig",attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY));
if ( attributes.containsKey(VCFConstants.ALLELE_NUMBER_KEY) )
attributes.put("AN_Orig",attributes.get(VCFConstants.ALLELE_NUMBER_KEY));
}
VariantContextUtils.calculateChromosomeCounts(sub,attributes,false);
attributes.put("DP", depth);