a) Cosmetic modifications to IndelType annotation. b) Add ability to select samples from a file in PrintReads, c) fixes to shaped AF random selection in SelectVariants

This commit is contained in:
Guillermo del Angel 2011-07-07 06:15:10 -04:00
parent 9124c84a7c
commit 5ab2e83904
3 changed files with 69 additions and 12 deletions

View File

@ -32,9 +32,14 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import java.io.File;
import java.io.PrintStream;
import java.util.Collection;
import java.util.Set;
import java.util.TreeSet;
/**
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear
@ -54,6 +59,13 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
String platform = null; // E.g. ILLUMINA, 454
@Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
int nReadsToPrint = -1;
@Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false)
public Set<File> sampleFiles;
@Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
public Set<String> sampleNames;
private TreeSet<String> samplesToChoose = new TreeSet<String>();
private boolean NO_SAMPLES_SPECIFIED = false;
/**
* The initialize function.
@ -61,6 +73,17 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
public void initialize() {
if ( platform != null )
platform = platform.toUpperCase();
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
samplesToChoose.addAll(samplesFromFile);
if (sampleNames != null)
samplesToChoose.addAll(sampleNames);
if(samplesToChoose.isEmpty()) {
NO_SAMPLES_SPECIFIED = true;
}
}
/**
@ -87,6 +110,22 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
if ( readPlatformAttr == null || !readPlatformAttr.toString().toUpperCase().contains(platform))
return false;
}
if (!NO_SAMPLES_SPECIFIED ) {
// user specified samples to select
String readSample = read.getReadGroup().getSample();
boolean found = false;
for (String sampleSelected : samplesToChoose) {
if (readSample.equalsIgnoreCase(sampleSelected)) {
found = true;
break;
}
}
if (!found)
return false;
}
// check if we've reached the output limit
if ( nReadsToPrint == 0 ) {

View File

@ -24,11 +24,27 @@ public class IndelType implements InfoFieldAnnotation, ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
int run;
if ( vc.isIndel() && vc.isBiallelic() ) {
if (vc.isMixed()) {
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%s", "MIXED"));
return map;
}
else if ( vc.isIndel() ) {
String type="";
ArrayList<Integer> inds = IndelUtils.findEventClassificationIndex(vc, ref);
for (int k : inds) {
type = type+ IndelUtils.getIndelClassificationName(k)+".";
if (!vc.isBiallelic())
type = "MULTIALLELIC_INDEL";
else {
if (vc.isInsertion())
type = "INS.";
else if (vc.isDeletion())
type = "DEL.";
else
type = "OTHER.";
ArrayList<Integer> inds = IndelUtils.findEventClassificationIndex(vc, ref);
for (int k : inds) {
type = type+ IndelUtils.getIndelClassificationName(k)+".";
}
}
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%s", type));

View File

@ -96,8 +96,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false)
private boolean KEEP_AF_SPECTRUM = false;
@Hidden
@Argument(fullName="afFile", shortName="afFile", doc="The output recal file used by ApplyRecalibration", required=false)
private File AF_FILE = null;
private File AF_FILE = new File("");
@Argument(fullName="family_structure", shortName="family", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
private String FAMILY_STRUCTURE = "";
@ -240,7 +241,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + fractionRandom + "% of the variants at random from the variant track");
if (AF_FILE != null) {
if (KEEP_AF_SPECTRUM) {
try {
afBreakpoints = new ArrayList<Double>();
afBoosts = new ArrayList<Double>();
@ -328,8 +329,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if (SELECT_RANDOM_FRACTION && KEEP_AF_SPECTRUM ) {
// ok we have a comp VC and we need to match the AF spectrum of inputAFRodName.
// We then pick a variant with probablity AF*desiredFraction
if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) {
if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) {
String afo = sub.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY);
double af;
@ -354,21 +354,23 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
// now boost af by table read from file if desired
//double bkpt = 0.0;
int bkidx = 0;
if (AF_FILE != null) {
if (!afBreakpoints.isEmpty()) {
for ( Double bkpt : afBreakpoints) {
if (af < bkpt + bkDelta)
break;
else bkidx++;
}
afBoost = afBreakpoints.get(bkidx);
System.out.format("af:%f bkidx:%d afboost:%f\n",af,bkidx,afBoost);
if (bkidx >=afBoosts.size())
bkidx = afBoosts.size()-1;
afBoost = afBoosts.get(bkidx);
//System.out.formatPrin("af:%f bkidx:%d afboost:%f\n",af,bkidx,afBoost);
}
//System.out.format("%s .. %4.4f\n",afo.toString(), af);
if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * af * afBoost)
if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * afBoost * afBoost)
vcfWriter.add(sub, ref.getBase());
}