Merge branch 'incoming'

This commit is contained in:
Guillermo del Angel 2011-07-07 06:21:05 -04:00
commit 4d565b0811
3 changed files with 69 additions and 12 deletions

View File

@ -32,9 +32,14 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import java.io.File;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.Collection;
import java.util.Set;
import java.util.TreeSet;
/** /**
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear
@ -54,6 +59,13 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
String platform = null; // E.g. ILLUMINA, 454 String platform = null; // E.g. ILLUMINA, 454
@Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false) @Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
int nReadsToPrint = -1; int nReadsToPrint = -1;
@Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false)
public Set<File> sampleFiles;
@Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
public Set<String> sampleNames;
private TreeSet<String> samplesToChoose = new TreeSet<String>();
private boolean NO_SAMPLES_SPECIFIED = false;
/** /**
* The initialize function. * The initialize function.
@ -61,6 +73,17 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
public void initialize() { public void initialize() {
if ( platform != null ) if ( platform != null )
platform = platform.toUpperCase(); platform = platform.toUpperCase();
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
samplesToChoose.addAll(samplesFromFile);
if (sampleNames != null)
samplesToChoose.addAll(sampleNames);
if(samplesToChoose.isEmpty()) {
NO_SAMPLES_SPECIFIED = true;
}
} }
/** /**
@ -87,6 +110,22 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
if ( readPlatformAttr == null || !readPlatformAttr.toString().toUpperCase().contains(platform)) if ( readPlatformAttr == null || !readPlatformAttr.toString().toUpperCase().contains(platform))
return false; return false;
} }
if (!NO_SAMPLES_SPECIFIED ) {
// user specified samples to select
String readSample = read.getReadGroup().getSample();
boolean found = false;
for (String sampleSelected : samplesToChoose) {
if (readSample.equalsIgnoreCase(sampleSelected)) {
found = true;
break;
}
}
if (!found)
return false;
}
// check if we've reached the output limit // check if we've reached the output limit
if ( nReadsToPrint == 0 ) { if ( nReadsToPrint == 0 ) {

View File

@ -24,12 +24,28 @@ public class IndelType implements InfoFieldAnnotation, ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
int run; int run;
if ( vc.isIndel() && vc.isBiallelic() ) { if (vc.isMixed()) {
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%s", "MIXED"));
return map;
}
else if ( vc.isIndel() ) {
String type=""; String type="";
if (!vc.isBiallelic())
type = "MULTIALLELIC_INDEL";
else {
if (vc.isInsertion())
type = "INS.";
else if (vc.isDeletion())
type = "DEL.";
else
type = "OTHER.";
ArrayList<Integer> inds = IndelUtils.findEventClassificationIndex(vc, ref); ArrayList<Integer> inds = IndelUtils.findEventClassificationIndex(vc, ref);
for (int k : inds) { for (int k : inds) {
type = type+ IndelUtils.getIndelClassificationName(k)+"."; type = type+ IndelUtils.getIndelClassificationName(k)+".";
} }
}
Map<String, Object> map = new HashMap<String, Object>(); Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%s", type)); map.put(getKeyNames().get(0), String.format("%s", type));
return map; return map;

View File

@ -96,8 +96,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false) @Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false)
private boolean KEEP_AF_SPECTRUM = false; private boolean KEEP_AF_SPECTRUM = false;
@Hidden
@Argument(fullName="afFile", shortName="afFile", doc="The output recal file used by ApplyRecalibration", required=false) @Argument(fullName="afFile", shortName="afFile", doc="The output recal file used by ApplyRecalibration", required=false)
private File AF_FILE = null; private File AF_FILE = new File("");
@Argument(fullName="family_structure", shortName="family", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) @Argument(fullName="family_structure", shortName="family", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
private String FAMILY_STRUCTURE = ""; private String FAMILY_STRUCTURE = "";
@ -240,7 +241,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + fractionRandom + "% of the variants at random from the variant track"); if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + fractionRandom + "% of the variants at random from the variant track");
if (AF_FILE != null) { if (KEEP_AF_SPECTRUM) {
try { try {
afBreakpoints = new ArrayList<Double>(); afBreakpoints = new ArrayList<Double>();
afBoosts = new ArrayList<Double>(); afBoosts = new ArrayList<Double>();
@ -328,7 +329,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if (SELECT_RANDOM_FRACTION && KEEP_AF_SPECTRUM ) { if (SELECT_RANDOM_FRACTION && KEEP_AF_SPECTRUM ) {
// ok we have a comp VC and we need to match the AF spectrum of inputAFRodName. // ok we have a comp VC and we need to match the AF spectrum of inputAFRodName.
// We then pick a variant with probablity AF*desiredFraction // We then pick a variant with probablity AF*desiredFraction
if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) { if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) {
String afo = sub.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY); String afo = sub.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY);
@ -354,21 +354,23 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
// now boost af by table read from file if desired // now boost af by table read from file if desired
//double bkpt = 0.0; //double bkpt = 0.0;
int bkidx = 0; int bkidx = 0;
if (AF_FILE != null) { if (!afBreakpoints.isEmpty()) {
for ( Double bkpt : afBreakpoints) { for ( Double bkpt : afBreakpoints) {
if (af < bkpt + bkDelta) if (af < bkpt + bkDelta)
break; break;
else bkidx++; else bkidx++;
} }
afBoost = afBreakpoints.get(bkidx); if (bkidx >=afBoosts.size())
System.out.format("af:%f bkidx:%d afboost:%f\n",af,bkidx,afBoost); bkidx = afBoosts.size()-1;
afBoost = afBoosts.get(bkidx);
//System.out.formatPrin("af:%f bkidx:%d afboost:%f\n",af,bkidx,afBoost);
} }
//System.out.format("%s .. %4.4f\n",afo.toString(), af); //System.out.format("%s .. %4.4f\n",afo.toString(), af);
if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * af * afBoost) if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * afBoost * afBoost)
vcfWriter.add(sub, ref.getBase()); vcfWriter.add(sub, ref.getBase());
} }