Merge branch 'incoming'
This commit is contained in:
commit
4d565b0811
|
|
@ -32,9 +32,14 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.Collection;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/**
|
||||
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear
|
||||
|
|
@ -54,6 +59,13 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
String platform = null; // E.g. ILLUMINA, 454
|
||||
@Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
|
||||
int nReadsToPrint = -1;
|
||||
@Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false)
|
||||
public Set<File> sampleFiles;
|
||||
@Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
|
||||
public Set<String> sampleNames;
|
||||
|
||||
private TreeSet<String> samplesToChoose = new TreeSet<String>();
|
||||
private boolean NO_SAMPLES_SPECIFIED = false;
|
||||
|
||||
/**
|
||||
* The initialize function.
|
||||
|
|
@ -61,6 +73,17 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
public void initialize() {
|
||||
if ( platform != null )
|
||||
platform = platform.toUpperCase();
|
||||
|
||||
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
|
||||
samplesToChoose.addAll(samplesFromFile);
|
||||
|
||||
if (sampleNames != null)
|
||||
samplesToChoose.addAll(sampleNames);
|
||||
|
||||
if(samplesToChoose.isEmpty()) {
|
||||
NO_SAMPLES_SPECIFIED = true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -87,6 +110,22 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
if ( readPlatformAttr == null || !readPlatformAttr.toString().toUpperCase().contains(platform))
|
||||
return false;
|
||||
}
|
||||
if (!NO_SAMPLES_SPECIFIED ) {
|
||||
// user specified samples to select
|
||||
String readSample = read.getReadGroup().getSample();
|
||||
boolean found = false;
|
||||
for (String sampleSelected : samplesToChoose) {
|
||||
if (readSample.equalsIgnoreCase(sampleSelected)) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (!found)
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// check if we've reached the output limit
|
||||
if ( nReadsToPrint == 0 ) {
|
||||
|
|
|
|||
|
|
@ -24,11 +24,27 @@ public class IndelType implements InfoFieldAnnotation, ExperimentalAnnotation {
|
|||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
|
||||
int run;
|
||||
if ( vc.isIndel() && vc.isBiallelic() ) {
|
||||
if (vc.isMixed()) {
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%s", "MIXED"));
|
||||
return map;
|
||||
|
||||
}
|
||||
else if ( vc.isIndel() ) {
|
||||
String type="";
|
||||
ArrayList<Integer> inds = IndelUtils.findEventClassificationIndex(vc, ref);
|
||||
for (int k : inds) {
|
||||
type = type+ IndelUtils.getIndelClassificationName(k)+".";
|
||||
if (!vc.isBiallelic())
|
||||
type = "MULTIALLELIC_INDEL";
|
||||
else {
|
||||
if (vc.isInsertion())
|
||||
type = "INS.";
|
||||
else if (vc.isDeletion())
|
||||
type = "DEL.";
|
||||
else
|
||||
type = "OTHER.";
|
||||
ArrayList<Integer> inds = IndelUtils.findEventClassificationIndex(vc, ref);
|
||||
for (int k : inds) {
|
||||
type = type+ IndelUtils.getIndelClassificationName(k)+".";
|
||||
}
|
||||
}
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%s", type));
|
||||
|
|
|
|||
|
|
@ -96,8 +96,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false)
|
||||
private boolean KEEP_AF_SPECTRUM = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="afFile", shortName="afFile", doc="The output recal file used by ApplyRecalibration", required=false)
|
||||
private File AF_FILE = null;
|
||||
private File AF_FILE = new File("");
|
||||
|
||||
@Argument(fullName="family_structure", shortName="family", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
|
||||
private String FAMILY_STRUCTURE = "";
|
||||
|
|
@ -240,7 +241,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + fractionRandom + "% of the variants at random from the variant track");
|
||||
|
||||
|
||||
if (AF_FILE != null) {
|
||||
if (KEEP_AF_SPECTRUM) {
|
||||
try {
|
||||
afBreakpoints = new ArrayList<Double>();
|
||||
afBoosts = new ArrayList<Double>();
|
||||
|
|
@ -328,8 +329,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
if (SELECT_RANDOM_FRACTION && KEEP_AF_SPECTRUM ) {
|
||||
// ok we have a comp VC and we need to match the AF spectrum of inputAFRodName.
|
||||
// We then pick a variant with probablity AF*desiredFraction
|
||||
|
||||
if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) {
|
||||
if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) {
|
||||
String afo = sub.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY);
|
||||
|
||||
double af;
|
||||
|
|
@ -354,21 +354,23 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
// now boost af by table read from file if desired
|
||||
//double bkpt = 0.0;
|
||||
int bkidx = 0;
|
||||
if (AF_FILE != null) {
|
||||
if (!afBreakpoints.isEmpty()) {
|
||||
for ( Double bkpt : afBreakpoints) {
|
||||
if (af < bkpt + bkDelta)
|
||||
break;
|
||||
else bkidx++;
|
||||
}
|
||||
afBoost = afBreakpoints.get(bkidx);
|
||||
System.out.format("af:%f bkidx:%d afboost:%f\n",af,bkidx,afBoost);
|
||||
if (bkidx >=afBoosts.size())
|
||||
bkidx = afBoosts.size()-1;
|
||||
afBoost = afBoosts.get(bkidx);
|
||||
//System.out.formatPrin("af:%f bkidx:%d afboost:%f\n",af,bkidx,afBoost);
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
//System.out.format("%s .. %4.4f\n",afo.toString(), af);
|
||||
if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * af * afBoost)
|
||||
if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * afBoost * afBoost)
|
||||
vcfWriter.add(sub, ref.getBase());
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue