diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index f4c565318..59d496828 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -100,7 +100,11 @@ public class GATKReport { * @param tableDescription the description of the table */ public void addTable(String tableName, String tableDescription) { - GATKReportTable table = new GATKReportTable(tableName, tableDescription); + addTable(tableName, tableDescription, true); + } + + public void addTable(String tableName, String tableDescription, boolean sortByPrimaryKey) { + GATKReportTable table = new GATKReportTable(tableName, tableDescription, sortByPrimaryKey); tables.put(tableName, table); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 0e503f92a..f7ea25696 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -3,9 +3,7 @@ package org.broadinstitute.sting.gatk.report; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.PrintStream; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.TreeSet; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -96,8 +94,9 @@ public class GATKReportTable { private String tableDescription; private String primaryKeyName; - private TreeSet primaryKeyColumn; + private Collection primaryKeyColumn; private boolean primaryKeyDisplay; + boolean sortByPrimaryKey = true; private LinkedHashMap columns; @@ -121,12 +120,17 @@ public class GATKReportTable { * @param tableDescription the description of the table */ public GATKReportTable(String tableName, String tableDescription) { - if (!isValidName(tableName)) { + this(tableName, tableDescription, true); + } + + public GATKReportTable(String tableName, String tableDescription, boolean sortByPrimaryKey) { + if (!isValidName(tableName)) { throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed."); } this.tableName = tableName; this.tableDescription = tableDescription; + this.sortByPrimaryKey = sortByPrimaryKey; columns = new LinkedHashMap(); } @@ -137,20 +141,14 @@ public class GATKReportTable { * @param primaryKeyName the name of the primary key column */ public void addPrimaryKey(String primaryKeyName) { - if (!isValidName(primaryKeyName)) { - throw new ReviewedStingException("Attempted to set a GATKReportTable primary key name of '" + primaryKeyName + "'. GATKReportTable primary key names must be purely alphanumeric - no spaces or special characters are allowed."); - } - - this.primaryKeyName = primaryKeyName; - - primaryKeyColumn = new TreeSet(); - primaryKeyDisplay = true; + addPrimaryKey(primaryKeyName, true); } /** * Add an optionally visible primary key column. This becomes the unique identifier for every column in the table, and will always be printed as the first column. * * @param primaryKeyName the name of the primary key column + * @param display should this primary key be displayed? */ public void addPrimaryKey(String primaryKeyName, boolean display) { if (!isValidName(primaryKeyName)) { @@ -159,7 +157,7 @@ public class GATKReportTable { this.primaryKeyName = primaryKeyName; - primaryKeyColumn = new TreeSet(); + primaryKeyColumn = sortByPrimaryKey ? new TreeSet() : new LinkedList(); primaryKeyDisplay = display; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index a189c00b5..57ea5166a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -30,10 +30,16 @@ import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; +import java.io.File; +import java.util.Collection; +import java.util.Set; +import java.util.TreeSet; + +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; /** * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear * in the input file. It can dynamically merge the contents of multiple input BAM files, resulting @@ -52,6 +58,13 @@ public class PrintReadsWalker extends ReadWalker { String platform = null; // E.g. ILLUMINA, 454 @Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false) int nReadsToPrint = -1; + @Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false) + public Set sampleFiles; + @Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false) + public Set sampleNames; + + private TreeSet samplesToChoose = new TreeSet(); + private boolean NO_SAMPLES_SPECIFIED = false; /** * The initialize function. @@ -59,6 +72,17 @@ public class PrintReadsWalker extends ReadWalker { public void initialize() { if ( platform != null ) platform = platform.toUpperCase(); + + Collection samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles); + samplesToChoose.addAll(samplesFromFile); + + if (sampleNames != null) + samplesToChoose.addAll(sampleNames); + + if(samplesToChoose.isEmpty()) { + NO_SAMPLES_SPECIFIED = true; + } + } /** @@ -85,6 +109,22 @@ public class PrintReadsWalker extends ReadWalker { if ( readPlatformAttr == null || !readPlatformAttr.toString().toUpperCase().contains(platform)) return false; } + if (!NO_SAMPLES_SPECIFIED ) { + // user specified samples to select + String readSample = read.getReadGroup().getSample(); + boolean found = false; + for (String sampleSelected : samplesToChoose) { + if (readSample.equalsIgnoreCase(sampleSelected)) { + found = true; + break; + } + + } + + if (!found) + return false; + } + // check if we've reached the output limit if ( nReadsToPrint == 0 ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java index 12b48473d..2fd62ddf3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java @@ -24,11 +24,27 @@ public class IndelType implements InfoFieldAnnotation, ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { int run; - if ( vc.isIndel() && vc.isBiallelic() ) { + if (vc.isMixed()) { + Map map = new HashMap(); + map.put(getKeyNames().get(0), String.format("%s", "MIXED")); + return map; + + } + else if ( vc.isIndel() ) { String type=""; - ArrayList inds = IndelUtils.findEventClassificationIndex(vc, ref); - for (int k : inds) { - type = type+ IndelUtils.getIndelClassificationName(k)+"."; + if (!vc.isBiallelic()) + type = "MULTIALLELIC_INDEL"; + else { + if (vc.isInsertion()) + type = "INS."; + else if (vc.isDeletion()) + type = "DEL."; + else + type = "OTHER."; + ArrayList inds = IndelUtils.findEventClassificationIndex(vc, ref); + for (int k : inds) { + type = type+ IndelUtils.getIndelClassificationName(k)+"."; + } } Map map = new HashMap(); map.put(getKeyNames().get(0), String.format("%s", type)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java index 15b16ca6b..a1c043365 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java @@ -29,9 +29,7 @@ import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecordIterator; import net.sf.samtools.util.BlockCompressedInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; +import java.io.*; import java.util.Arrays; @@ -102,8 +100,10 @@ public class BAMDiffableReader implements DiffableReader { final byte[] BAM_MAGIC = "BAM\1".getBytes(); final byte[] buffer = new byte[BAM_MAGIC.length]; try { - FileInputStream fstream = new FileInputStream(file); - new BlockCompressedInputStream(fstream).read(buffer,0,BAM_MAGIC.length); + InputStream fstream = new BufferedInputStream(new FileInputStream(file)); + if ( !BlockCompressedInputStream.isValidFile(fstream) ) + return false; + new BlockCompressedInputStream(fstream).read(buffer, 0, BAM_MAGIC.length); return Arrays.equals(buffer, BAM_MAGIC); } catch ( IOException e ) { return false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index e3910ef11..89e20dad1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -143,7 +143,7 @@ public class DiffEngine { * Not that only pairs of the same length are considered as potentially equivalent * * @param params determines how we display the items - * @param diffs + * @param diffs the list of differences to summarize */ public void reportSummarizedDifferences(List diffs, SummaryReportParams params ) { printSummaryReport(summarizeDifferences(diffs), params ); @@ -207,14 +207,7 @@ public class DiffEngine { } protected void printSummaryReport(List sortedSummaries, SummaryReportParams params ) { - GATKReport report = new GATKReport(); - final String tableName = "diffences"; - report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information"); - GATKReportTable table = report.getTable(tableName); - table.addPrimaryKey("Difference", true); - table.addColumn("NumberOfOccurrences", 0); - table.addColumn("SpecificDifference", 0); - + List toShow = new ArrayList(); int count = 0, count1 = 0; for ( Difference diff : sortedSummaries ) { if ( diff.getCount() < params.minSumDiffToShow ) @@ -230,10 +223,26 @@ public class DiffEngine { break; } - table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount()); - table.set(diff.getPath(), "SpecificDifference", diff.valueDiffString()); + toShow.add(diff); } + // if we want it in descending order, reverse the list + if ( ! params.descending ) { + Collections.reverse(toShow); + } + + // now that we have a specific list of values we want to show, display them + GATKReport report = new GATKReport(); + final String tableName = "diffences"; + report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false); + GATKReportTable table = report.getTable(tableName); + table.addPrimaryKey("Difference", true); + table.addColumn("NumberOfOccurrences", 0); + table.addColumn("ExampleDifference", 0); + for ( Difference diff : toShow ) { + table.set(diff.getPath(), "NumberOfOccurrences", diff.getCount()); + table.set(diff.getPath(), "ExampleDifference", diff.valueDiffString()); + } table.write(params.out); } @@ -252,7 +261,7 @@ public class DiffEngine { * commonPostfixLength: how many parts are shared at the end, suppose its 2 * We want to create a string *.*.C.D * - * @param parts + * @param parts the separated path values [above without .] * @param commonPostfixLength * @return */ @@ -351,6 +360,7 @@ public class DiffEngine { int maxItemsToDisplay = 0; int maxCountOneItems = 0; int minSumDiffToShow = 0; + boolean descending = true; public SummaryReportParams(PrintStream out, int maxItemsToDisplay, int maxCountOneItems, int minSumDiffToShow) { this.out = out; @@ -358,5 +368,9 @@ public class DiffEngine { this.maxCountOneItems = maxCountOneItems; this.minSumDiffToShow = minSumDiffToShow; } + + public void setDescending(boolean descending) { + this.descending = descending; + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java index 8e362dcc4..fba6549fb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -112,6 +112,7 @@ public class DiffObjectsWalker extends RodWalker { } DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff); + params.setDescending(false); diffEngine.reportSummarizedDifferences(diffs, params); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index df2a5cda1..77a992ce0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -72,13 +72,19 @@ public class VCFDiffableReader implements DiffableReader { } String line = lineReader.readLine(); - int count = 0; + int count = 0, nRecordsAtPos = 1; + String prevName = ""; while ( line != null ) { if ( count++ > maxElementsToRead && maxElementsToRead != -1) break; VariantContext vc = (VariantContext)vcfCodec.decode(line); String name = vc.getChr() + ":" + vc.getStart(); + if ( name.equals(prevName) ) { + name += "_" + ++nRecordsAtPos; + } else { + prevName = name; + } DiffNode vcRoot = DiffNode.empty(name, root); // add fields diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 837f352f8..9c2a520ef 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.apache.poi.hpsf.Variant; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; @@ -149,7 +150,7 @@ public class CombineVariants extends RodWalker { // get all of the vcf rods at this locus // Need to provide reference bases to simpleMerge starting at current locus - Collection vcs = tracker.getAllVariantContexts(ref, null,context.getLocation(), true, false); + Collection vcs = tracker.getAllVariantContexts(ref, null, context.getLocation(), true, false); if ( sitesOnlyVCF ) { vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs); @@ -172,17 +173,25 @@ public class CombineVariants extends RodWalker { if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN)) return 0; - VariantContext mergedVC; + List mergedVCs = new ArrayList(); if ( master ) { - mergedVC = VariantContextUtils.masterMerge(vcs, "master"); + mergedVCs.add(VariantContextUtils.masterMerge(vcs, "master")); } else { - mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, filteredRecordsMergeType, - genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC); + Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); + // iterate over the types so that it's deterministic + for ( VariantContext.Type type : VariantContext.Type.values() ) { + if ( VCsByType.containsKey(type) ) + mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), + priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, + ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); + } } - //out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC); + for ( VariantContext mergedVC : mergedVCs ) { + // only operate at the start of events + if ( mergedVC == null ) + continue; - if ( mergedVC != null ) { // only operate at the start of events HashMap attributes = new HashMap(mergedVC.getAttributes()); // re-compute chromosome counts VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 1db692e9f..ac6797609 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -24,6 +24,16 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.text.XReadLines; +import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.MendelianViolation; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; @@ -44,6 +54,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; +import java.lang.annotation.AnnotationFormatError; import java.util.*; /** @@ -91,6 +104,13 @@ public class SelectVariants extends RodWalker { @Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false) private boolean KEEP_AF_SPECTRUM = false; + @Hidden + @Argument(fullName="afFile", shortName="afFile", doc="The output recal file used by ApplyRecalibration", required=false) + private File AF_FILE = new File(""); + + @Hidden + @Argument(fullName="family_structure_file", shortName="familyFile", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) + private File FAMILY_STRUCTURE_FILE = null; @Argument(fullName="family_structure", shortName="family", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) private String FAMILY_STRUCTURE = ""; @@ -113,6 +133,9 @@ public class SelectVariants extends RodWalker { @Argument(fullName="selectIndels", shortName="indels", doc="Select only Indels.", required=false) private boolean SELECT_INDELS = false; + @Hidden + @Argument(fullName="outMVFile", shortName="outMVFile", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) + private String outMVFile = null; /* Private class used to store the intermediate variants in the integer random selection process */ private class RandomVariantStructure { @@ -140,7 +163,7 @@ public class SelectVariants extends RodWalker { private boolean DISCORDANCE_ONLY = false; private boolean CONCORDANCE_ONLY = false; - private MendelianViolation mv; + private Set mvSet = new HashSet(); /* default name for the variant dataset (VCF) */ private final String variantRodName = "variant"; @@ -155,8 +178,14 @@ public class SelectVariants extends RodWalker { private RandomVariantStructure [] variantArray; + /* Variables used for random selection with AF boosting */ + private ArrayList afBreakpoints = null; + private ArrayList afBoosts = null; + double bkDelta = 0.0; + private PrintStream outMVFileStream = null; + /** * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher @@ -212,10 +241,29 @@ public class SelectVariants extends RodWalker { CONCORDANCE_ONLY = concordanceRodName.length() > 0; if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceRodName); - if (MENDELIAN_VIOLATIONS) - mv = new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD); + if (MENDELIAN_VIOLATIONS) { + if ( FAMILY_STRUCTURE_FILE != null) { + try { + for ( final String line : new XReadLines( FAMILY_STRUCTURE_FILE ) ) { + MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD); + if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom())) + mvSet.add(mv); + } + } catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(AF_FILE, e); + } + if (outMVFile != null) + try { + outMVFileStream = new PrintStream(outMVFile); + } + catch (FileNotFoundException e) { + throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); } + } + else + mvSet.add(new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD)); + } else if (!FAMILY_STRUCTURE.isEmpty()) { - mv = new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD); + mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); MENDELIAN_VIOLATIONS = true; } @@ -227,6 +275,33 @@ public class SelectVariants extends RodWalker { SELECT_RANDOM_FRACTION = fractionRandom > 0; if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + fractionRandom + "% of the variants at random from the variant track"); + + + if (KEEP_AF_SPECTRUM) { + try { + afBreakpoints = new ArrayList(); + afBoosts = new ArrayList(); + logger.info("Reading in AF boost table..."); + boolean firstLine = false; + for ( final String line : new XReadLines( AF_FILE ) ) { + if (!firstLine) { + firstLine = true; + continue; + } + final String[] vals = line.split(" "); + + double bkp = Double.valueOf(vals[0]); + double afb = Double.valueOf(vals[1]); + afBreakpoints.add(bkp); + afBoosts.add(afb); + + } + bkDelta = afBreakpoints.get(0); + } catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(AF_FILE, e); + } + + } } /** @@ -250,9 +325,24 @@ public class SelectVariants extends RodWalker { for (VariantContext vc : vcs) { if (MENDELIAN_VIOLATIONS) { - if (!mv.isViolation(vc)) { - break; + boolean foundMV = false; + for (MendelianViolation mv : mvSet) { + if (mv.isViolation(vc)) { + foundMV = true; + //System.out.println(vc.toString()); + if (outMVFile != null) + outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + + "childG=%s childGL=%s\n",vc.getChr(), vc.getStart(), + vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getChromosomeCount(vc.getAlternateAllele(0)), + mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(), + vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), + vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), + vc.getGenotype(mv.getSampleChild()).toBriefString(),vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString() ); + } } + + if (!foundMV) + break; } if (DISCORDANCE_ONLY) { Collection compVCs = tracker.getVariantContexts(ref, discordanceRodName, null, context.getLocation(), true, false); @@ -283,46 +373,59 @@ public class SelectVariants extends RodWalker { if (SELECT_RANDOM_NUMBER) { randomlyAddVariant(++variantNumber, sub, ref.getBase()); } - else if (!SELECT_RANDOM_FRACTION || GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom) { + else if (!SELECT_RANDOM_FRACTION || (!KEEP_AF_SPECTRUM && GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { vcfWriter.add(sub, ref.getBase()); } else { if (SELECT_RANDOM_FRACTION && KEEP_AF_SPECTRUM ) { - Collection compVCs = tracker.getVariantContexts(ref, inputAFRodName, null, context.getLocation(), true, false); - if (compVCs.isEmpty()) - return 0; - // ok we have a comp VC and we need to match the AF spectrum of inputAFRodName. // We then pick a variant with probablity AF*desiredFraction - for (VariantContext compVC : compVCs) { - if ( compVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) { - String afo = compVC.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY); + if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) { + String afo = sub.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY); - double af; - if (afo.contains(",")) { - String[] afs = afo.split(","); - afs[0] = afs[0].substring(1,afs[0].length()); - afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1); + double af; + double afBoost = 1.0; + if (afo.contains(",")) { + String[] afs = afo.split(","); + afs[0] = afs[0].substring(1,afs[0].length()); + afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1); - double[] afd = new double[afs.length]; + double[] afd = new double[afs.length]; - for (int k=0; k < afd.length; k++) - afd[k] = Double.valueOf(afs[k]); + for (int k=0; k < afd.length; k++) + afd[k] = Double.valueOf(afs[k]); - af = MathUtils.arrayMax(afd); - //af = Double.valueOf(afs[0]); + af = MathUtils.arrayMax(afd); + //af = Double.valueOf(afs[0]); - } - else - af = Double.valueOf(afo); - - //System.out.format("%s .. %4.4f\n",afo.toString(), af); - if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * af) - vcfWriter.add(sub, ref.getBase()); } - break; // do only one vc + else + af = Double.valueOf(afo); + + // now boost af by table read from file if desired + //double bkpt = 0.0; + int bkidx = 0; + if (!afBreakpoints.isEmpty()) { + for ( Double bkpt : afBreakpoints) { + if (af < bkpt + bkDelta) + break; + else bkidx++; + } + if (bkidx >=afBoosts.size()) + bkidx = afBoosts.size()-1; + afBoost = afBoosts.get(bkidx); + //System.out.formatPrin("af:%f bkidx:%d afboost:%f\n",af,bkidx,afBoost); + + + + } + + //System.out.format("%s .. %4.4f\n",afo.toString(), af); + if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * afBoost * afBoost) + vcfWriter.add(sub, ref.getBase()); } + } } } @@ -406,8 +509,8 @@ public class SelectVariants extends RodWalker { private boolean haveSameGenotypes(Genotype g1, Genotype g2) { if ((g1.isCalled() && g2.isFiltered()) || - (g2.isCalled() && g1.isFiltered()) || - (g1.isFiltered() && g2.isFiltered() && EXCLUDE_FILTERED)) + (g2.isCalled() && g1.isFiltered()) || + (g1.isFiltered() && g2.isFiltered() && EXCLUDE_FILTERED)) return false; List a1s = g1.getAlleles(); @@ -440,7 +543,7 @@ public class SelectVariants extends RodWalker { * @param vc the VariantContext record to subset * @param samples the samples to extract * @return the subsetted VariantContext - */ + */ private VariantContext subsetRecord(VariantContext vc, Set samples) { if ( samples == null || samples.isEmpty() ) return vc; @@ -450,7 +553,7 @@ public class SelectVariants extends RodWalker { if ( samples.contains(genotypePair.getKey()) ) genotypes.add(genotypePair.getValue()); } - + VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles()); HashMap attributes = new HashMap(sub.getAttributes()); @@ -460,7 +563,7 @@ public class SelectVariants extends RodWalker { Genotype g = sub.getGenotype(sample); if (g.isNotFiltered() && g.isCalled()) { - + String dp = (String) g.getAttribute("DP"); if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) { depth += Integer.valueOf(dp); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 8d90af65a..39358dad5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -24,6 +24,8 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -33,7 +35,6 @@ import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.PrintStream; @@ -74,17 +75,29 @@ public class VariantsToTable extends RodWalker { // #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT getters.put("CHROM", new Getter() { public String get(VariantContext vc) { return vc.getChr(); } }); getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } }); - getters.put("REF", new Getter() { public String get(VariantContext vc) { return vc.getReference().toString(); } }); + getters.put("REF", new Getter() { + public String get(VariantContext vc) { + String x = ""; + if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) { + Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)); + x=x+new String(new byte[]{refByte}); + } + return x+vc.getReference().getDisplayString(); + } + }); getters.put("ALT", new Getter() { public String get(VariantContext vc) { StringBuilder x = new StringBuilder(); int n = vc.getAlternateAlleles().size(); - if ( n == 0 ) return "."; + if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) { + Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)); + x.append(new String(new byte[]{refByte})); + } for ( int i = 0; i < n; i++ ) { if ( i != 0 ) x.append(","); - x.append(vc.getAlternateAllele(i).toString()); + x.append(vc.getAlternateAllele(i).getDisplayString()); } return x.toString(); } @@ -168,6 +181,31 @@ public class VariantsToTable extends RodWalker { throw new UserException(String.format("Missing field %s in vc %s at %s", field, vc.getSource(), vc)); } + if (field.equals("AF") || field.equals("AC")) { + String afo = val; + + double af=0; + if (afo.contains(",")) { + String[] afs = afo.split(","); + afs[0] = afs[0].substring(1,afs[0].length()); + afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1); + + double[] afd = new double[afs.length]; + + for (int k=0; k < afd.length; k++) + afd[k] = Double.valueOf(afs[k]); + + af = MathUtils.arrayMax(afd); + //af = Double.valueOf(afs[0]); + + } + else + if (!afo.equals("NA")) + af = Double.valueOf(afo); + + val = Double.toString(af); + + } vals.add(val); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 5a5671056..212600360 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -289,8 +289,8 @@ public class VariantContextUtils { /** * Returns a newly allocated VC that is the same as VC, but without genotypes - * @param vc - * @return + * @param vc variant context + * @return new VC without genotypes */ @Requires("vc != null") @Ensures("result != null") @@ -303,8 +303,8 @@ public class VariantContextUtils { /** * Returns a newly allocated list of VC, where each VC is the same as the input VCs, but without genotypes - * @param vcs - * @return + * @param vcs collection of VCs + * @return new VCs without genotypes */ @Requires("vcs != null") @Ensures("result != null") @@ -362,9 +362,9 @@ public class VariantContextUtils { * information per genotype. The master merge will add the PQ information from each genotype record, where * appropriate, to the master VC. * - * @param unsortedVCs - * @param masterName - * @return + * @param unsortedVCs collection of VCs + * @param masterName name of master VC + * @return master-merged VC */ public static VariantContext masterMerge(Collection unsortedVCs, String masterName) { VariantContext master = findMaster(unsortedVCs, masterName); @@ -435,11 +435,15 @@ public class VariantContextUtils { * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with * the sample name * - * @param unsortedVCs - * @param priorityListOfVCs - * @param filteredRecordMergeType - * @param genotypeMergeOptions - * @return + * @param genomeLocParser loc parser + * @param unsortedVCs collection of unsorted VCs + * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs + * @param filteredRecordMergeType merge type for filtered records + * @param genotypeMergeOptions merge option for genotypes + * @param annotateOrigin should we annotate the set it came from? + * @param printMessages should we print messages? + * @param inputRefBase the ref base + * @return new VariantContext */ public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, @@ -448,6 +452,24 @@ public class VariantContextUtils { return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false); } + /** + * Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided. + * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with + * the sample name + * + * @param genomeLocParser loc parser + * @param unsortedVCs collection of unsorted VCs + * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs + * @param filteredRecordMergeType merge type for filtered records + * @param genotypeMergeOptions merge option for genotypes + * @param annotateOrigin should we annotate the set it came from? + * @param printMessages should we print messages? + * @param inputRefBase the ref base + * @param setKey the key name of the set + * @param filteredAreUncalled are filtered records uncalled? + * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? + * @return new VariantContext + */ public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey, @@ -470,7 +492,7 @@ public class VariantContextUtils { if ( ! filteredAreUncalled || vc.isNotFiltered() ) VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false)); } - if ( VCs.size() == 0 ) // everything is filtered out and we're filteredareUncalled + if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled return null; // establish the baseline info from the first VC @@ -615,6 +637,17 @@ public class VariantContextUtils { return merged; } + public static Map> separateVariantContextsByType(Collection VCs) { + HashMap> mappedVCs = new HashMap>(); + for ( VariantContext vc : VCs ) { + if ( !mappedVCs.containsKey(vc.getType()) ) + mappedVCs.put(vc.getType(), new ArrayList()); + mappedVCs.get(vc.getType()).add(vc); + } + + return mappedVCs; + } + private static class AlleleMapper { private VariantContext vc = null; private Map map = null; @@ -834,6 +867,7 @@ public class VariantContextUtils { /** * create a genome location, given a variant context + * @param genomeLocParser parser * @param vc the variant context * @return the genomeLoc */ diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java index cca1eccb4..77159d9c2 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java @@ -52,8 +52,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest { @DataProvider(name = "data") public Object[][] createData() { - new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "fb7f4e011487ca56bce865ae5468cdc5"); - new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "423cec3befbf0a72d8bc3757ee628fc4"); + new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "4d9f4636de05b93c354d05011264546e"); + new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "37e6efd833b5cd6d860a9df3df9713fc"); return TestParams.getTests(TestParams.class); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 00ee44f75..904a5b29b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -80,9 +80,9 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5b82f37df1f5ba40f0474d71c94142ec", false); } + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a", false); } - @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "c58dca482bf97069eac6d9f1a07a2cba", false); } + @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083", false); } @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); } @@ -100,7 +100,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1, - Arrays.asList("8b78339ccf7a5a5a837f79e88a3a38e5")); + Arrays.asList("1de95f91ca15d2a8856de35dee0ce33e")); executeTest("threeWayWithRefs", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java old mode 100644 new mode 100755 index 72647c8e1..1db712353 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java @@ -44,7 +44,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest { @Test(enabled = true) public void testComplexVariantsToTable() { WalkerTestSpec spec = new WalkerTestSpec(variantsToTableCmd(" -AMD"), - Arrays.asList("b2a3712c1bfad8f1383ffada8b5017ba")); + Arrays.asList("e8f771995127b727fb433da91dd4ee98")); executeTest("testComplexVariantsToTable", spec).getFirst(); } diff --git a/public/packages/PicardPrivate.xml b/public/packages/PicardPrivate.xml index 110b41d3f..581c47979 100644 --- a/public/packages/PicardPrivate.xml +++ b/public/packages/PicardPrivate.xml @@ -7,6 +7,8 @@ + + diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 6a47d4b97..1f4f79993 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -72,6 +72,9 @@ class DataProcessingPipeline extends QScript { @Input(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false) var bwaThreads: Int = 1 + @Input(doc="Dont perform validation on the BAM files", fullName="no_validation", shortName="nv", required=false) + var noValidation: Boolean = false + /**************************************************************************** * Global Variables @@ -135,7 +138,7 @@ class DataProcessingPipeline extends QScript { } } - println("\n\n*** DEBUG ***\n") + println("\n\n*** INPUT FILES ***\n") // Creating one file for each sample in the dataset val sampleBamFiles = scala.collection.mutable.Map.empty[String, File] for ((sample, flist) <- sampleTable) { @@ -149,7 +152,7 @@ class DataProcessingPipeline extends QScript { sampleBamFiles(sample) = sampleFileName add(joinBams(flist, sampleFileName)) } - println("*** DEBUG ***\n\n") + println("*** INPUT FILES ***\n\n") return sampleBamFiles.toMap } @@ -246,7 +249,12 @@ class DataProcessingPipeline extends QScript { val preValidateLog = swapExt(bam, ".bam", ".pre.validation") val postValidateLog = swapExt(bam, ".bam", ".post.validation") - add(validate(bam, preValidateLog)) + // Validation is an optional step for the BAM file generated after + // alignment and the final bam file of the pipeline. + if (!noValidation) { + add(validate(bam, preValidateLog), + validate(recalBam, postValidateLog)) + } if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY) add(target(bam, targetIntervals)) @@ -257,8 +265,8 @@ class DataProcessingPipeline extends QScript { recal(dedupedBam, preRecalFile, recalBam), cov(recalBam, postRecalFile), analyzeCovariates(preRecalFile, preOutPath), - analyzeCovariates(postRecalFile, postOutPath), - validate(recalBam, postValidateLog)) + analyzeCovariates(postRecalFile, postOutPath)) + cohortList :+= recalBam } @@ -282,6 +290,13 @@ class DataProcessingPipeline extends QScript { this.isIntermediate = true } + // General arguments to non-GATK tools + trait ExternalCommonArgs extends CommandLineFunction { + this.memoryLimit = 4 + this.isIntermediate = true + } + + case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs { if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY) this.input_file :+= inBams @@ -300,8 +315,8 @@ class DataProcessingPipeline extends QScript { this.targetIntervals = tIntervals this.out = outBam this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) - if (!indels.isEmpty) - this.rodBind :+= RodBind("indels", "VCF", indels) + if (!qscript.indels.isEmpty) + this.rodBind :+= RodBind("indels", "VCF", qscript.indels) this.consensusDeterminationModel = consensusDeterminationModel this.compress = 0 this.scatterCount = nContigs @@ -332,7 +347,6 @@ class DataProcessingPipeline extends QScript { this.isIntermediate = false this.analysisName = queueLogDir + outBam + ".recalibration" this.jobName = queueLogDir + outBam + ".recalibration" - } @@ -350,48 +364,41 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + inRecalFile + ".analyze_covariates" } - case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates { + case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates with ExternalCommonArgs { this.input = List(inBam) this.output = outBam this.metrics = metricsFile - this.memoryLimit = 6 - this.isIntermediate = true this.analysisName = queueLogDir + outBam + ".dedup" this.jobName = queueLogDir + outBam + ".dedup" } - case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles { + case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs { this.input = inBams this.output = outBam - this.memoryLimit = 4 - this.isIntermediate = true this.analysisName = queueLogDir + outBam + ".joinBams" this.jobName = queueLogDir + outBam + ".joinBams" } - case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam { + case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam with ExternalCommonArgs { this.input = List(inSam) this.output = outBam this.sortOrder = sortOrderP - this.memoryLimit = 4 - this.isIntermediate = true this.analysisName = queueLogDir + outBam + ".sortSam" this.jobName = queueLogDir + outBam + ".sortSam" } - case class validate (inBam: File, outLog: File) extends ValidateSamFile { + case class validate (inBam: File, outLog: File) extends ValidateSamFile with ExternalCommonArgs { this.input = List(inBam) this.output = outLog this.maxRecordsInRam = 100000 this.REFERENCE_SEQUENCE = qscript.reference - this.memoryLimit = 4 this.isIntermediate = false this.analysisName = queueLogDir + outLog + ".validate" this.jobName = queueLogDir + outLog + ".validate" } - case class addReadGroup (inBam: File, outBam: File, readGroup: ReadGroup) extends AddOrReplaceReadGroups { + case class addReadGroup (inBam: File, outBam: File, readGroup: ReadGroup) extends AddOrReplaceReadGroups with ExternalCommonArgs { this.input = List(inBam) this.output = outBam this.RGID = readGroup.id @@ -407,12 +414,7 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outBam + ".rg" } - trait BWACommonArgs extends CommandLineFunction { - this.memoryLimit = 4 - this.isIntermediate = true - } - - case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with BWACommonArgs { + case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai @@ -420,7 +422,7 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outSai + ".bwa_aln_se" } - case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with BWACommonArgs { + case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai @@ -428,7 +430,7 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } - case class bwa_sam_se (inBam: File, inSai: File, outBam: File) extends CommandLineFunction with BWACommonArgs { + case class bwa_sam_se (inBam: File, inSai: File, outBam: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Input(doc="bwa alignment index file") var sai = inSai @Output(doc="output aligned bam file") var alignedBam = outBam @@ -437,7 +439,7 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outBam + ".bwa_sam_se" } - case class bwa_sam_pe (inBam: File, inSai1: File, inSai2:File, outBam: File) extends CommandLineFunction with BWACommonArgs { + case class bwa_sam_pe (inBam: File, inSai1: File, inSai2:File, outBam: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Input(doc="bwa alignment index file for 1st mating pair") var sai1 = inSai1 @Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2 diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala index fca420816..f8218148e 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala @@ -20,14 +20,14 @@ class RecalibrateBaseQualities extends QScript { @Input(doc="input BAM file - or list of BAM files", shortName="i", required=true) var input: File = _ - @Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=false) - var R: String = new File("/humgen/gsa-scr1/carneiro/stable/R") + @Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=true) + var R: String = _ - @Input(doc="Reference fasta file", shortName="R", required=false) - var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta") + @Input(doc="Reference fasta file", shortName="R", required=true) + var reference: File = _ // new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta") - @Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false) - var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf") + @Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=true) + var dbSNP: File = _ // new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf") val queueLogDir: String = ".qlog/" var nContigs: Int = 0 diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1954.jar b/settings/repository/edu.mit.broad/picard-private-parts-1954.jar deleted file mode 100644 index 67637d3d9..000000000 Binary files a/settings/repository/edu.mit.broad/picard-private-parts-1954.jar and /dev/null differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1954.xml~ b/settings/repository/edu.mit.broad/picard-private-parts-1954.xml~ deleted file mode 100644 index 07d51ae53..000000000 --- a/settings/repository/edu.mit.broad/picard-private-parts-1954.xml~ +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1959.jar b/settings/repository/edu.mit.broad/picard-private-parts-1959.jar new file mode 100644 index 000000000..ae11e636b Binary files /dev/null and b/settings/repository/edu.mit.broad/picard-private-parts-1959.jar differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1954.xml b/settings/repository/edu.mit.broad/picard-private-parts-1959.xml similarity index 58% rename from settings/repository/edu.mit.broad/picard-private-parts-1954.xml rename to settings/repository/edu.mit.broad/picard-private-parts-1959.xml index c702fd6e5..e7c7e3a21 100644 --- a/settings/repository/edu.mit.broad/picard-private-parts-1954.xml +++ b/settings/repository/edu.mit.broad/picard-private-parts-1959.xml @@ -1,3 +1,3 @@ - + diff --git a/settings/repository/net.sf/picard-1.48.889.xml b/settings/repository/net.sf/picard-1.48.889.xml deleted file mode 100644 index 877687930..000000000 --- a/settings/repository/net.sf/picard-1.48.889.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/picard-1.48.889.jar b/settings/repository/net.sf/picard-1.49.895.jar similarity index 95% rename from settings/repository/net.sf/picard-1.48.889.jar rename to settings/repository/net.sf/picard-1.49.895.jar index 1b725dde5..3ee1f2090 100644 Binary files a/settings/repository/net.sf/picard-1.48.889.jar and b/settings/repository/net.sf/picard-1.49.895.jar differ diff --git a/settings/repository/net.sf/picard-1.49.895.xml b/settings/repository/net.sf/picard-1.49.895.xml new file mode 100644 index 000000000..52d4900c5 --- /dev/null +++ b/settings/repository/net.sf/picard-1.49.895.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/sam-1.48.889.xml b/settings/repository/net.sf/sam-1.48.889.xml deleted file mode 100644 index 8046a0c02..000000000 --- a/settings/repository/net.sf/sam-1.48.889.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/sam-1.48.889.jar b/settings/repository/net.sf/sam-1.49.895.jar similarity index 95% rename from settings/repository/net.sf/sam-1.48.889.jar rename to settings/repository/net.sf/sam-1.49.895.jar index 33ae4aa7d..c55ab0b72 100644 Binary files a/settings/repository/net.sf/sam-1.48.889.jar and b/settings/repository/net.sf/sam-1.49.895.jar differ diff --git a/settings/repository/net.sf/sam-1.49.895.xml b/settings/repository/net.sf/sam-1.49.895.xml new file mode 100644 index 000000000..0436ce881 --- /dev/null +++ b/settings/repository/net.sf/sam-1.49.895.xml @@ -0,0 +1,3 @@ + + +