Merge branch 'master' into rodTesting
Conflicts: public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
This commit is contained in:
commit
eef1ac415a
|
|
@ -156,7 +156,7 @@ public class UnifiedArgumentCollection {
|
|||
public boolean OUTPUT_DEBUG_INDEL_INFO = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "dovit", shortName = "dovit", doc = "Output indel debug info", required = false)
|
||||
@Argument(fullName = "dovit", shortName = "dovit", doc = "Perform full Viterbi calculation when evaluating the HMM", required = false)
|
||||
public boolean dovit = false;
|
||||
|
||||
@Hidden
|
||||
|
|
|
|||
|
|
@ -274,7 +274,7 @@ public class PairHMMIndelErrorModel {
|
|||
this.doViterbi = dovit;
|
||||
}
|
||||
|
||||
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP) {
|
||||
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP) {
|
||||
|
||||
|
||||
this.logGapOpenProbability = -indelGOP/10.0; // QUAL to log prob
|
||||
|
|
@ -754,7 +754,7 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
|
||||
if (indelLikelihoodMap.containsKey(p)) {
|
||||
HashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
||||
HashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
||||
int j=0;
|
||||
for (Allele a: haplotypeMap.keySet()) {
|
||||
readLikelihoods[readIdx][j++] = el.get(a);
|
||||
|
|
@ -1055,7 +1055,6 @@ public class PairHMMIndelErrorModel {
|
|||
genotypeLikelihoods[i] -= maxElement;
|
||||
|
||||
return genotypeLikelihoods;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.apache.poi.hpsf.Variant;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
|
@ -165,6 +166,23 @@ import java.util.*;
|
|||
* -o output.vcf \
|
||||
* -fraction 0.5
|
||||
*
|
||||
* Select only indels from a VCF:
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T SelectVariants \
|
||||
* --variant input.vcf \
|
||||
* -o output.vcf \
|
||||
* -selectType INDEL
|
||||
*
|
||||
* Select only multi-allelic SNPs and MNPs from a VCF (i.e. SNPs with more than one allele listed in the ALT column):
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T SelectVariants \
|
||||
* --variant input.vcf \
|
||||
* -o output.vcf \
|
||||
* -selectType SNP -selectType MNP \
|
||||
* -restrictAllelesTo MULTIALLELIC
|
||||
*
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
|
|
@ -223,6 +241,18 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="excludeFiltered", shortName="ef", doc="Don't include filtered loci in the analysis", required=false)
|
||||
private boolean EXCLUDE_FILTERED = false;
|
||||
|
||||
|
||||
/**
|
||||
* When this argument is used, we can choose to include only multiallelic or biallelic sites, depending on how many alleles are listed in the ALT column of a vcf.
|
||||
* For example, a multiallelic record such as:
|
||||
* 1 100 . A AAA,AAAAA
|
||||
* will be excluded if "-restrictAllelesTo BIALLELIC" is included, because there are two alternate alleles, whereas a record such as:
|
||||
* 1 100 . A T
|
||||
* will be included in that case, but would be excluded if "-restrictAllelesTo MULTIALLELIC
|
||||
*/
|
||||
@Argument(fullName="restrictAllelesTo", shortName="restrictAllelesTo", doc="Select only variants of a particular allelicity. Valid options are ALL (default), MULTIALLELIC or BIALLELIC", required=false)
|
||||
private NumberAlleleRestriction alleleRestriction = NumberAlleleRestriction.ALL;
|
||||
|
||||
@Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't update the AC, AF, or AN values in the INFO field after selecting", required=false)
|
||||
private boolean KEEP_ORIGINAL_CHR_COUNTS = false;
|
||||
|
||||
|
|
@ -266,11 +296,14 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false)
|
||||
private double fractionRandom = 0;
|
||||
|
||||
@Argument(fullName="selectSNPs", shortName="snps", doc="Select only SNPs from the input file", required=false)
|
||||
private boolean SELECT_SNPS = false;
|
||||
/**
|
||||
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
|
||||
* When specified one or more times, a particular type of variant is selected.
|
||||
*
|
||||
*/
|
||||
@Argument(fullName="selectTypeToInclude", shortName="selectType", doc="Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required=false)
|
||||
private List<VariantContext.Type> TYPES_TO_INCLUDE = new ArrayList<VariantContext.Type>();
|
||||
|
||||
@Argument(fullName="selectIndels", shortName="indels", doc="Select only indels from the input file", required=false)
|
||||
private boolean SELECT_INDELS = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="outMVFile", shortName="outMVFile", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
|
||||
|
|
@ -290,6 +323,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
}
|
||||
|
||||
public enum NumberAlleleRestriction {
|
||||
ALL,
|
||||
BIALLELIC,
|
||||
MULTIALLELIC
|
||||
}
|
||||
|
||||
private ArrayList<VariantContext.Type> selectedTypes = new ArrayList<VariantContext.Type>();
|
||||
private ArrayList<String> selectNames = new ArrayList<String>();
|
||||
private List<VariantContextUtils.JexlVCMatchExp> jexls = null;
|
||||
|
||||
|
|
@ -354,6 +394,20 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
for ( String sample : samples )
|
||||
logger.info("Including sample '" + sample + "'");
|
||||
|
||||
|
||||
|
||||
// if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include
|
||||
if (TYPES_TO_INCLUDE.isEmpty()) {
|
||||
|
||||
for (VariantContext.Type t : VariantContext.Type.values())
|
||||
selectedTypes.add(t);
|
||||
|
||||
}
|
||||
else {
|
||||
for (VariantContext.Type t : TYPES_TO_INCLUDE)
|
||||
selectedTypes.add(t);
|
||||
|
||||
}
|
||||
// Initialize VCF header
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
|
||||
headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
|
||||
|
|
@ -494,12 +548,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
return 0;
|
||||
}
|
||||
|
||||
// TODO - add ability to also select MNPs
|
||||
// TODO - move variant selection arguments to the engine so other walkers can also do this
|
||||
if (SELECT_INDELS && !(vc.isIndel() || vc.isMixed()))
|
||||
if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic())
|
||||
continue;
|
||||
|
||||
if (SELECT_SNPS && !vc.isSNP())
|
||||
if (alleleRestriction.equals(NumberAlleleRestriction.MULTIALLELIC) && vc.isBiallelic())
|
||||
continue;
|
||||
|
||||
if (!selectedTypes.contains(vc.getType()))
|
||||
continue;
|
||||
|
||||
VariantContext sub = subsetRecord(vc, samples);
|
||||
|
|
|
|||
|
|
@ -124,8 +124,12 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
* multi-allelic INFO field values can be lists of values.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false)
|
||||
public boolean keepMultiAllelic = false;
|
||||
@Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false)
|
||||
public boolean keepMultiAllelic = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="logACSum", shortName="logACSum", doc="Log sum of AC instead of max value in case of multiallelic variants", required=false)
|
||||
public boolean logACSum = false;
|
||||
|
||||
/**
|
||||
* By default, this tool throws a UserException when it encounters a field without a value in some record. This
|
||||
|
|
@ -147,14 +151,13 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
if ( tracker == null ) // RodWalkers can make funky map calls
|
||||
return 0;
|
||||
|
||||
nRecords++;
|
||||
for ( VariantContext vc : tracker.getValues(variantCollection.variants, context.getLocation())) {
|
||||
if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) {
|
||||
List<String> vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA);
|
||||
List<String> vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA, keepMultiAllelic, logACSum);
|
||||
out.println(Utils.join("\t", vals));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
@ -176,9 +179,11 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
* @param fields a non-null list of fields to capture from VC
|
||||
* @param allowMissingData if false, then throws a UserException if any field isn't found in vc. Otherwise
|
||||
* provides a value of NA
|
||||
* @param kma if true, multiallelic variants are to be kept
|
||||
* @param logsum if true, AF and AC are computed based on sum of allele counts. Otherwise, based on allele with highest count.
|
||||
* @return
|
||||
*/
|
||||
public static List<String> extractFields(VariantContext vc, List<String> fields, boolean allowMissingData) {
|
||||
private static List<String> extractFields(VariantContext vc, List<String> fields, boolean allowMissingData, boolean kma, boolean logsum) {
|
||||
List<String> vals = new ArrayList<String>();
|
||||
|
||||
for ( String field : fields ) {
|
||||
|
|
@ -206,12 +211,31 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
if (field.equals("AF") || field.equals("AC")) {
|
||||
if (val.contains(",")) {
|
||||
// strip [,] and spaces
|
||||
val = val.replace("[","");
|
||||
val = val.replace("]","");
|
||||
val = val.replace(" ","");
|
||||
}
|
||||
String afo = val;
|
||||
|
||||
double af=0;
|
||||
if (afo.contains(",")) {
|
||||
String[] afs = afo.split(",");
|
||||
afs[0] = afs[0].substring(1,afs[0].length());
|
||||
afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1);
|
||||
|
||||
double[] afd = new double[afs.length];
|
||||
|
||||
for (int k=0; k < afd.length; k++)
|
||||
afd[k] = Double.valueOf(afs[k]);
|
||||
|
||||
if (kma && logsum)
|
||||
af = MathUtils.sum(afd);
|
||||
else
|
||||
af = MathUtils.arrayMax(afd);
|
||||
//af = Double.valueOf(afs[0]);
|
||||
|
||||
}
|
||||
else
|
||||
if (!afo.equals("NA"))
|
||||
af = Double.valueOf(afo);
|
||||
|
||||
val = Double.toString(af);
|
||||
|
||||
}
|
||||
vals.add(val);
|
||||
|
|
@ -220,6 +244,9 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
return vals;
|
||||
}
|
||||
|
||||
public static List<String> extractFields(VariantContext vc, List<String> fields, boolean allowMissingData) {
|
||||
return extractFields(vc, fields, allowMissingData, false, false);
|
||||
}
|
||||
//
|
||||
// default reduce -- doesn't do anything at all
|
||||
//
|
||||
|
|
@ -243,7 +270,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
getters.put("REF", new Getter() {
|
||||
public String get(VariantContext vc) {
|
||||
String x = "";
|
||||
if ( vc.hasReferenceBaseForIndel() ) {
|
||||
if ( vc.hasReferenceBaseForIndel() && !vc.isSNP() ) {
|
||||
Byte refByte = vc.getReferenceBaseForIndel();
|
||||
x=x+new String(new byte[]{refByte});
|
||||
}
|
||||
|
|
@ -255,7 +282,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
StringBuilder x = new StringBuilder();
|
||||
int n = vc.getAlternateAlleles().size();
|
||||
if ( n == 0 ) return ".";
|
||||
if ( vc.hasReferenceBaseForIndel() ) {
|
||||
if ( vc.hasReferenceBaseForIndel() && !vc.isSNP() ) {
|
||||
Byte refByte = vc.getReferenceBaseForIndel();
|
||||
x.append(new String(new byte[]{refByte}));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -281,7 +281,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
|
||||
VariantContext vc = null;
|
||||
try {
|
||||
vc = new VariantContext(name, contig, pos, loc, alleles, qual, filters, attributes);
|
||||
vc = new VariantContext(name, contig, pos, loc, alleles, qual, filters, attributes, ref.getBytes()[0]);
|
||||
} catch (Exception e) {
|
||||
generateException(e.getMessage());
|
||||
}
|
||||
|
|
@ -290,8 +290,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
if ( !header.samplesWereAlreadySorted() )
|
||||
vc.getGenotypes();
|
||||
|
||||
// Trim bases of all alleles if necessary
|
||||
return createVariantContextWithTrimmedAlleles(vc);
|
||||
return vc;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -516,25 +515,44 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
return true;
|
||||
}
|
||||
|
||||
private static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
|
||||
public static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
|
||||
boolean clipping = true;
|
||||
// Note that the computation of forward clipping here is meant only to see whether there is a common
|
||||
// base to all alleles, and to correctly compute reverse clipping,
|
||||
// but it is not used for actually changing alleles - this is done in function
|
||||
// createVariantContextWithTrimmedAlleles() below.
|
||||
|
||||
for (Allele a : unclippedAlleles) {
|
||||
if (a.isSymbolic()) {
|
||||
for ( Allele a : unclippedAlleles ) {
|
||||
if ( a.isSymbolic() )
|
||||
continue;
|
||||
}
|
||||
if (a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0])) {
|
||||
|
||||
if ( a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0]) ) {
|
||||
clipping = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (clipping) ? 1 : 0;
|
||||
|
||||
return (clipping) ? 1 : 0;
|
||||
}
|
||||
|
||||
protected static int computeReverseClipping(List<Allele> unclippedAlleles, String ref, int forwardClipping, int lineNo) {
|
||||
int clipping = 0;
|
||||
boolean stillClipping = true;
|
||||
|
||||
while ( stillClipping ) {
|
||||
for ( Allele a : unclippedAlleles ) {
|
||||
if ( a.isSymbolic() )
|
||||
continue;
|
||||
|
||||
if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 )
|
||||
stillClipping = false;
|
||||
else if ( ref.length() == clipping )
|
||||
generateException("bad alleles encountered", lineNo);
|
||||
else if ( a.getBases()[a.length()-clipping-1] != ref.getBytes()[ref.length()-clipping-1] )
|
||||
stillClipping = false;
|
||||
}
|
||||
if ( stillClipping )
|
||||
clipping++;
|
||||
}
|
||||
|
||||
return clipping;
|
||||
}
|
||||
/**
|
||||
* clip the alleles, based on the reference
|
||||
*
|
||||
|
|
@ -542,122 +560,30 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
* @param ref the reference string
|
||||
* @param unclippedAlleles the list of unclipped alleles
|
||||
* @param clippedAlleles output list of clipped alleles
|
||||
* @param lineNo the current line number in the file
|
||||
* @return the new reference end position of this event
|
||||
*/
|
||||
protected static int clipAlleles(int position, String ref, List<Allele> unclippedAlleles, List<Allele> clippedAlleles, int lineNo) {
|
||||
|
||||
// Note that the computation of forward clipping here is meant only to see whether there is a common
|
||||
// base to all alleles, and to correctly compute reverse clipping,
|
||||
// but it is not used for actually changing alleles - this is done in function
|
||||
// createVariantContextWithTrimmedAlleles() below.
|
||||
|
||||
int forwardClipping = computeForwardClipping(unclippedAlleles, ref);
|
||||
|
||||
int reverseClipped = 0;
|
||||
boolean clipping = true;
|
||||
while (clipping) {
|
||||
for (Allele a : unclippedAlleles) {
|
||||
if (a.isSymbolic()) {
|
||||
continue;
|
||||
}
|
||||
if (a.length() - reverseClipped <= forwardClipping || a.length() - forwardClipping == 0)
|
||||
clipping = false;
|
||||
else if (ref.length() == reverseClipped)
|
||||
generateException("bad alleles encountered", lineNo);
|
||||
else if (a.getBases()[a.length()-reverseClipped-1] != ref.getBytes()[ref.length()-reverseClipped-1])
|
||||
clipping = false;
|
||||
}
|
||||
if (clipping) reverseClipped++;
|
||||
}
|
||||
int reverseClipping = computeReverseClipping(unclippedAlleles, ref, forwardClipping, lineNo);
|
||||
|
||||
if ( clippedAlleles != null ) {
|
||||
for ( Allele a : unclippedAlleles ) {
|
||||
if ( a.isSymbolic() ) {
|
||||
clippedAlleles.add(a);
|
||||
} else {
|
||||
clippedAlleles.add(Allele.create(Arrays.copyOfRange(a.getBases(),0,a.getBases().length-reverseClipped),a.isReference()));
|
||||
clippedAlleles.add(Allele.create(Arrays.copyOfRange(a.getBases(), forwardClipping, a.getBases().length-reverseClipping), a.isReference()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// the new reference length
|
||||
int refLength = ref.length() - reverseClipped;
|
||||
int refLength = ref.length() - reverseClipping;
|
||||
|
||||
return position+Math.max(refLength - 1,0);
|
||||
}
|
||||
|
||||
public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
|
||||
// see if we need to trim common reference base from all alleles
|
||||
boolean trimVC;
|
||||
|
||||
// We need to trim common reference base from all alleles in all genotypes if a ref base is common to all alleles
|
||||
Allele refAllele = inputVC.getReference();
|
||||
if (!inputVC.isVariant())
|
||||
trimVC = false;
|
||||
else if (refAllele.isNull())
|
||||
trimVC = false;
|
||||
else {
|
||||
trimVC = (computeForwardClipping(new ArrayList<Allele>(inputVC.getAlternateAlleles()),
|
||||
inputVC.getReference().getDisplayString()) > 0);
|
||||
}
|
||||
|
||||
// nothing to do if we don't need to trim bases
|
||||
if (trimVC) {
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
|
||||
|
||||
// set the reference base for indels in the attributes
|
||||
Map<String,Object> attributes = new TreeMap<String,Object>(inputVC.getAttributes());
|
||||
|
||||
Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
||||
|
||||
for (Allele a : inputVC.getAlleles()) {
|
||||
if (a.isSymbolic()) {
|
||||
alleles.add(a);
|
||||
originalToTrimmedAlleleMap.put(a, a);
|
||||
} else {
|
||||
// get bases for current allele and create a new one with trimmed bases
|
||||
byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
|
||||
Allele trimmedAllele = Allele.create(newBases, a.isReference());
|
||||
alleles.add(trimmedAllele);
|
||||
originalToTrimmedAlleleMap.put(a, trimmedAllele);
|
||||
}
|
||||
}
|
||||
|
||||
// detect case where we're trimming bases but resulting vc doesn't have any null allele. In that case, we keep original representation
|
||||
// example: mixed records such as {TA*,TGA,TG}
|
||||
boolean hasNullAlleles = false;
|
||||
|
||||
for (Allele a: originalToTrimmedAlleleMap.values()) {
|
||||
if (a.isNull())
|
||||
hasNullAlleles = true;
|
||||
if (a.isReference())
|
||||
refAllele = a;
|
||||
}
|
||||
|
||||
if (!hasNullAlleles)
|
||||
return inputVC;
|
||||
// now we can recreate new genotypes with trimmed alleles
|
||||
for ( Map.Entry<String, Genotype> sample : inputVC.getGenotypes().entrySet() ) {
|
||||
|
||||
List<Allele> originalAlleles = sample.getValue().getAlleles();
|
||||
List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
||||
for ( Allele a : originalAlleles ) {
|
||||
if ( a.isCalled() )
|
||||
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
|
||||
else
|
||||
trimmedAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
genotypes.put(sample.getKey(), Genotype.modifyAlleles(sample.getValue(), trimmedAlleles));
|
||||
|
||||
}
|
||||
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? inputVC.getFilters() : null, attributes, new Byte(inputVC.getReference().getBases()[0]));
|
||||
|
||||
}
|
||||
|
||||
return inputVC;
|
||||
}
|
||||
|
||||
public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) {
|
||||
try {
|
||||
return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) ||
|
||||
|
|
|
|||
|
|
@ -209,6 +209,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
|
||||
/**
|
||||
* the complete constructor. Makes a complete VariantContext from its arguments
|
||||
* This is the only constructor that is able to create indels! DO NOT USE THE OTHER ONES.
|
||||
*
|
||||
* @param source source
|
||||
* @param contig the contig
|
||||
|
|
@ -257,9 +258,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @param negLog10PError qual
|
||||
* @param filters filters: use null for unfiltered and empty set for passes filters
|
||||
* @param attributes attributes
|
||||
* @param referenceBaseForIndel padded reference base
|
||||
*/
|
||||
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
|
||||
this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, null, true);
|
||||
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, Byte referenceBaseForIndel) {
|
||||
this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, referenceBaseForIndel, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -293,7 +295,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
}
|
||||
|
||||
/**
|
||||
* Create a new variant context without genotypes and no Perror, no filters, and no attributes
|
||||
* Create a new variant context with genotypes but without Perror, filters, and attributes
|
||||
*
|
||||
* @param source source
|
||||
* @param contig the contig
|
||||
|
|
|
|||
|
|
@ -657,12 +657,84 @@ public class VariantContextUtils {
|
|||
|
||||
VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, (mergeInfoWithMaxAC ? attributesWithMaxAC : attributes) );
|
||||
// Trim the padded bases of all alleles if necessary
|
||||
merged = AbstractVCFCodec.createVariantContextWithTrimmedAlleles(merged);
|
||||
merged = createVariantContextWithTrimmedAlleles(merged);
|
||||
|
||||
if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
|
||||
return merged;
|
||||
}
|
||||
|
||||
public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
|
||||
// see if we need to trim common reference base from all alleles
|
||||
boolean trimVC;
|
||||
|
||||
// We need to trim common reference base from all alleles in all genotypes if a ref base is common to all alleles
|
||||
Allele refAllele = inputVC.getReference();
|
||||
if (!inputVC.isVariant())
|
||||
trimVC = false;
|
||||
else if (refAllele.isNull())
|
||||
trimVC = false;
|
||||
else {
|
||||
trimVC = (AbstractVCFCodec.computeForwardClipping(new ArrayList<Allele>(inputVC.getAlternateAlleles()),
|
||||
inputVC.getReference().getDisplayString()) > 0);
|
||||
}
|
||||
|
||||
// nothing to do if we don't need to trim bases
|
||||
if (trimVC) {
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
|
||||
|
||||
// set the reference base for indels in the attributes
|
||||
Map<String,Object> attributes = new TreeMap<String,Object>(inputVC.getAttributes());
|
||||
|
||||
Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
||||
|
||||
for (Allele a : inputVC.getAlleles()) {
|
||||
if (a.isSymbolic()) {
|
||||
alleles.add(a);
|
||||
originalToTrimmedAlleleMap.put(a, a);
|
||||
} else {
|
||||
// get bases for current allele and create a new one with trimmed bases
|
||||
byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
|
||||
Allele trimmedAllele = Allele.create(newBases, a.isReference());
|
||||
alleles.add(trimmedAllele);
|
||||
originalToTrimmedAlleleMap.put(a, trimmedAllele);
|
||||
}
|
||||
}
|
||||
|
||||
// detect case where we're trimming bases but resulting vc doesn't have any null allele. In that case, we keep original representation
|
||||
// example: mixed records such as {TA*,TGA,TG}
|
||||
boolean hasNullAlleles = false;
|
||||
|
||||
for (Allele a: originalToTrimmedAlleleMap.values()) {
|
||||
if (a.isNull())
|
||||
hasNullAlleles = true;
|
||||
if (a.isReference())
|
||||
refAllele = a;
|
||||
}
|
||||
|
||||
if (!hasNullAlleles)
|
||||
return inputVC;
|
||||
// now we can recreate new genotypes with trimmed alleles
|
||||
for ( Map.Entry<String, Genotype> sample : inputVC.getGenotypes().entrySet() ) {
|
||||
|
||||
List<Allele> originalAlleles = sample.getValue().getAlleles();
|
||||
List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
||||
for ( Allele a : originalAlleles ) {
|
||||
if ( a.isCalled() )
|
||||
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
|
||||
else
|
||||
trimmedAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
genotypes.put(sample.getKey(), Genotype.modifyAlleles(sample.getValue(), trimmedAlleles));
|
||||
|
||||
}
|
||||
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? inputVC.getFilters() : null, attributes, new Byte(inputVC.getReference().getBases()[0]));
|
||||
|
||||
}
|
||||
|
||||
return inputVC;
|
||||
}
|
||||
|
||||
public static Map<String, Genotype> stripPLs(Map<String, Genotype> genotypes) {
|
||||
Map<String, Genotype> newGs = new HashMap<String, Genotype>(genotypes.size());
|
||||
|
||||
|
|
|
|||
|
|
@ -77,6 +77,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("testConcordance--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVariantTypeSelection() {
|
||||
String testFile = validationDataLocation + "complexExample1.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -restrictAllelesTo MULTIALLELIC -selectType MIXED --variant " + testFile + " -o %s -NO_HEADER",
|
||||
1,
|
||||
Arrays.asList("e0b12c0b47a8a7a988b3587b47bfa8cf")
|
||||
);
|
||||
|
||||
executeTest("testVariantTypeSelection--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test(enabled=false)
|
||||
public void testRemovePLs() {
|
||||
String testFile = validationDataLocation + "combine.3.vcf";
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ import java.util.Arrays;
|
|||
import java.util.List;
|
||||
|
||||
public class LibDrmaaIntegrationTest extends BaseTest {
|
||||
private String implementation = null;
|
||||
|
||||
@Test
|
||||
public void testDrmaa() throws Exception {
|
||||
|
|
@ -79,10 +80,24 @@ public class LibDrmaaIntegrationTest extends BaseTest {
|
|||
Assert.fail(String.format("Could not get DRMAA implementation from the DRMAA library: %s", error.getString(0)));
|
||||
|
||||
System.out.println(String.format("DRMAA implementation(s): %s", drmaaImplementation.getString(0)));
|
||||
|
||||
this.implementation = drmaaImplementation.getString(0);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(dependsOnMethods = { "testDrmaa" })
|
||||
public void testSubmitEcho() throws Exception {
|
||||
if (implementation.indexOf("LSF") >= 0) {
|
||||
System.err.println(" *********************************************************");
|
||||
System.err.println(" ***********************************************************");
|
||||
System.err.println(" **** ****");
|
||||
System.err.println(" **** Skipping LibDrmaaIntegrationTest.testSubmitEcho() ****");
|
||||
System.err.println(" **** Are you using the dotkit .combined_LSF_SGE? ****");
|
||||
System.err.println(" **** ****");
|
||||
System.err.println(" ***********************************************************");
|
||||
System.err.println(" *********************************************************");
|
||||
return;
|
||||
}
|
||||
|
||||
Memory error = new Memory(LibDrmaa.DRMAA_ERROR_STRING_BUFFER);
|
||||
int errnum;
|
||||
|
||||
|
|
@ -129,7 +144,7 @@ public class LibDrmaaIntegrationTest extends BaseTest {
|
|||
errnum = LibDrmaa.drmaa_set_vector_attribute(jt, LibDrmaa.DRMAA_V_ARGV, args, error, LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN);
|
||||
|
||||
if (errnum != LibDrmaa.DRMAA_ERRNO.DRMAA_ERRNO_SUCCESS)
|
||||
Assert.fail(String.format("Could not set attribute \"%s\": %s", LibDrmaa.DRMAA_REMOTE_COMMAND, error.getString(0)));
|
||||
Assert.fail(String.format("Could not set attribute \"%s\": %s", LibDrmaa.DRMAA_V_ARGV, error.getString(0)));
|
||||
|
||||
errnum = LibDrmaa.drmaa_run_job(jobIdMem, LibDrmaa.DRMAA_JOBNAME_BUFFER_LEN, jt, error, LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN);
|
||||
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ class DataProcessingPipeline extends QScript {
|
|||
// Because the realignment only happens after these scripts are executed, in case you are using
|
||||
// bwa realignment, this function will operate over the original bam files and output over the
|
||||
// (to be realigned) bam files.
|
||||
def createSampleFiles(bamFiles: List[File], realignedBamFiles: List[File]): Map[String, File] = {
|
||||
def createSampleFiles(bamFiles: List[File], realignedBamFiles: List[File]): Map[String, List[File]] = {
|
||||
|
||||
// Creating a table with SAMPLE information from each input BAM file
|
||||
val sampleTable = scala.collection.mutable.Map.empty[String, List[File]]
|
||||
|
|
@ -131,24 +131,25 @@ class DataProcessingPipeline extends QScript {
|
|||
sampleTable(sample) :+= rBam
|
||||
}
|
||||
}
|
||||
return sampleTable.toMap
|
||||
|
||||
println("\n\n*** INPUT FILES ***\n")
|
||||
// Creating one file for each sample in the dataset
|
||||
val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
|
||||
for ((sample, flist) <- sampleTable) {
|
||||
|
||||
println(sample + ":")
|
||||
for (f <- flist)
|
||||
println (f)
|
||||
println()
|
||||
|
||||
val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list")
|
||||
sampleBamFiles(sample) = sampleFileName
|
||||
add(writeList(flist, sampleFileName))
|
||||
}
|
||||
println("*** INPUT FILES ***\n\n")
|
||||
|
||||
return sampleBamFiles.toMap
|
||||
// println("\n\n*** INPUT FILES ***\n")
|
||||
// // Creating one file for each sample in the dataset
|
||||
// val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
|
||||
// for ((sample, flist) <- sampleTable) {
|
||||
//
|
||||
// println(sample + ":")
|
||||
// for (f <- flist)
|
||||
// println (f)
|
||||
// println()
|
||||
//
|
||||
// val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list")
|
||||
// sampleBamFiles(sample) = sampleFileName
|
||||
// //add(writeList(flist, sampleFileName))
|
||||
// }
|
||||
// println("*** INPUT FILES ***\n\n")
|
||||
//
|
||||
// return sampleBamFiles.toMap
|
||||
}
|
||||
|
||||
// Rebuilds the Read Group string to give BWA
|
||||
|
|
@ -224,7 +225,10 @@ class DataProcessingPipeline extends QScript {
|
|||
|
||||
|
||||
def script = {
|
||||
// final output list of processed bam files
|
||||
var cohortList: List[File] = List()
|
||||
|
||||
// sets the model for the Indel Realigner
|
||||
cleanModelEnum = getIndelCleaningModel()
|
||||
|
||||
// keep a record of the number of contigs in the first bam file in the list
|
||||
|
|
@ -233,28 +237,19 @@ class DataProcessingPipeline extends QScript {
|
|||
|
||||
val realignedBAMs = if (useBWApe || useBWAse) {performAlignment(bams)} else {revertBams(bams)}
|
||||
|
||||
// Generate a BAM file per sample joining all per lane files if necessary
|
||||
val sampleBAMFiles: Map[String, File] = createSampleFiles(bams, realignedBAMs)
|
||||
// generate a BAM file per sample joining all per lane files if necessary
|
||||
val sampleBAMFiles: Map[String, List[File]] = createSampleFiles(bams, realignedBAMs)
|
||||
|
||||
// Final output list of processed bam files
|
||||
var cohortList: List[File] = List()
|
||||
|
||||
// Simple progress report
|
||||
println("\nFound the following samples: ")
|
||||
for ((sample, file) <- sampleBAMFiles)
|
||||
println("\t" + sample + " -> " + file)
|
||||
println("\n")
|
||||
|
||||
// If this is a 'knowns only' indel realignment run, do it only once for all samples.
|
||||
// if this is a 'knowns only' indel realignment run, do it only once for all samples.
|
||||
val globalIntervals = new File(outputDir + projectName + ".intervals")
|
||||
if (cleaningModel == ConsensusDeterminationModel.KNOWNS_ONLY)
|
||||
add(target(null, globalIntervals))
|
||||
|
||||
// Put each sample through the pipeline
|
||||
for ((sample, sampleFile) <- sampleBAMFiles) {
|
||||
val bam = if (sampleFile.endsWith(".list")) {swapExt(sampleFile, ".list", ".bam")} else {sampleFile}
|
||||
// put each sample through the pipeline
|
||||
for ((sample, bamList) <- sampleBAMFiles) {
|
||||
|
||||
// BAM files generated by the pipeline
|
||||
val bam = new File(qscript.projectName + "." + sample + ".bam")
|
||||
val cleanedBam = swapExt(bam, ".bam", ".clean.bam")
|
||||
val dedupedBam = swapExt(bam, ".bam", ".clean.dedup.bam")
|
||||
val recalBam = swapExt(bam, ".bam", ".clean.dedup.recal.bam")
|
||||
|
|
@ -272,15 +267,16 @@ class DataProcessingPipeline extends QScript {
|
|||
|
||||
// Validation is an optional step for the BAM file generated after
|
||||
// alignment and the final bam file of the pipeline.
|
||||
if (!noValidation && sampleFile.endsWith(".bam")) { // todo -- implement validation for .list BAM files
|
||||
if (!noValidation) {
|
||||
for (sampleFile <- bamList)
|
||||
add(validate(sampleFile, preValidateLog),
|
||||
validate(recalBam, postValidateLog))
|
||||
}
|
||||
|
||||
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
|
||||
add(target(sampleFile, targetIntervals))
|
||||
add(target(bamList, targetIntervals))
|
||||
|
||||
add(clean(sampleFile, targetIntervals, cleanedBam),
|
||||
add(clean(bamList, targetIntervals, cleanedBam),
|
||||
dedup(cleanedBam, dedupedBam, metricsFile),
|
||||
cov(dedupedBam, preRecalFile),
|
||||
recal(dedupedBam, preRecalFile, recalBam),
|
||||
|
|
@ -320,9 +316,9 @@ class DataProcessingPipeline extends QScript {
|
|||
this.maxRecordsInRam = 100000
|
||||
}
|
||||
|
||||
case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
|
||||
case class target (inBams: List[File], outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
|
||||
if (cleanModelEnum != ConsensusDeterminationModel.KNOWNS_ONLY)
|
||||
this.input_file :+= inBams
|
||||
this.input_file = inBams
|
||||
this.out = outIntervals
|
||||
this.mismatchFraction = 0.0
|
||||
this.known :+= qscript.dbSNP
|
||||
|
|
@ -333,8 +329,8 @@ class DataProcessingPipeline extends QScript {
|
|||
this.jobName = queueLogDir + outIntervals + ".target"
|
||||
}
|
||||
|
||||
case class clean (inBams: File, tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
|
||||
this.input_file :+= inBams
|
||||
case class clean (inBams: List[File], tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
|
||||
this.input_file = inBams
|
||||
this.targetIntervals = tIntervals
|
||||
this.out = outBam
|
||||
this.known :+= qscript.dbSNP
|
||||
|
|
|
|||
|
|
@ -4,9 +4,9 @@ import org.broadinstitute.sting.queue.QScript
|
|||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.util.QScriptUtils
|
||||
import net.sf.samtools.SAMFileHeader.SortOrder
|
||||
import org.broadinstitute.sting.queue.extensions.picard.{SortSam, AddOrReplaceReadGroups}
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException
|
||||
import org.broadinstitute.sting.commandline.Hidden
|
||||
import org.broadinstitute.sting.queue.extensions.picard.{ReorderSam, SortSam, AddOrReplaceReadGroups}
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -16,7 +16,7 @@ import org.broadinstitute.sting.commandline.Hidden
|
|||
*/
|
||||
|
||||
|
||||
class RecalibrateBaseQualities extends QScript {
|
||||
class PacbioProcessingPipeline extends QScript {
|
||||
|
||||
@Input(doc="input FASTA/FASTQ/BAM file - or list of FASTA/FASTQ/BAM files. ", shortName="i", required=true)
|
||||
var input: File = _
|
||||
|
|
@ -39,13 +39,16 @@ class RecalibrateBaseQualities extends QScript {
|
|||
@Input(doc="The path to the binary of bwa to align fasta/fastq files", fullName="path_to_bwa", shortName="bwa", required=false)
|
||||
var bwaPath: File = _
|
||||
|
||||
@Input(doc="Input is a BLASR generated BAM file", shortName = "blasr", fullName="blasr_bam", required=false)
|
||||
var BLASR_BAM: Boolean = false
|
||||
|
||||
@Hidden
|
||||
@Input(doc="The default base qualities to use before recalibration. Default is Q20 (should be good for every dataset)." , shortName = "dbq", required=false)
|
||||
var dbq: Int = 20
|
||||
|
||||
|
||||
|
||||
|
||||
@Hidden
|
||||
@Input(shortName="bwastring", required=false)
|
||||
var bwastring: String = ""
|
||||
|
||||
val queueLogDir: String = ".qlog/"
|
||||
|
||||
|
|
@ -57,8 +60,6 @@ class RecalibrateBaseQualities extends QScript {
|
|||
|
||||
var USE_BWA: Boolean = false
|
||||
|
||||
println("DEBUG: processing " + file + "\nDEBUG: name -- " + file.getName)
|
||||
|
||||
if (file.endsWith(".fasta") || file.endsWith(".fq")) {
|
||||
if (bwaPath == null) {
|
||||
throw new UserException("You provided a fasta/fastq file but didn't provide the path for BWA");
|
||||
|
|
@ -69,28 +70,34 @@ class RecalibrateBaseQualities extends QScript {
|
|||
// FASTA -> BAM steps
|
||||
val alignedSam: File = file.getName + ".aligned.sam"
|
||||
val sortedBam: File = swapExt(alignedSam, ".sam", ".bam")
|
||||
val qualBam: File = swapExt(sortedBam, ".bam", ".q.bam")
|
||||
val rgBam: File = swapExt(file, ".bam", ".rg.bam")
|
||||
|
||||
val bamBase = if (USE_BWA) {rgBam} else {file}
|
||||
|
||||
// BAM Steps
|
||||
val mqBAM: File = swapExt(bamBase, ".bam", ".mq.bam")
|
||||
val recalFile1: File = swapExt(bamBase, ".bam", ".recal1.csv")
|
||||
val recalFile2: File = swapExt(bamBase, ".bam", ".recal2.csv")
|
||||
val recalBam: File = swapExt(bamBase, ".bam", ".recal.bam")
|
||||
val path1: String = recalBam + ".before"
|
||||
val path2: String = recalBam + ".after"
|
||||
|
||||
|
||||
if (USE_BWA) {
|
||||
add(align(file, alignedSam),
|
||||
sortSam(alignedSam, sortedBam),
|
||||
addQuals(sortedBam, qualBam, dbq),
|
||||
addReadGroup(qualBam, rgBam, sample))
|
||||
addReadGroup(sortedBam, rgBam, sample))
|
||||
}
|
||||
|
||||
add(cov(bamBase, recalFile1),
|
||||
recal(bamBase, recalFile1, recalBam),
|
||||
else if (BLASR_BAM) {
|
||||
val reorderedBAM = swapExt(bamBase, ".bam", ".reordered.bam")
|
||||
add(reorder(bamBase, reorderedBAM),
|
||||
changeMQ(reorderedBAM, mqBAM))
|
||||
}
|
||||
|
||||
val bam = if (BLASR_BAM) {mqBAM} else {bamBase}
|
||||
|
||||
add(cov(bam, recalFile1),
|
||||
recal(bam, recalFile1, recalBam),
|
||||
cov(recalBam, recalFile2),
|
||||
analyzeCovariates(recalFile1, path1),
|
||||
analyzeCovariates(recalFile2, path2))
|
||||
|
|
@ -110,13 +117,13 @@ class RecalibrateBaseQualities extends QScript {
|
|||
|
||||
|
||||
case class align(@Input inFastq: File, @Output outSam: File) extends ExternalCommonArgs {
|
||||
def commandLine = bwaPath + " bwasw -b5 -q2 -r1 -z10 -t8 " + reference + " " + inFastq + " > " + outSam
|
||||
def commandLine = bwaPath + " bwasw -b5 -q2 -r1 -z20 -t16 " + reference + " " + inFastq + " > " + outSam
|
||||
this.memoryLimit = 8
|
||||
this.analysisName = queueLogDir + outSam + ".bwa_sam_se"
|
||||
this.jobName = queueLogDir + outSam + ".bwa_sam_se"
|
||||
}
|
||||
|
||||
case class sortSam (@Input inSam: File, @Output outBam: File) extends SortSam with ExternalCommonArgs {
|
||||
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
|
||||
case class sortSam (inSam: File, outBam: File) extends SortSam with ExternalCommonArgs {
|
||||
this.input = List(inSam)
|
||||
this.output = outBam
|
||||
this.sortOrder = SortOrder.coordinate
|
||||
|
|
@ -124,10 +131,16 @@ class RecalibrateBaseQualities extends QScript {
|
|||
this.jobName = queueLogDir + outBam + ".sortSam"
|
||||
}
|
||||
|
||||
case class addQuals(inBam: File, outBam: File, qual: Int) extends PrintReads with CommandLineGATKArgs {
|
||||
case class reorder (inSam: File, outSam: File) extends ReorderSam with ExternalCommonArgs {
|
||||
this.input = List(inSam)
|
||||
this.output = outSam
|
||||
this.sortReference = reference
|
||||
}
|
||||
|
||||
case class changeMQ(inBam: File, outBam: File) extends PrintReads with CommandLineGATKArgs {
|
||||
this.input_file :+= inBam
|
||||
this.out = outBam
|
||||
this.DBQ = qual
|
||||
this.read_filter :+= "ReassignMappingQuality"
|
||||
}
|
||||
|
||||
case class addReadGroup (inBam: File, outBam: File, sample: String) extends AddOrReplaceReadGroups with ExternalCommonArgs {
|
||||
|
|
@ -145,7 +158,8 @@ class RecalibrateBaseQualities extends QScript {
|
|||
}
|
||||
|
||||
case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {
|
||||
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
||||
this.DBQ = dbq
|
||||
this.knownSites :+= dbSNP
|
||||
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
|
||||
this.input_file :+= inBam
|
||||
this.recal_file = outRecalFile
|
||||
|
|
@ -155,6 +169,7 @@ class RecalibrateBaseQualities extends QScript {
|
|||
}
|
||||
|
||||
case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs {
|
||||
this.DBQ = dbq
|
||||
this.input_file :+= inBam
|
||||
this.recal_file = inRecalFile
|
||||
this.out = outBam
|
||||
|
|
|
|||
|
|
@ -0,0 +1,48 @@
|
|||
package org.broadinstitute.sting.queue.extensions.picard
|
||||
|
||||
import org.broadinstitute.sting.commandline._
|
||||
|
||||
import java.io.File
|
||||
/*
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: carneiro
|
||||
* Date: 6/22/11
|
||||
* Time: 10:35 AM
|
||||
*/
|
||||
class ReorderSam extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction {
|
||||
analysisName = "ReorderSam"
|
||||
javaMainClass = "net.sf.picard.sam.ReorderSam"
|
||||
|
||||
@Input(doc="Input file (bam or sam) to extract reads from.", shortName = "input", fullName = "input_bam_files", required = true)
|
||||
var input: List[File] = Nil
|
||||
|
||||
@Output(doc="Output file (bam or sam) to write extracted reads to.", shortName = "output", fullName = "output_bam_file", required = true)
|
||||
var output: File = _
|
||||
|
||||
@Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false)
|
||||
var outputIndex: File = _
|
||||
|
||||
@Argument(doc="Reference sequence to reorder reads to match.", shortName = "ref", fullName = "sort_reference", required = true)
|
||||
var sortReference: File = _
|
||||
|
||||
@Argument(doc="If true, then allows only a partial overlap of the BAM contigs with the new reference sequence contigs. By default, this tool requires a corresponding contig in the new reference for each read contig.", shortName = "aic", fullName = "allow_incomplete_concordance", required = false)
|
||||
var ALLOW_INCOMPLETE_DICT_CONCORDANCE: Boolean = _
|
||||
|
||||
@Argument(doc="If true, then permits mapping from a read contig to a new reference contig with the same name but a different length. Highly dangerous, only use if you know what you are doing.", shortName = "acld", fullName = "allow_contig_length_discordance", required = false)
|
||||
var ALLOW_CONTIG_LENGTH_DISCORDANCE: Boolean = _
|
||||
|
||||
override def freezeFieldValues() {
|
||||
super.freezeFieldValues()
|
||||
if (outputIndex == null && output != null)
|
||||
outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai")
|
||||
}
|
||||
|
||||
override def inputBams = input
|
||||
override def outputBam = output
|
||||
this.createIndex = Some(true)
|
||||
this.sortOrder = null
|
||||
override def commandLine = super.commandLine +
|
||||
" REFERENCE=" + sortReference +
|
||||
optional(" ALLOW_INCOMPLETE_DICT_CONCORDANCE=", ALLOW_INCOMPLETE_DICT_CONCORDANCE)
|
||||
optional(" ALLOW_CONTIG_LENGTH_DISCORDANCE=", ALLOW_CONTIG_LENGTH_DISCORDANCE)
|
||||
}
|
||||
Loading…
Reference in New Issue