Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2012-08-09 10:31:10 -04:00
commit e48727dae3
8 changed files with 25 additions and 92 deletions

View File

@ -25,9 +25,6 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.util.SequenceUtil;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
@ -183,7 +180,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
* A value of 0 turns downsampling off.
*/
@Argument(fullName = "downsample_coverage", shortName = "ds", doc = "", required = false)
protected int downsampleCoverage = 0;
protected int downsampleCoverage = 250;
@Hidden
@Argument(fullName = "", shortName = "dl", doc = "", required = false)
@ -535,81 +532,12 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
if (debugLevel == 1)
System.out.println("BAM: " + read.getCigar() + " " + read.getAlignmentStart() + " " + read.getAlignmentEnd());
// if (!DONT_USE_SOFTCLIPPED_BASES)
// reSoftClipBases(read);
if (!DONT_COMPRESS_READ_NAMES)
compressReadName(read);
out.addAlignment(read);
}
private void reSoftClipBases(GATKSAMRecord read) {
Integer left = (Integer) read.getTemporaryAttribute("SL");
Integer right = (Integer) read.getTemporaryAttribute("SR");
if (left != null || right != null) {
Cigar newCigar = new Cigar();
for (CigarElement element : read.getCigar().getCigarElements()) {
newCigar.add(new CigarElement(element.getLength(), element.getOperator()));
}
if (left != null) {
newCigar = updateFirstSoftClipCigarElement(left, newCigar);
read.setAlignmentStart(read.getAlignmentStart() + left);
}
if (right != null) {
Cigar invertedCigar = invertCigar(newCigar);
newCigar = invertCigar(updateFirstSoftClipCigarElement(right, invertedCigar));
}
read.setCigar(newCigar);
}
}
/**
* Facility routine to revert the first element of a Cigar string (skipping hard clips) into a soft-clip.
* To be used on both ends if provided a flipped Cigar
*
* @param softClipSize the length of the soft clipped element to add
* @param originalCigar the original Cigar string
* @return a new Cigar object with the soft clips added
*/
private Cigar updateFirstSoftClipCigarElement (int softClipSize, Cigar originalCigar) {
Cigar result = new Cigar();
CigarElement leftElement = new CigarElement(softClipSize, CigarOperator.S);
boolean updated = false;
for (CigarElement element : originalCigar.getCigarElements()) {
if (!updated && element.getOperator() == CigarOperator.M) {
result.add(leftElement);
int newLength = element.getLength() - softClipSize;
if (newLength > 0)
result.add(new CigarElement(newLength, CigarOperator.M));
updated = true;
}
else
result.add(element);
}
return result;
}
/**
* Given a cigar string, returns the inverted cigar string.
*
* @param cigar the original cigar
* @return the inverted cigar
*/
private Cigar invertCigar(Cigar cigar) {
Stack<CigarElement> stack = new Stack<CigarElement>();
for (CigarElement e : cigar.getCigarElements())
stack.push(e);
Cigar inverted = new Cigar();
while (!stack.empty()) {
inverted.add(stack.pop());
}
return inverted;
}
/**
* Quality control procedure that checks if the consensus reads contains too many
* mismatches with the reference. This should never happen and is a good trigger for

View File

@ -499,7 +499,7 @@ public class SlidingWindow {
result.addAll(addToSyntheticReads(0, start));
result.addAll(finalizeAndAdd(ConsensusType.BOTH));
for (GATKSAMRecord read : result) {
for (GATKSAMRecord read : allReads) {
readsInWindow.remove(read); // todo -- not optimal, but needs to be done so the next region doesn't try to remove the same reads from the header counts.
}
@ -627,7 +627,7 @@ public class SlidingWindow {
int locationIndex = startLocation < 0 ? 0 : readStart - startLocation;
if (removeRead && locationIndex < 0)
throw new ReviewedStingException("read is behind the Sliding Window. read: " + read + " cigar: " + read.getCigarString() + " window: " + startLocation + "," + stopLocation);
throw new ReviewedStingException("read is behind the Sliding Window. read: " + read + " start " + read.getUnclippedStart() + "," + read.getUnclippedEnd() + " cigar: " + read.getCigarString() + " window: " + startLocation + "," + stopLocation);
if (!removeRead) { // we only need to create new header elements if we are adding the read, not when we're removing it
if (locationIndex < 0) { // Do we need to add extra elements before the start of the header? -- this may happen if the previous read was clipped and this alignment starts before the beginning of the window

View File

@ -681,8 +681,8 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
}
// per-pool logging of AC and AF
gb.attribute(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
gb.attribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
// remove PLs if necessary
if (newLikelihoods.length > MAX_LENGTH_FOR_POOL_PL_LOGGING)

View File

@ -21,28 +21,28 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testDefaultCompression() {
RRTest("testDefaultCompression ", L, "323dd4deabd7767efa0f2c6e7fa4189f");
RRTest("testDefaultCompression ", L, "72eb6db9d7a09a0cc25eaac1aafa97b7");
}
@Test(enabled = true)
public void testMultipleIntervals() {
String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
RRTest("testMultipleIntervals ", intervals, "c437fb160547ff271f8eba30e5f3ff76");
RRTest("testMultipleIntervals ", intervals, "104b1a1d9fa5394c6fea95cd32967b78");
}
@Test(enabled = true)
public void testHighCompression() {
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "3a607bc3ebaf84e9dc44e005c5f8a047");
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "c55140cec60fa8c35161680289d74d47");
}
@Test(enabled = true)
public void testLowCompression() {
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "afd39459c841b68a442abdd5ef5f8f27");
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "0f2e57b7f6de03cc4da1ffcc8cf8f1a7");
}
@Test(enabled = true)
public void testIndelCompression() {
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "f7b9fa44c10bc4b2247813d2b8dc1973");
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "dda0c95f56f90e5f633c2437c2b21031");
}
@Test(enabled = true)

View File

@ -46,33 +46,33 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
@Test
public void testBOTH_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d76e3b910259da819f1e1b2adc68ba8d");
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","0934f72865388999efec64bd9d4a9b93");
}
@Test
public void testINDEL_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ffadcdaee613dab975197bed0fc78da3");
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","126581c72d287722437274d41b6fed7b");
}
@Test
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","96087fe9240e3656cc2a4e0ff0174d5b");
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","b543aa1c3efedb301e525c1d6c50ed8d");
}
@Test
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","6fdae7093831ecfc82a06dd707d62fe9");
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","55b20557a836bb92688e68f12d7f5dc4");
}
@Test
public void testMT_SNP_DISCOVERY_sp4() {
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","6b27634214530d379db70391a9cfc2d7");
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","7eb889e8e07182f4c3d64609591f9459");
}
@Test
public void testMT_SNP_GGA_sp10() {
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "e74d4c73ece45d7fb676b99364df4f1a");
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "db8114877b99b14f7180fdcd24b040a7");
}
}

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -46,6 +47,9 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
@Output(required = true)
private PrintStream out;
@Argument(fullName = "coverage_threshold", shortName = "cov", doc = "The minimum allowable coverage to be considered covered", required = false)
private int coverageThreshold = 20;
@Override
// Look to see if the region has sufficient coverage
public ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
@ -53,8 +57,7 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup());
// note the linear probability scale
int coverageThreshold = 20;
return new ActivityProfileResult(Math.min((double) depth / coverageThreshold, 1));
return new ActivityProfileResult(Math.min(depth / coverageThreshold, 1));
}

View File

@ -312,8 +312,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
// add the pool values for each genotype
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed, for this pool"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed, for this pool"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
}
if (UAC.referenceSampleName != null) {
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.REFSAMPLE_DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Total reference sample depth"));

View File

@ -36,6 +36,8 @@ public final class VCFConstants {
public static final String MLE_ALLELE_COUNT_KEY = "MLEAC";
public static final String ALLELE_FREQUENCY_KEY = "AF";
public static final String MLE_ALLELE_FREQUENCY_KEY = "MLEAF";
public static final String MLE_PER_SAMPLE_ALLELE_COUNT_KEY = "MLPSAC";
public static final String MLE_PER_SAMPLE_ALLELE_FRACTION_KEY = "MLPSAF";
public static final String ALLELE_NUMBER_KEY = "AN";
public static final String RMS_BASE_QUALITY_KEY = "BQ";
public static final String CIGAR_KEY = "CIGAR";