Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Matt Hanna 2011-06-29 14:53:17 -04:00
commit 47499ebc24
9 changed files with 119 additions and 42 deletions

View File

@ -978,7 +978,7 @@
<!-- Build gsalib R module -->
<target name="gsalib">
<exec executable="R" failonerror="true">
<arg line="R CMD INSTALL -l R/ R/src/gsalib/" />
<arg line="R CMD INSTALL -l private/R/ private/R/src/gsalib/" />
</exec>
</target>
</project>

View File

@ -127,9 +127,9 @@ public class VariantContextAdaptors {
Map<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VariantContext.ID_KEY, dbsnp.getRsID());
if ( DbSNPHelper.isDeletion(dbsnp) ) {
int index = ref.getLocus().getStart() - ref.getWindow().getStart() - 1;
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
if ( index < 0 )
throw new ReviewedStingException("DbSNP conversion requested using a reference context with no window; we will fail to convert deletions");
return null; // we weren't given enough reference context to create the VariantContext
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index]));
}
Collection<Genotype> genotypes = null;

View File

@ -4,9 +4,11 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import net.sf.samtools.*;
import java.util.Arrays;
import java.util.HashMap;
@ -67,20 +69,95 @@ public class ReadPosRankSumTest extends RankSumTest {
altLikelihood = like;
}
}
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p.getOffset(), 0, 0);
final int numAlignedBases = AlignmentUtils.getNumAlignedBases(p.getRead());
int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset());
final int numAlignedBases = getNumAlignedBases(p.getRead());
int rp = readPos;
if( readPos > numAlignedBases / 2 ) {
readPos = numAlignedBases - ( readPos + 1 );
}
//if (DEBUG) System.out.format("R:%s start:%d C:%s offset:%d rp:%d readPos:%d alignedB:%d\n",p.getRead().getReadName(),p.getRead().getAlignmentStart(),p.getRead().getCigarString(),p.getOffset(), rp, readPos, numAlignedBases);
if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH))
// if event is beyond span of read just return and don't consider this element. This can happen, for example, with reads
// where soft clipping still left strings of low quality bases but these are later removed by indel-specific clipping.
// if (readPos < -1)
// return;
if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH)) {
refQuals.add((double)readPos);
else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH))
//if (DEBUG) System.out.format("REF like: %4.1f, pos: %d\n",refLikelihood,readPos);
}
else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH)) {
altQuals.add((double)readPos);
//if (DEBUG) System.out.format("ALT like: %4.1f, pos: %d\n",refLikelihood,readPos);
}
}
}
}
/**
 * Returns the number of bases at the start of the read that should be treated as clipped.
 *
 * Starts from any leading hard clip recorded in the CIGAR (hard-clipped bases are never
 * considered), then extends the clip forward past any contiguous run of bases whose quality
 * is below {@code PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD}. This is deliberately stricter
 * than the CIGAR alone, which can leave strings of Q2 bases hanging off the read.
 *
 * @param read the read to inspect
 * @return number of leading bases to treat as clipped (hard clip + low-quality run)
 */
int getNumClippedBasesAtStart(SAMRecord read) {
    final CigarElement leadingElement = read.getCigar().getCigarElement(0);
    // Leading hard-clipped bases are never part of the sequence we consider.
    int clipped = (leadingElement.getOperator() == CigarOperator.H) ? leadingElement.getLength() : 0;

    final byte[] bases = read.getReadBases();
    final byte[] quals = read.getBaseQualities();

    // Walk forward from the hard-clip boundary, absorbing the contiguous run of
    // low-quality bases into the clipped region.
    while (clipped < bases.length
            && quals[clipped] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD) {
        clipped++;
    }
    return clipped;
}
/**
 * Returns the number of read bases remaining once the clipped regions at both ends
 * (as computed by {@code getNumClippedBasesAtStart}/{@code getNumClippedBasesAtEnd},
 * i.e. hard clips extended through low-quality runs) are excluded.
 *
 * @param read the read to inspect
 * @return read length minus the start- and end-clipped base counts
 */
int getNumAlignedBases(SAMRecord read) {
    final int totalClipped = getNumClippedBasesAtStart(read) + getNumClippedBasesAtEnd(read);
    return read.getReadLength() - totalClipped;
}
/**
 * Returns the number of bases at the end of the read that should be treated as clipped.
 *
 * Mirror of {@code getNumClippedBasesAtStart}: begins with any trailing hard clip in the
 * CIGAR (hard-clipped bases are never considered), then extends the clip backwards through
 * any contiguous run of bases below {@code PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD}.
 * This is stricter than the CIGAR alone, which may leave Q2 tails on the read.
 *
 * @param read the read to inspect
 * @return number of trailing bases to treat as clipped (hard clip + low-quality run)
 */
int getNumClippedBasesAtEnd(SAMRecord read) {
    final Cigar cigar = read.getCigar();
    final CigarElement trailingElement = cigar.getCigarElement(cigar.numCigarElements() - 1);
    // Trailing hard-clipped bases are never part of the sequence we consider.
    int clipped = (trailingElement.getOperator() == CigarOperator.H) ? trailingElement.getLength() : 0;

    final byte[] bases = read.getReadBases();
    final byte[] quals = read.getBaseQualities();

    // Walk backwards from the hard-clip boundary, absorbing the contiguous run of
    // low-quality bases into the clipped region.
    int i = bases.length - clipped - 1;
    while (i >= 0 && quals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD) {
        clipped++;
        i--;
    }
    return clipped;
}
/**
 * Translates a raw read offset into an offset relative to the (effective) clipped start
 * of the read, where the clipped start is defined by {@code getNumClippedBasesAtStart}
 * (leading hard clip extended through the low-quality run).
 *
 * @param read   the read the offset refers to
 * @param offset zero-based offset into the unclipped read bases
 * @return the offset measured from the first non-clipped base (may be negative if the
 *         offset falls inside the clipped region)
 */
int getOffsetFromClippedReadStart(SAMRecord read, int offset) {
    final int startClip = getNumClippedBasesAtStart(read);
    return offset - startClip;
}
}

View File

@ -32,9 +32,9 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
/*import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Covariate;
import org.broadinstitute.sting.walkers.IndelCountCovariates.RecalDataManager;
import org.broadinstitute.sting.walkers.IndelCountCovariates.RecalDatum;
import org.broadinstitute.sting.walkers.IndelCountCovariates.RecalibrationArgumentCollection;
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDataManager;
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDatum;
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalibrationArgumentCollection;
*/import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
@ -57,7 +57,7 @@ import java.util.regex.Pattern;
public class PairHMMIndelErrorModel {
private final int BASE_QUAL_THRESHOLD = 10;
public static final int BASE_QUAL_THRESHOLD = 20;
private static final int MATCH_OFFSET = 0;

View File

@ -190,7 +190,7 @@ public class VCFUtils {
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, -1, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; if site is not biallelic, number of likelihoods if n*(n+1)/2"));
return result;
}

View File

@ -19,12 +19,12 @@ public class VariantContextIntegrationTest extends WalkerTest {
static HashMap<String, String> expectations = new HashMap<String, String>();
static {
expectations.put("-L 1:1-10000 --printPerLocus", "493bf9bcde93c08c8c46f72c8e98cf2f");
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "bd9e062b23c2c48fef8c299dc1d294d5");
expectations.put("-L 1:1-10000 --printPerLocus", "e4ee2eaa3114888e918a1c82df7a027a");
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "5b5635e4877d82e8a27d70dac24bda2f");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "ceced3f270b4fe407ee83bc9028becde");
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "9a9b9e283553c28bf58de1cafa38fe92");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "2097e32988d603d3b353b50218c86d3b");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "91c6a9489256d9ce77c8fedf7221a961");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "033bd952fca048fe1a4f6422b57ab2ed");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5e40980c02797f90821317874426a87a");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "e5a00766f8c1ff9cf92310bafdec3126");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc");
@ -58,7 +58,7 @@ public class VariantContextIntegrationTest extends WalkerTest {
// this really just tests that we are seeing the same number of objects over all of chr1
WalkerTestSpec spec = new WalkerTestSpec( root + " -L 1" + " -o %s",
1, // just one output file
Arrays.asList("d2a3f2fe329a0a64145cfd19fde45b99"));
Arrays.asList("529f936aa6c303658b23caf4e527782f"));
executeTest("testLargeScaleConversion", spec);
}
}

View File

@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("a604a64252a8538b7d13f52bd068f797"));
Arrays.asList("258e1954e6ae55c89abc6a716e19cbe0"));
executeTest("test MultiSample Pilot1", spec);
}
@ -54,12 +54,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("5844eda3596732a16c8559f5bfbe1723"));
Arrays.asList("edeb1db288a24baff59575ceedd94243"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("db4664a1785c4efb4cd9057478aa846f"));
Arrays.asList("581990130d90071b084024f4cd7caf91"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@ -67,7 +67,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("36c70ec27a25f88fe2364bba2f961843"));
Arrays.asList("d120db27d694a6da32367cc4fb5770fa"));
executeTest("test SingleSample Pilot2", spec);
}
@ -77,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "212eab2024903997625ba98009063226";
private final static String COMPRESSED_OUTPUT_MD5 = "75e5c430ed39f79f24e375037a388dc4";
@Test
public void testCompressedOutput() {
@ -107,7 +107,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
String md5 = "f83a33a1ecc350cae0c002e4a43a7861";
String md5 = "a29615dd37222a11b8dadd341b53e43c";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -138,9 +138,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testCallingParameters() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "--min_base_quality_score 26", "d10d0be159d80e22b9c81970ee098daf" );
e.put( "--min_mapping_quality_score 26", "f76099c403b60b6045a0ae7d9f589dc4" );
e.put( "--p_nonref_model GRID_SEARCH", "cda395fdf7352e07537610f52a6d0cdc" );
e.put( "--min_base_quality_score 26", "93e6269e38db9bc1732555e9969e3648" );
e.put( "--min_mapping_quality_score 26", "64be99183c100caed4aa5f8bad64c7e9" );
e.put( "--p_nonref_model GRID_SEARCH", "0592fe33f705ad8e2f13619fcf157805" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -153,9 +153,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-sites_only", "9b85d9c10d634315d20aefa565dbab60" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "245abbb39de43e89f63918a6771c0c14" );
e.put( "--output_mode EMIT_ALL_SITES", "fb7a59b318ecdb46fd96024be7e41e0e" );
e.put( "-sites_only", "1483e637dc0279935a7f90d136d147bb" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "adcd91bc7dae8020df8caf1a30060e98" );
e.put( "--output_mode EMIT_ALL_SITES", "b708acc2fa40f336bcd2d0c70091e07e" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -169,12 +169,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("f76099c403b60b6045a0ae7d9f589dc4"));
Arrays.asList("64be99183c100caed4aa5f8bad64c7e9"));
executeTest("test confidence 1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("879e5ab09bd0d37e0300dd34ec09db81"));
Arrays.asList("e76ca54232d02f0d92730e1affeb804e"));
executeTest("test confidence 2", spec2);
}
@ -186,8 +186,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "c7123f7b84b402f4959db950326afc13" );
e.put( 1.0 / 1850, "75e6043a68265ab6deb284bb753801f9" );
e.put( 0.01, "18d37f7f107853b5e32c757b4e143205" );
e.put( 1.0 / 1850, "2bcb90ce2f7542bf590f7612018fae8e" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("3f45b2af75123e48b89fa1759c444ec0"));
Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -230,7 +230,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("cede928592575e617f1323866348c256"));
Arrays.asList("0919ab7e513c377610e23a67d33608fa"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -244,7 +244,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq OFF",
1,
Arrays.asList("3f45b2af75123e48b89fa1759c444ec0"));
Arrays.asList("825f05b31b5bb7e82231a15c7e4e2b0d"));
executeTest(String.format("test calling with BAQ OFF"), spec);
}
@ -263,7 +263,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("7fe14d81f12d5d57e3a522b2a4f07fc6"));
Arrays.asList("cb37348c41b8181be829912730f747e1"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -278,7 +278,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("a7da8acce1957334619f3dfeac3d1379"));
Arrays.asList("ca5b6a5fb53ae401b146cc3044f454f2"));
executeTest(String.format("test indel caller in SLX witn low min allele count"), spec);
}
@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("234b6c336890cc6d9a495bc40f09d126"));
Arrays.asList("ca4343a4ab6d3cce94ce61d7d1910f81"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -301,14 +301,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("9e342e3b73ae4887620195417e1af44a"));
Arrays.asList("3f555b53e9dd14cf7cdf96c24e322364"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("f265726403ca3f28518cb4290b7bee84"));
Arrays.asList("1b9764b783acf7822edc58e6822eef5b"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
}

View File

@ -52,7 +52,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
WalkerTestSpec spec2 = new WalkerTestSpec(
baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -D " + GATKDataLocation + "dbsnp_129_b36.rod",
1,
Arrays.asList("78850024ac9ff3ba51b6f097c7041c1d"));
Arrays.asList("05a114623c126b0398fbc1703437461e"));
executeTest("realigner known indels only from dbsnp", spec2);
}

View File

@ -40,7 +40,7 @@ public class BatchMergeIntegrationTest extends WalkerTest {
+ " -B:alleles,VCF " + alleles
+ " -I " + bam,
1,
Arrays.asList("b7839064dc4979400af4792460d9884b"));
Arrays.asList("f4ed8f4ef2cba96823c06e90e9d0de35"));
executeTest("testBatchMerge UG genotype given alleles:" + new File(bam).getName() + " with " + new File(alleles).getName(), spec);
}
}