Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2012-08-06 20:22:31 -04:00
commit 97c5ed4feb
10 changed files with 61 additions and 159 deletions

View File

@ -533,27 +533,18 @@ public class GenotypingEngine {
final int elementLength = ce.getLength();
switch( ce.getOperator() ) {
case I:
final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos - 1, alignmentPos + elementLength ); // add padding base
boolean allN = true;
for( int i = 1; i < insertionBases.length; i++ ) { // check all bases except for the padding base
if( insertionBases[i] != (byte) 'N' ) {
allN = false;
break;
}
}
if( !allN ) {
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1;
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
} else {
insertionAlleles.add( Allele.create(insertionBases, false) );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
}
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1;
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
} else {
byte[] insertionBases = new byte[]{};
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
insertionAlleles.add( Allele.create(insertionBases, false) );
}
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
alignmentPos += elementLength;
break;
case S:

View File

@ -416,7 +416,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, perSampleReadList );
// subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
final ArrayList<Haplotype> bestHaplotypes = haplotypes;// ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes );
final ArrayList<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes );
for( final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> callResult :
( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES

View File

@ -82,11 +82,11 @@ public class KBestPaths {
}
}
/**
 * Comparator that orders {@code Path} objects by descending {@code lowestEdge}
 * (paths with a larger lowestEdge value sort first).
 */
protected static class PathComparatorLowestEdge implements Comparator<Path> {
    public int compare(final Path path1, final Path path2) {
        // Use Integer.compare rather than "path2.lowestEdge - path1.lowestEdge":
        // raw subtraction can overflow int when the two values have opposite
        // signs and large magnitude, yielding a wrong sign and a broken sort.
        return Integer.compare(path2.lowestEdge, path1.lowestEdge);
    }
}
//protected static class PathComparatorLowestEdge implements Comparator<Path> {
// public int compare(final Path path1, final Path path2) {
// return path2.lowestEdge - path1.lowestEdge;
// }
//}
public static List<Path> getKBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int k ) {
if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); }
@ -99,7 +99,7 @@ public class KBestPaths {
}
}
Collections.sort(bestPaths, new PathComparatorLowestEdge() );
Collections.sort(bestPaths, new PathComparatorTotalScore() );
Collections.reverse(bestPaths);
return bestPaths.subList(0, Math.min(k, bestPaths.size()));
}
@ -114,8 +114,8 @@ public class KBestPaths {
if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) {
if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) {
// clean out some low scoring paths
Collections.sort(bestPaths, new PathComparatorLowestEdge() );
for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); }
Collections.sort(bestPaths, new PathComparatorTotalScore() );
for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); } // BUGBUG: assumes MAX_PATHS_TO_HOLD >> 20
}
bestPaths.add(path);
} else if( n.val > 10000) {

View File

@ -294,7 +294,7 @@ public class LikelihoodCalculationEngine {
int hap1 = 0;
int hap2 = 0;
//double bestElement = Double.NEGATIVE_INFINITY;
final int maxChosenHaplotypes = Math.min( 9, sampleKeySet.size() * 2 + 1 );
final int maxChosenHaplotypes = Math.min( 13, sampleKeySet.size() * 2 + 1 );
while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
double maxElement = Double.NEGATIVE_INFINITY;
for( int iii = 0; iii < numHaplotypes; iii++ ) {

View File

@ -292,7 +292,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
@ -322,7 +322,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
}
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop ) {
//final int sizeOfActiveRegion = activeRegionStop - activeRegionStart;
if( haplotype == null ) { return false; }
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) );

View File

@ -20,17 +20,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "eff4c820226abafcaa058c66585198a7");
HCTest(CEUTRIO_BAM, "", "29ebfabcd4a42d4c5c2a576219cffb3d");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "2b40b314e6e63ae165186b55b14eee41");
HCTest(NA12878_BAM, "", "9732313b8a12faa347f6ebe96518c5df");
}
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "553870cc4d7e66f30862f8ae5dee01ff");
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "5e1d49d4110cd96c2e25f8e1da217e9e");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -41,7 +41,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(CEUTRIO_BAM, "", "0936c41e8f006174f7cf27d97235133e");
HCTestComplexVariants(CEUTRIO_BAM, "", "53df51e6071664725f6e7497f5ee5adf");
}
}

View File

@ -331,7 +331,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
// Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset.
double lodCutoff = 0.0;
for( final Tranche tranche : tranches ) {
if( MathUtils.compareDoubles(tranche.ts, TS_FILTER_LEVEL, 0.0001)==0 ) {
if( MathUtils.compareDoubles(tranche.ts, TS_FILTER_LEVEL, 0.0001) == 0 ) {
lodCutoff = tranche.minVQSLod;
}
}

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -170,49 +171,17 @@ public class Haplotype {
}
@Requires({"refInsertLocation >= 0"})
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation ) {
if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation ) {
// refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
return new Haplotype(bases.clone());
if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= bases.length ) { // desired change falls inside deletion so don't bother creating a new haplotype
return null;
}
byte[] newHaplotype;
try {
if( refAllele.length() == altAllele.length() ) { // SNP or MNP
newHaplotype = bases.clone();
for( int iii = 0; iii < altAllele.length(); iii++ ) {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
}
} else if( refAllele.length() < altAllele.length() ) { // insertion
final int altAlleleLength = altAllele.length() - 1;
newHaplotype = new byte[bases.length + altAlleleLength];
for( int iii = 0; iii < bases.length; iii++ ) {
newHaplotype[iii] = bases[iii];
}
for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) {
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
}
for( int iii = 0; iii < altAlleleLength; iii++ ) {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii+1];
}
} else { // deletion
final int shift = refAllele.length() - altAllele.length();
final int altAlleleLength = altAllele.length() - 1;
newHaplotype = new byte[bases.length - shift];
for( int iii = 0; iii < haplotypeInsertLocation + altAlleleLength; iii++ ) {
newHaplotype[iii] = bases[iii];
}
for( int iii = haplotypeInsertLocation + altAlleleLength; iii < newHaplotype.length; iii++ ) {
newHaplotype[iii] = bases[iii+shift];
}
}
} catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
return new Haplotype(bases.clone());
}
return new Haplotype(newHaplotype);
byte[] newHaplotypeBases = new byte[]{};
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, 0, haplotypeInsertLocation)); // bases before the variant
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, haplotypeInsertLocation + refAllele.length(), bases.length)); // bases after the variant
return new Haplotype(newHaplotypeBases);
}
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(final List<Allele> alleleList,

View File

@ -31,6 +31,8 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@ -53,11 +55,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("A", "AACTT", 0, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTTACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA";
basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases);
basicInsertTest("A", "AACTT", 16, h1Cigar, bases, h1bases);
}
@Test
@ -68,11 +70,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA";
basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases);
basicInsertTest("ACTGG", "A", 0, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAGTCAACTGGTCAACTGGTCA";
basicInsertTest("AACTG", "A", 7, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA";
basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases);
basicInsertTest("ACTGG", "A", 16, h1Cigar, bases, h1bases);
}
@Test
@ -102,11 +104,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases);
basicInsertTest("A", "AACTT", 0, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
basicInsertTest("C", "CACTT", 6, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC";
basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases);
basicInsertTest("G", "GACTT", 16, h1Cigar, bases, h1bases);
}
@Test
@ -120,12 +122,12 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(3, CigarOperator.D));
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases);
String h1bases = "A" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("ATCG", "A", 0, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATAAAG" + "AGGGGGA" + "AGGC";
basicInsertTest("CGATC", "AAA", 6, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC";
basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases);
basicInsertTest("GGGGG", "G", 16, h1Cigar, bases, h1bases);
}
@Test
@ -148,13 +150,16 @@ public class HaplotypeUnitTest extends BaseTest {
}
private void basicInsertTest(String ref, String alt, int loc, Cigar cigar, String hap, String newHap) {
final int INDEL_PADDING_BASE = (ref.length() == alt.length() ? 0 : 1);
final Haplotype h = new Haplotype(hap.getBytes());
final Allele h1refAllele = Allele.create(ref, true);
final Allele h1altAllele = Allele.create(alt, false);
final ArrayList<Allele> alleles = new ArrayList<Allele>();
alleles.add(h1refAllele);
alleles.add(h1altAllele);
final VariantContext vc = new VariantContextBuilder().alleles(alleles).loc("1", loc, loc + h1refAllele.getBases().length - 1).make();
h.setAlignmentStartHapwrtRef(0);
h.setCigar(cigar);
final Haplotype h1 = h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE);
final Haplotype h1 = h.insertAllele(vc.getReference(), vc.getAlternateAllele(0), loc);
final Haplotype h1expected = new Haplotype(newHap.getBytes());
Assert.assertEquals(h1, h1expected);
}

View File

@ -1,64 +0,0 @@
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.BaseTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
/**
 * Unit tests for the adaptive-context recalibration support classes.
 * User: depristo
 * Date: 8/3/12
 * Time: 12:26 PM
 */
// Placeholder test class: the real tests are intentionally deferred (see TODOs
// below) until the adaptive-context approach has been validated.
public class AdaptiveContextUnitTest {
// TODO
// TODO actually need unit tests when we have validated the value of this approach
// TODO particularly before we attempt to optimize the algorithm
// TODO
// --------------------------------------------------------------------------------
//
// Provider
//
// --------------------------------------------------------------------------------
// Test fixture pairing a pruned context tree with its full counterpart.
// NOTE(review): RecalDatumNode/ContextDatum semantics are defined elsewhere in
// the project — presumably pruned is a reduced form of full; confirm when tests
// are actually written.
private class AdaptiveContextTestProvider extends BaseTest.TestDataProvider {
final RecalDatumNode<ContextDatum> pruned;
final RecalDatumNode<ContextDatum> full;
private AdaptiveContextTestProvider(Class c, RecalDatumNode<ContextDatum> pruned, RecalDatumNode<ContextDatum> full) {
super(AdaptiveContextTestProvider.class);
this.pruned = pruned;
this.full = full;
}
}
// Builds a single tree node from (context, N, M) and attaches the given
// subtrees as children. The meaning of N and M is defined by ContextDatum's
// constructor (not visible here).
private RecalDatumNode<ContextDatum> makeTree(final String context, final int N, final int M,
final RecalDatumNode<ContextDatum> ... sub) {
final ContextDatum contextDatum = new ContextDatum(context, N, M);
final RecalDatumNode<ContextDatum> node = new RecalDatumNode<ContextDatum>(contextDatum);
for ( final RecalDatumNode<ContextDatum> sub1 : sub ) {
node.addSubnode(sub1);
}
return node;
}
// Currently yields no test cases: the example tree construction below is
// commented out, so getTests(...) returns an empty set and the @Test method
// never runs with real data.
@DataProvider(name = "AdaptiveContextTestProvider")
public Object[][] makeRecalDatumTestProvider() {
// final RecalDatumNode<ContextDatum> prune1 =
// makeTree("A", 10, 1,
// makeTree("AA", 11, 2),
// makeTree("AC", 12, 3),
// makeTree("AG", 13, 4),
// makeTree("AT", 14, 5));
//
// new AdaptiveContextTestProvider(pruned, full);
return AdaptiveContextTestProvider.getTests(AdaptiveContextTestProvider.class);
}
// Intentionally empty: body to be filled in once the provider supplies data
// (see TODOs at the top of the class).
@Test(dataProvider = "AdaptiveContextTestProvider")
public void testAdaptiveContextFill(AdaptiveContextTestProvider cfg) {
}
}