diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/Haplotype.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/Haplotype.java index 688f05934..b39aa1b42 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/Haplotype.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/Haplotype.java @@ -49,6 +49,9 @@ package org.broadinstitute.sting.gatk.walkers.phasing; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; class Haplotype extends BaseArray implements Cloneable { public Haplotype(byte[] bases) { @@ -91,4 +94,19 @@ class Haplotype extends BaseArray implements Cloneable { public Haplotype subHaplotype(int fromIndex, int toIndex) { return new Haplotype(Arrays.copyOfRange(bases, fromIndex, Math.min(toIndex, size()))); } + + public Haplotype subHaplotype(Set inds) { + List basesList = new LinkedList(); + for (int i : inds) { + if (0 <= i && i < bases.length) + basesList.add(bases[i]); + } + + Byte[] newBases = new Byte[basesList.size()]; + int index = 0; + for (Byte b : basesList) + newBases[index++] = b; + + return new Haplotype(newBases); + } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index 2a31b5425..707bf2722 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -449,9 +449,9 @@ public class PhaseByTransmission extends RodWalker, HashMa Set vcfSamples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); //Get the trios from the families passed as ped - setTrios(); + setTrios(vcfSamples); if(trios.size()<1) - throw new UserException.BadInput("No PED file passed or no trios found in PED file. Aborted."); + throw new UserException.BadInput("No PED file passed or no *non-skipped* trios found in PED file. Aborted."); Set headerLines = new HashSet(); @@ -470,13 +470,23 @@ public class PhaseByTransmission extends RodWalker, HashMa /** * Select trios and parent/child pairs only */ - private void setTrios(){ + private void setTrios(Set vcfSamples){ - Map> families = this.getSampleDB().getFamilies(); + Map> families = this.getSampleDB().getFamilies(vcfSamples); Set family; ArrayList parents; for(Map.Entry> familyEntry : families.entrySet()){ family = familyEntry.getValue(); + + // Since getFamilies(vcfSamples) above still returns parents of samples in the VCF even if those parents are not in the VCF, need to subset down here: + Set familyMembersInVCF = new TreeSet(); + for(Sample familyMember : family){ + if (vcfSamples.contains(familyMember.getID())) { + familyMembersInVCF.add(familyMember); + } + } + family = familyMembersInVCF; + if(family.size()<2 || family.size()>3){ logger.info(String.format("Caution: Family %s has %d members; At the moment Phase By Transmission only supports trios and parent/child pairs. Family skipped.",familyEntry.getKey(),family.size())); } @@ -487,8 +497,7 @@ public class PhaseByTransmission extends RodWalker, HashMa if(family.containsAll(parents)) this.trios.add(familyMember); else - logger.info(String.format("Caution: Family %s skipped as it is not a trio nor a parent/child pair; At the moment Phase By Transmission only supports trios and parent/child pairs. Family skipped.",familyEntry.getKey())); - break; + logger.info(String.format("Caution: Child %s of family %s skipped as info is not provided as a complete trio nor a parent/child pair; At the moment Phase By Transmission only supports trios and parent/child pairs. Child skipped.", familyMember.getID(), familyEntry.getKey())); } } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java index a297b38cf..7ed77b845 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java @@ -82,6 +82,12 @@ import static org.broadinstitute.sting.utils.variant.GATKVCFUtils.getVCFHeadersF /** * Walks along all variant ROD loci, caching a user-defined window of VariantContext sites, and then finishes phasing them when they go out of range (using upstream and downstream reads). * + * The current implementation works for diploid SNPs, and will transparently (but properly) ignore other sites. + * + * The underlying algorithm is based on building up 2^n local haplotypes, + * where n is the number of heterozygous SNPs in the local region we expected to find phase-informative reads (and assumes a maximum value of maxPhaseSites, a user parameter). + * Then, these 2^n haplotypes are used to determine, with sufficient certainty (the assigned PQ score), to which haplotype the alleles of a genotype at a particular locus belong (denoted by the HP tag). + * *

* Performs physical phasing of SNP calls, based on sequencing reads. *

@@ -141,7 +147,7 @@ public class ReadBackedPhasing extends RodWalker P(error) = 10^(-10/10) = 0.1, P(correct) = 0.9 + protected Double phaseQualityThresh = 20.0; // PQ = 20.0 <=> P(error) = 10^(-20/10) = 0.01, P(correct) = 0.99 @Hidden @Argument(fullName = "variantStatsFilePrefix", shortName = "variantStats", doc = "The prefix of the VCF/phasing statistics files [For DEBUGGING purposes only - DO NOT USE!]", required = false) @@ -161,12 +167,6 @@ public class ReadBackedPhasing extends RodWalker unphasedSiteQueue = null; @@ -175,6 +175,7 @@ public class ReadBackedPhasing extends RodWalker processQueue(PhasingStats phaseStats, boolean processAll) { List oldPhasedList = new LinkedList(); @@ -359,6 +362,7 @@ public class ReadBackedPhasing extends RodWalker discardIrrelevantPhasedSites() { List vcList = new LinkedList(); @@ -402,27 +406,30 @@ public class ReadBackedPhasing extends RodWalker prevHetAndInteriorIt = phaseWindow.prevHetAndInteriorIt; - /* Notes: - 1. Call to next() advances iterator to next position in partiallyPhasedSites. - 2. prevHetGenotype != null, since otherwise prevHetAndInteriorIt would not have been chosen to point to its UnfinishedVariantAndReads. - */ - UnfinishedVariantContext prevUvc = prevHetAndInteriorIt.next().unfinishedVariant; - Genotype prevHetGenotype = prevUvc.getGenotype(samp); + // Create the list of all het genotypes preceding this one (and in the phasing window as contained in partiallyPhasedSites): + List prevHetGenotypes = new LinkedList(); + CloneableIteratorLinkedList.CloneableIterator phasedIt = partiallyPhasedSites.iterator(); + while (phasedIt.hasNext()) { + UnfinishedVariantAndReads phasedVr = phasedIt.next(); + Genotype prevGt = phasedVr.unfinishedVariant.getGenotype(samp); + if (prevGt != null && isUnfilteredCalledDiploidGenotype(prevGt) && prevGt.isHet()) { + GenotypeAndReadBases grb = new GenotypeAndReadBases(prevGt, phasedVr.sampleReadBases.get(samp), phasedVr.unfinishedVariant.getLocation()); + prevHetGenotypes.add(grb); + if (DEBUG) logger.debug("Using UPSTREAM het site = " + grb.loc); + } + } + + SNPallelePair allelePair = new SNPallelePair(gt); + if (DEBUG) logger.debug("Want to phase TOP vs. BOTTOM for: " + "\n" + allelePair); + + boolean phasedCurGenotypeRelativeToPrevious = false; + for (int goBackFromEndOfPrevHets = 0; goBackFromEndOfPrevHets < prevHetGenotypes.size(); goBackFromEndOfPrevHets++) { + PhasingWindow phaseWindow = new PhasingWindow(vr, samp, prevHetGenotypes, goBackFromEndOfPrevHets); PhaseResult pr = phaseSampleAtSite(phaseWindow); - boolean genotypesArePhased = passesPhasingThreshold(pr.phaseQuality); + phasedCurGenotypeRelativeToPrevious = passesPhasingThreshold(pr.phaseQuality); if (pr.phasingContainsInconsistencies) { if (DEBUG) @@ -430,44 +437,43 @@ public class ReadBackedPhasing extends RodWalker= phaseQualityThresh; } + // A genotype and the base pileup that supports it private static class GenotypeAndReadBases { public Genotype genotype; public ReadBasesAtPosition readBases; @@ -510,75 +531,43 @@ public class ReadBackedPhasing extends RodWalker prevHetAndInteriorIt = null; + + private int phaseRelativeToIndex = -1; private int phasingSiteIndex = -1; + private Map readsAtHetSites = null; - private void clearFields() { - hetGenotypes = null; - prevHetAndInteriorIt = null; - phasingSiteIndex = -1; - readsAtHetSites = null; - } - - public boolean hasPreviousHets() { - return phasingSiteIndex > 0; + public Genotype phaseRelativeToGenotype() { + return hetGenotypes[phaseRelativeToIndex]; } // ASSUMES that: isUnfilteredCalledDiploidGenotype(vrGt) && vrGt.isHet() [vrGt = vr.variant.getGenotype(sample)] - public PhasingWindow(VariantAndReads vr, String sample) { - List listHetGenotypes = new LinkedList(); + public PhasingWindow(VariantAndReads vr, String sample, List prevHetGenotypes, int goBackFromEndOfPrevHets) { + if (prevHetGenotypes.isEmpty() || goBackFromEndOfPrevHets >= prevHetGenotypes.size()) // no previous sites against which to phase + throw new ReviewedStingException("Should never get empty set of previous sites to phase against"); - // Include previously phased sites in the phasing computation: - CloneableIteratorLinkedList.CloneableIterator phasedIt = partiallyPhasedSites.iterator(); - while (phasedIt.hasNext()) { - UnfinishedVariantAndReads phasedVr = phasedIt.next(); - Genotype gt = phasedVr.unfinishedVariant.getGenotype(sample); - if (gt == null || !isUnfilteredCalledDiploidGenotype(gt)) { // constructed haplotype must start AFTER this "break" - listHetGenotypes.clear(); // clear out any history - } - else if (gt.isHet()) { - GenotypeAndReadBases grb = new GenotypeAndReadBases(gt, phasedVr.sampleReadBases.get(sample), phasedVr.unfinishedVariant.getLocation()); - listHetGenotypes.add(grb); - if (DEBUG) logger.debug("Using UPSTREAM het site = " + grb.loc); - prevHetAndInteriorIt = phasedIt.clone(); - } - } + // Include these previously phased sites in the phasing computation: + List listHetGenotypes = new LinkedList(prevHetGenotypes); + + phaseRelativeToIndex = listHetGenotypes.size() - 1 - goBackFromEndOfPrevHets; phasingSiteIndex = listHetGenotypes.size(); - if (phasingSiteIndex == 0) { // no previous sites against which to phase - clearFields(); - return; - } - prevHetAndInteriorIt.previous(); // so that it points to the previous het site [and NOT one after it, due to the last call to next()] - if (respectPhaseInInput) { - Genotype prevHetGenotype = prevHetAndInteriorIt.clone().next().unfinishedVariant.getGenotype(sample); - if (!prevHetGenotype.isPhased()) { - // Make this genotype unphaseable, since its previous het is not already phased [as required by respectPhaseInInput]: - clearFields(); - return; - } - } - - // Add the (het) position to be phased: + // Add the (het) position to be phased [at phasingSiteIndex]: GenomeLoc phaseLocus = GATKVariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vr.variant); GenotypeAndReadBases grbPhase = new GenotypeAndReadBases(vr.variant.getGenotype(sample), vr.sampleReadBases.get(sample), phaseLocus); listHetGenotypes.add(grbPhase); - if (DEBUG) - logger.debug("PHASING het site = " + grbPhase.loc + " [phasingSiteIndex = " + phasingSiteIndex + "]"); + if (DEBUG) logger.debug("PHASING het site = " + grbPhase.loc + " [phasingSiteIndex = " + phasingSiteIndex + "]"); // Include as-of-yet unphased sites in the phasing computation: for (VariantAndReads nextVr : unphasedSiteQueue) { if (!startDistancesAreInWindowRange(vr.variant, nextVr.variant)) //nextVr too far ahead of the range used for phasing vc break; Genotype gt = nextVr.variant.getGenotype(sample); - if (gt == null || !isUnfilteredCalledDiploidGenotype(gt)) { // constructed haplotype must end BEFORE this "break" - break; - } - else if (gt.isHet()) { + if (gt != null && isUnfilteredCalledDiploidGenotype(gt) && gt.isHet()) { GenotypeAndReadBases grb = new GenotypeAndReadBases(gt, nextVr.sampleReadBases.get(sample), GATKVariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), nextVr.variant)); listHetGenotypes.add(grb); if (DEBUG) logger.debug("Using DOWNSTREAM het site = " + grb.loc); @@ -595,7 +584,7 @@ public class ReadBackedPhasing extends RodWalker maxPhaseSites) { listHetGenotypes = trimWindow(listHetGenotypes, sample, phaseLocus); @@ -609,14 +598,14 @@ public class ReadBackedPhasing extends RodWalker listHetGenotypes, String sample, GenomeLoc phasingLoc) { buildReadsAtHetSites(listHetGenotypes, sample, phasingLoc, null); } @@ -665,6 +654,7 @@ public class ReadBackedPhasing extends RodWalker> edgeReads; @@ -710,6 +700,7 @@ public class ReadBackedPhasing extends RodWalker removeExtraneousReads(int numHetSites) { PhasingGraph readGraph = new PhasingGraph(numHetSites); EdgeToReads edgeToReads = new EdgeToReads(); @@ -737,7 +728,7 @@ public class ReadBackedPhasing extends RodWalker keepReads = new HashSet(); - /* Check which Reads are involved in acyclic paths from (phasingSiteIndex - 1) to (phasingSiteIndex): + /* Check which Reads are involved in acyclic paths from phaseRelativeToIndex to (phasingSiteIndex): In detail: Every Read links EACH pair of sites for which it contains bases. Then, each such edge is added to a "site connectivity graph". @@ -783,7 +774,7 @@ public class ReadBackedPhasing extends RodWalker removeExtraneousSites(List listHetGenotypes) { Set sitesWithReads = new HashSet(); for (Map.Entry nameToReads : readsAtHetSites.entrySet()) { @@ -866,53 +858,119 @@ public class ReadBackedPhasing extends RodWalker keepHetSites = new LinkedList(); int index = 0; - int numPrecedingRemoved = 0; + int numPrecedingPhaseRelativeToSiteRemoved = 0; + int numPrecedingPhasingSiteRemoved = 0; for (GenotypeAndReadBases grb : listHetGenotypes) { boolean keepSite = sitesWithReads.contains(index); if (DEBUG && logger.isDebugEnabled() && !keepSite) logger.debug("Removing read-less site " + grb.loc); - if (keepSite || index == phasingSiteIndex || index == phasingSiteIndex - 1) { + if (keepSite || index == phasingSiteIndex || index == phaseRelativeToIndex) { keepHetSites.add(grb); if (!keepSite) if (DEBUG) logger.debug("Although current or previous sites have no relevant reads, continuing empty attempt to phase them [for sake of program flow]..."); } - else if (index <= phasingSiteIndex) - numPrecedingRemoved++; + else { + if (index <= phaseRelativeToIndex) + numPrecedingPhaseRelativeToSiteRemoved++; + if (index <= phasingSiteIndex) + numPrecedingPhasingSiteRemoved++; + } index++; } - phasingSiteIndex -= numPrecedingRemoved; + phaseRelativeToIndex -= numPrecedingPhaseRelativeToSiteRemoved; + phasingSiteIndex -= numPrecedingPhasingSiteRemoved; return keepHetSites; } + /* Auxilary object to sort candidate het sites with which to phase the index site, + where sorting is performed based on distance to the index site + (since presumably closer sites will have greater numbers of overlapping reads) + */ + private class SortSitesBySumOfDist implements Comparator { + private Vector grb; + + public SortSitesBySumOfDist(List listHetGenotypes) { + grb = new Vector(listHetGenotypes); + } + + public int compare(Integer i1, Integer i2) { + int d1 = calcGenomicDist(i1); + int d2 = calcGenomicDist(i2); + + if (d1 != d2) + return d1 - d2; + + int id1 = calcIndexDist(i1); + int id2 = calcIndexDist(i2); + if (id1 != id2) + return id1 - id2; + + return i1 - i2; + } + + private int calcGenomicDist(int i) { + int d1 = grb.get(i).loc.distance(grb.get(phaseRelativeToIndex).loc); + int d2 = grb.get(i).loc.distance(grb.get(phasingSiteIndex).loc); + + return d1 + d2; + } + + private int calcIndexDist(int i) { + int d1 = Math.abs(i - phaseRelativeToIndex); + int d2 = Math.abs(i - phasingSiteIndex); + + return d1 + d2; + } + } + + // Create a "phasing window" of het sites to use for phasing the index site, but limiting to only maxPhaseSites het sites to incorporate [as specified by the user] private List trimWindow(List listHetGenotypes, String sample, GenomeLoc phaseLocus) { if (DEBUG) logger.warn("Trying to phase sample " + sample + " at locus " + phaseLocus + " within a window of " + cacheWindow + " bases yields " + listHetGenotypes.size() + " heterozygous sites to phase:\n" + toStringGRL(listHetGenotypes)); - int prevSiteIndex = phasingSiteIndex - 1; // index of previous in listHetGenotypes - int numToUse = maxPhaseSites - 2; // since always keep previous and current het sites! - - int numOnLeft = prevSiteIndex; - int numOnRight = listHetGenotypes.size() - (phasingSiteIndex + 1); - - int useOnLeft, useOnRight; - if (numOnLeft <= numOnRight) { - int halfToUse = numToUse / 2; // skimp on the left [floor], and be generous with the right side - useOnLeft = Math.min(halfToUse, numOnLeft); - useOnRight = Math.min(numToUse - useOnLeft, numOnRight); + Set scoreAllIndices = new TreeSet(new SortSitesBySumOfDist(listHetGenotypes)); + for (int i = 0; i < listHetGenotypes.size(); ++i) { + if (i != phaseRelativeToIndex && i != phasingSiteIndex) + scoreAllIndices.add(i); } - else { // numOnRight < numOnLeft - int halfToUse = new Double(Math.ceil(numToUse / 2.0)).intValue(); // be generous with the right side [ceil] - useOnRight = Math.min(halfToUse, numOnRight); - useOnLeft = Math.min(numToUse - useOnRight, numOnLeft); + + Set keepIndices = new TreeSet(); + // always keep these two indices: + keepIndices.add(phaseRelativeToIndex); + keepIndices.add(phasingSiteIndex); + for (int addInd : scoreAllIndices) { + if (keepIndices.size() >= maxPhaseSites) + break; + else // keepIndices.size() < maxPhaseSites + keepIndices.add(addInd); } - int startIndex = prevSiteIndex - useOnLeft; - int stopIndex = phasingSiteIndex + useOnRight + 1; // put the index 1 past the desired index to keep - phasingSiteIndex -= startIndex; - listHetGenotypes = listHetGenotypes.subList(startIndex, stopIndex); + + List newListHetGenotypes = new LinkedList(); + int newPhaseRelativeToIndex = -1; + int newPhasingSiteIndex = -1; + int oldIndex = 0; + int newIndex = 0; + for (GenotypeAndReadBases grb : listHetGenotypes) { + if (keepIndices.contains(oldIndex)) { + newListHetGenotypes.add(grb); + + if (oldIndex == phaseRelativeToIndex) + newPhaseRelativeToIndex = newIndex; + if (oldIndex == phasingSiteIndex) + newPhasingSiteIndex = newIndex; + + ++newIndex; + } + ++oldIndex; + } + + phaseRelativeToIndex = newPhaseRelativeToIndex; + phasingSiteIndex = newPhasingSiteIndex; + listHetGenotypes = newListHetGenotypes; if (DEBUG) logger.warn("NAIVELY REDUCED to " + listHetGenotypes.size() + " sites:\n" + toStringGRL(listHetGenotypes)); @@ -920,11 +978,13 @@ public class ReadBackedPhasing extends RodWalker 2!"); + String[] curPairNames = prevPairNames; + byte prevBase = hap.getBase(0); // The 1st base in the haplotype byte curBase = hap.getBase(1); // The 2nd base in the haplotype boolean chosePrevTopChrom = prevAllelePair.matchesTopBase(prevBase); boolean choseCurTopChrom = curAllelePair.matchesTopBase(curBase); - if (chosePrevTopChrom != choseCurTopChrom) - curAllelePair.swapAlleles(); + if (chosePrevTopChrom != choseCurTopChrom) { + //curAllelePair.swapAlleles(); + + /* Instead of swapping the alleles (as we used to above), + we swap the haplotype names to fit the unswapped alleles as they are ordered in the Genotype: + */ + curPairNames = new String[]{prevPairNames[1], prevPairNames[0]}; + } + + return curPairNames; } private boolean startDistancesAreInWindowRange(VariantContext vc1, VariantContext vc2) { @@ -1060,8 +1131,8 @@ public class ReadBackedPhasing extends RodWalker sampleReadBases; @@ -1157,6 +1229,7 @@ public class ReadBackedPhasing extends RodWalker sampleReadBases; @@ -1256,6 +1329,9 @@ public class ReadBackedPhasing extends RodWalker hapMap = new TreeMap(); for (PhasingTable.PhasingTableEntry pte : table) { @@ -1309,23 +1388,26 @@ public class ReadBackedPhasing extends RodWalker marginalizeInds; - public TableCreatorOfHaplotypeAndComplementForDiploidAlleles(Genotype[] hetGenotypes, int startIndex, int marginalizeLength) { + public TableCreatorOfHaplotypeAndComplementForDiploidAlleles(Genotype[] hetGenotypes, int[] marginalizeInds) { super(hetGenotypes); this.SNPallelePairs = new SNPallelePair[genotypes.length]; for (int i = 0; i < genotypes.length; i++) SNPallelePairs[i] = new SNPallelePair(genotypes[i]); - this.startIndex = startIndex; - this.marginalizeLength = marginalizeLength; + this.marginalizeInds = new TreeSet(); + for (int mind : marginalizeInds) + this.marginalizeInds.add(mind); } public PhasingTable getNewTable() { + int startIndex = marginalizeInds.iterator().next(); + PhasingTable table = new PhasingTable(); for (Haplotype hap : getAllHaplotypes()) { if (SNPallelePairs[startIndex].matchesTopBase(hap.getBase(startIndex))) { @@ -1337,8 +1419,7 @@ public class ReadBackedPhasing extends RodWalker { private LinkedList table; @@ -1406,6 +1491,7 @@ public class ReadBackedPhasing extends RodWalker { private HaplotypeClass haplotypeClass; private PhasingScore score; @@ -1470,6 +1557,7 @@ public class ReadBackedPhasing extends RodWalker multipleBaseCounts = null; @@ -1586,6 +1674,7 @@ class HaplotypeClass implements Iterable { } } +// Summary statistics about phasing rates, for each sample class PhasingStats { private int numReads; private int numVarSites; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java index 9759004a0..8c8817fe6 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java @@ -72,7 +72,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:332341-382503", 1, - Arrays.asList("1c9a7fe4db41864cd85d16e5cf88986c")); + Arrays.asList("1bb034bd54421fe4884e3142ed92d47e")); executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec); } @@ -82,7 +82,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:1232503-1332503", 1, - Arrays.asList("a3ca151145379e0d4bae64a91165ea0b")); + Arrays.asList("c12954252d4c8659b5ecf7517b277496")); executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec); } @@ -92,7 +92,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30) + " -L chr20:332341-382503", 1, - Arrays.asList("f685803333123a102ce1851d984cbd10")); + Arrays.asList("0b945e30504d04e9c6fa659ca5c25ed5")); executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec); } @@ -102,7 +102,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100) + " -L chr20:332341-382503", 1, - Arrays.asList("aaa7c25d118383639f273128d241e140")); + Arrays.asList("e9e8ef92d694ca71f29737fba26282f5")); executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec); } @@ -112,7 +112,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10) + " -L chr20:332341-482503", 1, - Arrays.asList("418e29400762972e77bae4f73e16befe")); + Arrays.asList("b9c9347c760a06db635952bf4920fb48")); executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec); } @@ -122,7 +122,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:652810-681757", 1, - Arrays.asList("4c8f6190ecc86766baba3aba08542991")); + Arrays.asList("02c3a903842aa035ae379f16bc3d64ae")); executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec); } @@ -132,18 +132,8 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "CEU.trio.2010_03.genotypes.hg18.vcf", 20000, 10, 10) + " -L chr20:332341-802503", 1, - Arrays.asList("44eb225ab3167651ec0a9e1fdcc83d34")); - executeTest("Use trio-phased VCF, but ignore its phasing [TEST SEVEN]", spec); - } - - @Test - public void test8() { - WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "CEU.trio.2010_03.genotypes.hg18.vcf", 20000, 10, 10) - + " -L chr20:332341-802503" + " -respectPhaseInInput", - 1, - Arrays.asList("e3549b89d49092e73cc6eb21f233471c")); - executeTest("Use trio-phased VCF, and respect its phasing [TEST EIGHT]", spec); + Arrays.asList("ac41d1aa9c9a67c07d894f485c29c574")); + executeTest("Use trio-phased VCF, adding read-backed phasing infomration in HP tag (as is now standard for RBP) [TEST SEVEN]", spec); } } diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala index 7dd771873..9bb031c38 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala @@ -196,7 +196,7 @@ class xhmmCNVpipeline extends QScript { } addAll(docs) - val mergeDepths = new MergeGATKdepths(docs.map(u => u.intervalSampleOut), outputBase.getPath + RD_OUTPUT_SUFFIX, "_mean_cvg", xhmmExec, sampleIDsMap, sampleIDsMapFromColumn, sampleIDsMapToColumn, None, false) with WholeMatrixMemoryLimit + val mergeDepths = new MergeGATKdepths(docs.map(u => u.intervalSampleOut), outputBase.getPath + RD_OUTPUT_SUFFIX, "_mean_cvg", xhmmExec, sampleIDsMap, sampleIDsMapFromColumn, sampleIDsMapToColumn, None, false) with WholeMatrixMemoryLimit with LongRunTime add(mergeDepths) var excludeTargets : List[File] = List[File]() @@ -305,7 +305,7 @@ class xhmmCNVpipeline extends QScript { } } - class FilterCenterRawMatrix(inputParam: File, excludeTargetsIn : List[File]) extends CommandLineFunction with WholeMatrixMemoryLimit { + class FilterCenterRawMatrix(inputParam: File, excludeTargetsIn : List[File]) extends CommandLineFunction with WholeMatrixMemoryLimit with LongRunTime { @Input(doc = "") val input = inputParam @@ -335,7 +335,7 @@ class xhmmCNVpipeline extends QScript { override def description = "Filters samples and targets and then mean-centers the targets: " + command } - class PCA(inputParam: File) extends CommandLineFunction with WholeMatrixMemoryLimit { + class PCA(inputParam: File) extends CommandLineFunction with WholeMatrixMemoryLimit with LongRunTime { @Input(doc = "") val input = inputParam @@ -358,7 +358,7 @@ class xhmmCNVpipeline extends QScript { override def description = "Runs PCA on mean-centered data: " + command } - class Normalize(pca: PCA) extends CommandLineFunction { + class Normalize(pca: PCA) extends CommandLineFunction with LongRunTime { @Input(doc = "") val input = pca.input @@ -387,7 +387,7 @@ class xhmmCNVpipeline extends QScript { override def description = "Normalizes mean-centered data using PCA information: " + command } - class FilterAndZscoreNormalized(inputParam: File) extends CommandLineFunction with WholeMatrixMemoryLimit { + class FilterAndZscoreNormalized(inputParam: File) extends CommandLineFunction with WholeMatrixMemoryLimit with LongRunTime { @Input(doc = "") val input = inputParam @@ -413,7 +413,7 @@ class xhmmCNVpipeline extends QScript { override def description = "Filters and z-score centers (by sample) the PCA-normalized data: " + command } - class FilterOriginalData(inputParam: File, filt1: FilterCenterRawMatrix, filt2: FilterAndZscoreNormalized) extends CommandLineFunction with WholeMatrixMemoryLimit { + class FilterOriginalData(inputParam: File, filt1: FilterCenterRawMatrix, filt2: FilterAndZscoreNormalized) extends CommandLineFunction with WholeMatrixMemoryLimit with LongRunTime { @Input(doc = "") val input = inputParam