Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2011-12-19 16:53:07 +01:00
commit 85dc2239ad
14 changed files with 117 additions and 141 deletions

View File

@ -12,7 +12,7 @@ if ( onCMDLine ) {
inputFileName = args[1] inputFileName = args[1]
outputPDF = args[2] outputPDF = args[2]
} else { } else {
inputFileName = "Q-8271@gsa2.jobreport.txt" inputFileName = "Q-26618@gsa4.jobreport.txt"
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt" #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt" #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
outputPDF = NA outputPDF = NA
@ -129,9 +129,11 @@ plotGroup <- function(groupTable) {
# as above, but averaging over all iterations # as above, but averaging over all iterations
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration") groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
if ( dim(sub)[1] > 1 ) { if ( dim(sub)[1] > 1 ) {
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) try({ # need a try here because we will fail to reduce when there's just a single iteration
textplot(as.data.frame(sum), show.rownames=F) sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
title(paste("Job summary for", name, "averaging over all iterations"), cex=3) textplot(as.data.frame(sum), show.rownames=F)
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
}, silent=T)
} }
} }
@ -193,6 +195,7 @@ plotJobsGantt(gatkReportData, F, F)
plotProgressByTime(gatkReportData) plotProgressByTime(gatkReportData)
plotTimeByHost(gatkReportData) plotTimeByHost(gatkReportData)
for ( group in gatkReportData ) { for ( group in gatkReportData ) {
print(group)
plotGroup(group) plotGroup(group)
} }

View File

@ -100,11 +100,6 @@ public class SAMDataSource {
*/ */
private final Map<SAMReaderID,GATKBAMFileSpan> readerPositions = new HashMap<SAMReaderID,GATKBAMFileSpan>(); private final Map<SAMReaderID,GATKBAMFileSpan> readerPositions = new HashMap<SAMReaderID,GATKBAMFileSpan>();
/**
* Cached representation of the merged header used to generate a merging iterator.
*/
private final SamFileHeaderMerger headerMerger;
/** /**
* The merged header. * The merged header.
*/ */
@ -300,9 +295,8 @@ public class SAMDataSource {
initializeReaderPositions(readers); initializeReaderPositions(readers);
headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true); mergedHeader = readers.getMergedHeader();
mergedHeader = headerMerger.getMergedHeader(); hasReadGroupCollisions = readers.hasReadGroupCollisions();
hasReadGroupCollisions = headerMerger.hasReadGroupCollisions();
readProperties = new ReadProperties( readProperties = new ReadProperties(
samFiles, samFiles,
@ -327,9 +321,9 @@ public class SAMDataSource {
List<SAMReadGroupRecord> readGroups = reader.getFileHeader().getReadGroups(); List<SAMReadGroupRecord> readGroups = reader.getFileHeader().getReadGroups();
for(SAMReadGroupRecord readGroup: readGroups) { for(SAMReadGroupRecord readGroup: readGroups) {
if(headerMerger.hasReadGroupCollisions()) { if(hasReadGroupCollisions) {
mappingToMerged.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId())); mappingToMerged.put(readGroup.getReadGroupId(),readers.getReadGroupId(id,readGroup.getReadGroupId()));
mergedToOriginalReadGroupMappings.put(headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()),readGroup.getReadGroupId()); mergedToOriginalReadGroupMappings.put(readers.getReadGroupId(id,readGroup.getReadGroupId()),readGroup.getReadGroupId());
} else { } else {
mappingToMerged.put(readGroup.getReadGroupId(),readGroup.getReadGroupId()); mappingToMerged.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
mergedToOriginalReadGroupMappings.put(readGroup.getReadGroupId(),readGroup.getReadGroupId()); mergedToOriginalReadGroupMappings.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
@ -562,7 +556,7 @@ public class SAMDataSource {
*/ */
private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) { private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) {
// Set up merging to dynamically merge together multiple BAMs. // Set up merging to dynamically merge together multiple BAMs.
MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true); MergingSamRecordIterator mergingIterator = readers.createMergingIterator();
for(SAMReaderID id: getReaderIDs()) { for(SAMReaderID id: getReaderIDs()) {
CloseableIterator<SAMRecord> iterator = null; CloseableIterator<SAMRecord> iterator = null;
@ -707,6 +701,11 @@ public class SAMDataSource {
* A collection of readers derived from a reads metadata structure. * A collection of readers derived from a reads metadata structure.
*/ */
private class SAMReaders implements Iterable<SAMFileReader> { private class SAMReaders implements Iterable<SAMFileReader> {
/**
* Cached representation of the merged header used to generate a merging iterator.
*/
private final SamFileHeaderMerger headerMerger;
/** /**
* Internal storage for a map of id -> reader. * Internal storage for a map of id -> reader.
*/ */
@ -798,6 +797,11 @@ public class SAMDataSource {
} }
if ( totalNumberOfFiles > 0 ) logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime())); if ( totalNumberOfFiles > 0 ) logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime()));
Collection<SAMFileHeader> headers = new LinkedList<SAMFileHeader>();
for(SAMFileReader reader: readers.values())
headers.add(reader.getFileHeader());
headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true);
} }
final private void printReaderPerformance(final int nExecutedTotal, final private void printReaderPerformance(final int nExecutedTotal,
@ -814,7 +818,37 @@ public class SAMDataSource {
nExecutedInTick, tickDurationInSec, nExecutedInTick, tickDurationInSec,
nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond, nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond,
nRemaining, estTimeToComplete, estTimeToComplete / 60)); nRemaining, estTimeToComplete, estTimeToComplete / 60));
}
/**
* Return the header derived from the merging of these BAM files.
* @return the merged header.
*/
public SAMFileHeader getMergedHeader() {
return headerMerger.getMergedHeader();
}
/**
* Do multiple read groups collide in this dataset?
* @return True if multiple read groups collide; false otherwis.
*/
public boolean hasReadGroupCollisions() {
return headerMerger.hasReadGroupCollisions();
}
/**
* Get the newly mapped read group ID for the given read group.
* @param readerID Reader for which to discern the transformed ID.
* @param originalReadGroupID Original read group.
* @return Remapped read group.
*/
public String getReadGroupId(final SAMReaderID readerID, final String originalReadGroupID) {
SAMFileHeader header = readers.get(readerID).getFileHeader();
return headerMerger.getReadGroupId(header,originalReadGroupID);
}
public MergingSamRecordIterator createMergingIterator() {
return new MergingSamRecordIterator(headerMerger,readers.values(),true);
} }
/** /**
@ -866,25 +900,6 @@ public class SAMDataSource {
public boolean isEmpty() { public boolean isEmpty() {
return readers.isEmpty(); return readers.isEmpty();
} }
/**
* Gets all the actual readers out of this data structure.
* @return A collection of the readers.
*/
public Collection<SAMFileReader> values() {
return readers.values();
}
/**
* Gets all the actual readers out of this data structure.
* @return A collection of the readers.
*/
public Collection<SAMFileHeader> headers() {
ArrayList<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(readers.size());
for (SAMFileReader reader : values())
headers.add(reader.getFileHeader());
return headers;
}
} }
class ReaderInitializer implements Callable<ReaderInitializer> { class ReaderInitializer implements Callable<ReaderInitializer> {

View File

@ -38,9 +38,9 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.clipreads.ClippingOp; import org.broadinstitute.sting.utils.clipping.ClippingOp;
import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipreads.ReadClipper; import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;

View File

@ -63,27 +63,26 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT // Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
private double calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) { private double calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) {
final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM); final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM) + calculateNChildren(vc, triosToTest, HET, HOM, HET);
final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM); final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM) + calculateNChildren(vc, triosToTest, HOM, HOM, HET);
final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET); final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET);
final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET); final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET);
final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET); final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET) + calculateNChildren(vc, triosToTest, REF, HET, REF);
final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET); final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET) + calculateNChildren(vc, triosToTest, HET, HET, REF);
final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB); final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB); final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
return (numer * numer) / denom; return (numer * numer) / denom;
} }
private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int momIdx, final int dadIdx ) { private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int parent1Idx, final int parent2Idx ) {
final double likelihoodVector[] = new double[triosToTest.size() * 2]; final double likelihoodVector[] = new double[triosToTest.size()];
int iii = 0; int iii = 0;
for( final Sample child : triosToTest ) { for( final Sample child : triosToTest ) {
final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector(); final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector();
final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector(); final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector();
final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector(); final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector();
likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx]; likelihoodVector[iii++] = momGL[parent1Idx] + dadGL[parent2Idx] + childGL[childIdx];
likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx];
} }
return MathUtils.sumLog10(likelihoodVector); return MathUtils.sumLog10(likelihoodVector);

View File

@ -34,10 +34,17 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
*/ */
public class AlleleFrequencyCalculationResult { public class AlleleFrequencyCalculationResult {
// note that the cell at position zero in the likelihoods/posteriors array is actually probability of non-ref (since it's marginalized over all alleles) // IMPORTANT NOTE:
// These 2 arrays are intended to contain the likelihoods/posterior probabilities for each alternate allele over each possible frequency (from 0 to 2N).
// For any given alternate allele and frequency, the likelihoods are marginalized over values for all other alternate alleles. What this means is that
// the likelihoods at cell index zero (AF=0) in the array is actually that of the site's being polymorphic (because although this alternate allele may
// be at AF=0, it is marginalized over all other alternate alleles which are not necessarily at AF=0).
// In the bi-allelic case (where there are no other alternate alleles over which to marginalize),
// the value at cell index zero will be equal to AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED.
final double[][] log10AlleleFrequencyLikelihoods; final double[][] log10AlleleFrequencyLikelihoods;
final double[][] log10AlleleFrequencyPosteriors; final double[][] log10AlleleFrequencyPosteriors;
// These 2 variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles)
double log10LikelihoodOfAFzero = 0.0; double log10LikelihoodOfAFzero = 0.0;
double log10PosteriorOfAFzero = 0.0; double log10PosteriorOfAFzero = 0.0;

View File

@ -493,12 +493,12 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
if (DISCORDANCE_ONLY) { if (DISCORDANCE_ONLY) {
Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation()); Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation());
if (!isDiscordant(vc, compVCs)) if (!isDiscordant(vc, compVCs))
return 0; continue;
} }
if (CONCORDANCE_ONLY) { if (CONCORDANCE_ONLY) {
Collection<VariantContext> compVCs = tracker.getValues(concordanceTrack, context.getLocation()); Collection<VariantContext> compVCs = tracker.getValues(concordanceTrack, context.getLocation());
if (!isConcordant(vc, compVCs)) if (!isConcordant(vc, compVCs))
return 0; continue;
} }
if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic()) if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic())
@ -512,16 +512,20 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
VariantContext sub = subsetRecord(vc, samples); VariantContext sub = subsetRecord(vc, samples);
if ( (sub.isPolymorphicInSamples() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) { if ( (sub.isPolymorphicInSamples() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
boolean failedJexlMatch = false;
for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) { for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) {
if ( !VariantContextUtils.match(sub, jexl) ) { if ( !VariantContextUtils.match(sub, jexl) ) {
return 0; failedJexlMatch = true;
break;
} }
} }
if (SELECT_RANDOM_NUMBER) { if ( !failedJexlMatch ) {
randomlyAddVariant(++variantNumber, sub); if (SELECT_RANDOM_NUMBER) {
} randomlyAddVariant(++variantNumber, sub);
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { }
vcfWriter.add(sub); else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
vcfWriter.add(sub);
}
} }
} }
} }

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.utils.clipreads; package org.broadinstitute.sting.utils.clipping;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import net.sf.samtools.Cigar; import net.sf.samtools.Cigar;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.utils.clipreads; package org.broadinstitute.sting.utils.clipping;
/** /**
* How should we represent a clipped bases in a read? * How should we represent a clipped bases in a read?

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.utils.clipreads; package org.broadinstitute.sting.utils.clipping;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement; import net.sf.samtools.CigarElement;

View File

@ -171,7 +171,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test @Test
public void testTDTAnnotation() { public void testTDTAnnotation() {
final String MD5 = "9fe37b61aab695ad47ce3c587148e91f"; final String MD5 = "204e67536a17af7eaa6bf0a910818997";
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" + "-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" +
" -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1, " -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,

View File

@ -116,6 +116,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testUsingDbsnpName--" + testFile, spec); executeTest("testUsingDbsnpName--" + testFile, spec);
} }
@Test
public void testMultipleRecordsAtOnePosition() {
String testFile = validationDataLocation + "selectVariants.onePosition.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -select 'KG_FREQ < 0.5' --variant " + testFile + " -o %s -NO_HEADER",
1,
Arrays.asList("20b52c96f5c48258494d072752b53693")
);
executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec);
}
@Test @Test
public void testParallelization() { public void testParallelization() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf"; String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.utils.clipreads; package org.broadinstitute.sting.utils.clipping;
import net.sf.samtools.Cigar; import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement; import net.sf.samtools.CigarElement;
@ -8,7 +8,9 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert; import org.testng.Assert;
import java.util.*; import java.util.LinkedList;
import java.util.List;
import java.util.Stack;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -17,7 +19,7 @@ import java.util.*;
* Time: 6:45 AM * Time: 6:45 AM
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
public class ClipReadsTestUtils { public class ReadClipperTestUtils {
//Should contain all the utils needed for tests to mass produce //Should contain all the utils needed for tests to mass produce
//reads, cigars, and other needed classes //reads, cigars, and other needed classes

View File

@ -23,7 +23,7 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE. * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
package org.broadinstitute.sting.utils.clipreads; package org.broadinstitute.sting.utils.clipping;
import net.sf.samtools.Cigar; import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement; import net.sf.samtools.CigarElement;
@ -50,13 +50,13 @@ public class ReadClipperUnitTest extends BaseTest {
@BeforeClass @BeforeClass
public void init() { public void init() {
cigarList = ClipReadsTestUtils.generateCigarList(maximumCigarSize); cigarList = ReadClipperTestUtils.generateCigarList(maximumCigarSize);
} }
@Test(enabled = true) @Test(enabled = true)
public void testHardClipBothEndsByReferenceCoordinates() { public void testHardClipBothEndsByReferenceCoordinates() {
for (Cigar cigar : cigarList) { for (Cigar cigar : cigarList) {
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart(); int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd(); int alnEnd = read.getAlignmentEnd();
int readLength = alnStart - alnEnd; int readLength = alnStart - alnEnd;
@ -71,7 +71,7 @@ public class ReadClipperUnitTest extends BaseTest {
@Test(enabled = true) @Test(enabled = true)
public void testHardClipByReadCoordinates() { public void testHardClipByReadCoordinates() {
for (Cigar cigar : cigarList) { for (Cigar cigar : cigarList) {
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int readLength = read.getReadLength(); int readLength = read.getReadLength();
for (int i=0; i<readLength; i++) { for (int i=0; i<readLength; i++) {
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReadCoordinates(0, i); GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReadCoordinates(0, i);
@ -86,7 +86,7 @@ public class ReadClipperUnitTest extends BaseTest {
@Test(enabled = true) @Test(enabled = true)
public void testHardClipByReferenceCoordinates() { public void testHardClipByReferenceCoordinates() {
for (Cigar cigar : cigarList) { for (Cigar cigar : cigarList) {
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart(); int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd(); int alnEnd = read.getAlignmentEnd();
for (int i=alnStart; i<=alnEnd; i++) { for (int i=alnStart; i<=alnEnd; i++) {
@ -108,7 +108,7 @@ public class ReadClipperUnitTest extends BaseTest {
@Test(enabled = true) @Test(enabled = true)
public void testHardClipByReferenceCoordinatesLeftTail() { public void testHardClipByReferenceCoordinatesLeftTail() {
for (Cigar cigar : cigarList) { for (Cigar cigar : cigarList) {
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart(); int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd(); int alnEnd = read.getAlignmentEnd();
if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
@ -124,7 +124,7 @@ public class ReadClipperUnitTest extends BaseTest {
@Test(enabled = true) @Test(enabled = true)
public void testHardClipByReferenceCoordinatesRightTail() { public void testHardClipByReferenceCoordinatesRightTail() {
for (Cigar cigar : cigarList) { for (Cigar cigar : cigarList) {
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart(); int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd(); int alnEnd = read.getAlignmentEnd();
if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
@ -144,7 +144,7 @@ public class ReadClipperUnitTest extends BaseTest {
// create a read for every cigar permutation // create a read for every cigar permutation
for (Cigar cigar : cigarList) { for (Cigar cigar : cigarList) {
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int readLength = read.getReadLength(); int readLength = read.getReadLength();
byte [] quals = new byte[readLength]; byte [] quals = new byte[readLength];
@ -216,7 +216,7 @@ public class ReadClipperUnitTest extends BaseTest {
GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR); GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR);
ReadClipper lowQualClipper = new ReadClipper(read); ReadClipper lowQualClipper = new ReadClipper(read);
ClipReadsTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected); ReadClipperTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected);
} }
@Test(enabled = true) @Test(enabled = true)
@ -224,7 +224,7 @@ public class ReadClipperUnitTest extends BaseTest {
// Generate a list of cigars to test // Generate a list of cigars to test
for (Cigar cigar : cigarList) { for (Cigar cigar : cigarList) {
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
ReadClipper readClipper = new ReadClipper(read); ReadClipper readClipper = new ReadClipper(read);
GATKSAMRecord clippedRead = readClipper.hardClipSoftClippedBases(); GATKSAMRecord clippedRead = readClipper.hardClipSoftClippedBases();
@ -276,12 +276,12 @@ public class ReadClipperUnitTest extends BaseTest {
public void testHardClipLeadingInsertions() { public void testHardClipLeadingInsertions() {
for (Cigar cigar : cigarList) { for (Cigar cigar : cigarList) {
if (startsWithInsertion(cigar)) { if (startsWithInsertion(cigar)) {
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipLeadingInsertions(); GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipLeadingInsertions();
int expectedLength = read.getReadLength() - leadingInsertionLength(read.getCigar()); int expectedLength = read.getReadLength() - leadingInsertionLength(read.getCigar());
if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar())) if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar()))
expectedLength -= leadingInsertionLength(ClipReadsTestUtils.invertCigar(read.getCigar())); expectedLength -= leadingInsertionLength(ReadClipperTestUtils.invertCigar(read.getCigar()));
if (! clippedRead.isEmpty()) { if (! clippedRead.isEmpty()) {
Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there

View File

@ -1,67 +0,0 @@
package org.broadinstitute.sting.utils.clipreads;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 11/27/11
* Time: 5:17 AM
* To change this template use File | Settings | File Templates.
*/
public class ClippingOpUnitTest extends BaseTest {
ClippingOp clippingOp;
GATKSAMRecord read;
@BeforeTest
public void init() {
read = ClipReadsTestUtils.makeRead();
}
@Test
private void testHardClip() {
// List<TestParameter> testList = new LinkedList<TestParameter>();
// testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
// testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
// testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
// testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
// testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start
// testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end
//
// for (TestParameter p : testList) {
// init();
// clippingOp = new ClippingOp(p.inputStart, p.inputStop);
// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar);
// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read),
// ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
// ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
// p.cigar);
// }
}
@Test
private void testSoftClip() {
// List<TestParameter> testList = new LinkedList<TestParameter>();
// testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start
// testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end
// testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start
// testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end
// testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start
// testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end
//
// for (TestParameter p : testList) {
// init();
// clippingOp = new ClippingOp(p.inputStart, p.inputStop);
// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar);
// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read),
// ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar);
// }
}
}