Merge pull request #103 from broadinstitute/md_fragutils
Cleanup FragmentUtils; Add concept of strandless reads
This commit is contained in:
commit
3b4dca1b94
|
|
@ -47,6 +47,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
import cern.jet.math.Arithmetic;
|
import cern.jet.math.Arithmetic;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -74,6 +75,8 @@ import java.util.*;
|
||||||
* calculated for certain complex indel cases or for multi-allelic sites.
|
* calculated for certain complex indel cases or for multi-allelic sites.
|
||||||
*/
|
*/
|
||||||
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||||
|
private final static Logger logger = Logger.getLogger(FisherStrand.class);
|
||||||
|
|
||||||
private static final String FS = "FS";
|
private static final String FS = "FS";
|
||||||
private static final double MIN_PVALUE = 1E-320;
|
private static final double MIN_PVALUE = 1E-320;
|
||||||
private static final int MIN_QUAL_FOR_FILTERED_TEST = 17;
|
private static final int MIN_QUAL_FOR_FILTERED_TEST = 17;
|
||||||
|
|
@ -95,6 +98,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
||||||
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
|
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
|
||||||
// either SNP with no alignment context, or indels: per-read likelihood map needed
|
// either SNP with no alignment context, or indels: per-read likelihood map needed
|
||||||
final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc);
|
final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc);
|
||||||
|
// logger.info("VC " + vc);
|
||||||
|
// printTable(table, 0.0);
|
||||||
return pValueForBestTable(table, null);
|
return pValueForBestTable(table, null);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
@ -131,9 +136,6 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
||||||
private Map<String, Object> annotationForOneTable(final double pValue) {
|
private Map<String, Object> annotationForOneTable(final double pValue) {
|
||||||
final Object value = String.format("%.3f", QualityUtils.phredScaleErrorRate(Math.max(pValue, MIN_PVALUE))); // prevent INFINITYs
|
final Object value = String.format("%.3f", QualityUtils.phredScaleErrorRate(Math.max(pValue, MIN_PVALUE))); // prevent INFINITYs
|
||||||
return Collections.singletonMap(FS, value);
|
return Collections.singletonMap(FS, value);
|
||||||
// Map<String, Object> map = new HashMap<String, Object>();
|
|
||||||
// map.put(FS, String.format("%.3f", QualityUtils.phredScaleErrorRate(pValue)));
|
|
||||||
// return map;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getKeyNames() {
|
public List<String> getKeyNames() {
|
||||||
|
|
@ -192,7 +194,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
||||||
|
|
||||||
|
|
||||||
private static void printTable(int[][] table, double pValue) {
|
private static void printTable(int[][] table, double pValue) {
|
||||||
System.out.printf("%d %d; %d %d : %f\n", table[0][0], table[0][1], table[1][0], table[1][1], pValue);
|
logger.info(String.format("%d %d; %d %d : %f", table[0][0], table[0][1], table[1][0], table[1][1], pValue));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean rotateTable(int[][] table) {
|
private static boolean rotateTable(int[][] table) {
|
||||||
|
|
@ -315,13 +317,21 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
||||||
final boolean matchesAlt = allele.equals(alt, true);
|
final boolean matchesAlt = allele.equals(alt, true);
|
||||||
|
|
||||||
if ( matchesRef || matchesAlt ) {
|
if ( matchesRef || matchesAlt ) {
|
||||||
|
final int row = matchesRef ? 0 : 1;
|
||||||
|
|
||||||
final boolean isFW = !read.getReadNegativeStrandFlag();
|
if ( read.isStrandless() ) {
|
||||||
|
// a strandless read counts as observations on both strand, at 50% weight, with a minimum of 1
|
||||||
int row = matchesRef ? 0 : 1;
|
// (the 1 is to ensure that a strandless read always counts as an observation on both strands, even
|
||||||
int column = isFW ? 0 : 1;
|
// if the read is only seen once, because it's a merged read or other)
|
||||||
|
final int toAdd = Math.max(representativeCount / 2, 1);
|
||||||
table[row][column] += representativeCount;
|
table[row][0] += toAdd;
|
||||||
|
table[row][1] += toAdd;
|
||||||
|
} else {
|
||||||
|
// a normal read with an actual strand
|
||||||
|
final boolean isFW = !read.getReadNegativeStrandFlag();
|
||||||
|
final int column = isFW ? 0 : 1;
|
||||||
|
table[row][column] += representativeCount;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -567,7 +567,7 @@ public class SlidingWindow {
|
||||||
ObjectArrayList<GATKSAMRecord> result = new ObjectArrayList<GATKSAMRecord>();
|
ObjectArrayList<GATKSAMRecord> result = new ObjectArrayList<GATKSAMRecord>();
|
||||||
|
|
||||||
if (filteredDataConsensus == null)
|
if (filteredDataConsensus == null)
|
||||||
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand);
|
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, isNegativeStrand);
|
||||||
|
|
||||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||||
for (int index = start; index < end; index++) {
|
for (int index = start; index < end; index++) {
|
||||||
|
|
@ -583,7 +583,7 @@ public class SlidingWindow {
|
||||||
|
|
||||||
if ( filteredDataConsensus.getRefStart() + filteredDataConsensus.size() != headerElement.getLocation() ) {
|
if ( filteredDataConsensus.getRefStart() + filteredDataConsensus.size() != headerElement.getLocation() ) {
|
||||||
result.add(finalizeFilteredDataConsensus());
|
result.add(finalizeFilteredDataConsensus());
|
||||||
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, headerElement.getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand);
|
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, headerElement.getLocation(), hasIndelQualities, isNegativeStrand);
|
||||||
}
|
}
|
||||||
|
|
||||||
genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts(), headerElement.getRMS());
|
genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts(), headerElement.getRMS());
|
||||||
|
|
@ -606,7 +606,7 @@ public class SlidingWindow {
|
||||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||||
private void addToRunningConsensus(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
private void addToRunningConsensus(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
||||||
if (runningConsensus == null)
|
if (runningConsensus == null)
|
||||||
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand);
|
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, isNegativeStrand);
|
||||||
|
|
||||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||||
for (int index = start; index < end; index++) {
|
for (int index = start; index < end; index++) {
|
||||||
|
|
|
||||||
|
|
@ -124,8 +124,7 @@ public class SyntheticRead {
|
||||||
|
|
||||||
|
|
||||||
private final ObjectArrayList<SingleBaseInfo> basesCountsQuals;
|
private final ObjectArrayList<SingleBaseInfo> basesCountsQuals;
|
||||||
private double mappingQuality; // the average of the rms of the mapping qualities of all the reads that contributed to this consensus
|
private double mappingQuality;
|
||||||
private String readTag;
|
|
||||||
|
|
||||||
// Information to produce a GATKSAMRecord
|
// Information to produce a GATKSAMRecord
|
||||||
private SAMFileHeader header;
|
private SAMFileHeader header;
|
||||||
|
|
@ -147,14 +146,12 @@ public class SyntheticRead {
|
||||||
* @param contigIndex the read's contig index
|
* @param contigIndex the read's contig index
|
||||||
* @param readName the read's name
|
* @param readName the read's name
|
||||||
* @param refStart the alignment start (reference based)
|
* @param refStart the alignment start (reference based)
|
||||||
* @param readTag the reduce reads tag for the synthetic read
|
|
||||||
*/
|
*/
|
||||||
public SyntheticRead(SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, String readTag, boolean hasIndelQualities, boolean isNegativeRead) {
|
public SyntheticRead(SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, boolean hasIndelQualities, boolean isNegativeRead) {
|
||||||
final int initialCapacity = 10000;
|
final int initialCapacity = 10000;
|
||||||
basesCountsQuals = new ObjectArrayList<SingleBaseInfo>(initialCapacity);
|
basesCountsQuals = new ObjectArrayList<SingleBaseInfo>(initialCapacity);
|
||||||
mappingQuality = 0.0;
|
mappingQuality = 0.0;
|
||||||
|
|
||||||
this.readTag = readTag;
|
|
||||||
this.header = header;
|
this.header = header;
|
||||||
this.readGroupRecord = readGroupRecord;
|
this.readGroupRecord = readGroupRecord;
|
||||||
this.contig = contig;
|
this.contig = contig;
|
||||||
|
|
@ -165,13 +162,12 @@ public class SyntheticRead {
|
||||||
this.isNegativeStrand = isNegativeRead;
|
this.isNegativeStrand = isNegativeRead;
|
||||||
}
|
}
|
||||||
|
|
||||||
public SyntheticRead(ObjectArrayList<BaseIndex> bases, ByteArrayList counts, ByteArrayList quals, ByteArrayList insertionQuals, ByteArrayList deletionQuals, double mappingQuality, String readTag, SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, boolean hasIndelQualities, boolean isNegativeRead) {
|
public SyntheticRead(ObjectArrayList<BaseIndex> bases, ByteArrayList counts, ByteArrayList quals, ByteArrayList insertionQuals, ByteArrayList deletionQuals, double mappingQuality, SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, boolean hasIndelQualities, boolean isNegativeRead) {
|
||||||
basesCountsQuals = new ObjectArrayList<SingleBaseInfo>(bases.size());
|
basesCountsQuals = new ObjectArrayList<SingleBaseInfo>(bases.size());
|
||||||
for (int i = 0; i < bases.size(); ++i) {
|
for (int i = 0; i < bases.size(); ++i) {
|
||||||
basesCountsQuals.add(new SingleBaseInfo(bases.get(i).getOrdinalByte(), counts.get(i), quals.get(i), insertionQuals.get(i), deletionQuals.get(i)));
|
basesCountsQuals.add(new SingleBaseInfo(bases.get(i).getOrdinalByte(), counts.get(i), quals.get(i), insertionQuals.get(i), deletionQuals.get(i)));
|
||||||
}
|
}
|
||||||
this.mappingQuality = mappingQuality;
|
this.mappingQuality = mappingQuality;
|
||||||
this.readTag = readTag;
|
|
||||||
this.header = header;
|
this.header = header;
|
||||||
this.readGroupRecord = readGroupRecord;
|
this.readGroupRecord = readGroupRecord;
|
||||||
this.contig = contig;
|
this.contig = contig;
|
||||||
|
|
@ -228,7 +224,7 @@ public class SyntheticRead {
|
||||||
read.setReadBases(convertReadBases());
|
read.setReadBases(convertReadBases());
|
||||||
read.setMappingQuality((int) Math.ceil(mappingQuality / basesCountsQuals.size()));
|
read.setMappingQuality((int) Math.ceil(mappingQuality / basesCountsQuals.size()));
|
||||||
read.setReadGroup(readGroupRecord);
|
read.setReadGroup(readGroupRecord);
|
||||||
read.setAttribute(readTag, convertBaseCounts());
|
read.setReducedReadCounts(convertBaseCounts());
|
||||||
|
|
||||||
if (hasIndelQualities) {
|
if (hasIndelQualities) {
|
||||||
read.setBaseQualities(convertInsertionQualities(), EventType.BASE_INSERTION);
|
read.setBaseQualities(convertInsertionQualities(), EventType.BASE_INSERTION);
|
||||||
|
|
|
||||||
|
|
@ -77,7 +77,7 @@ public void testBaseCounts() {
|
||||||
new TestRead(bases, quals, new byte[] {1, 127, 51, 126}, new byte [] {1, 126, 50, 125})};
|
new TestRead(bases, quals, new byte[] {1, 127, 51, 126}, new byte [] {1, 126, 50, 125})};
|
||||||
|
|
||||||
for (TestRead testRead : testReads) {
|
for (TestRead testRead : testReads) {
|
||||||
SyntheticRead syntheticRead = new SyntheticRead(new ObjectArrayList<BaseIndex>(testRead.getBases()), new ByteArrayList(testRead.getCounts()), new ByteArrayList(testRead.getQuals()), new ByteArrayList(testRead.getInsQuals()), new ByteArrayList(testRead.getDelQuals()), artificialMappingQuality, GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, artificialSAMHeader, artificialGATKRG, artificialContig, artificialContigIndex, artificialReadName, artificialRefStart, false, false);
|
SyntheticRead syntheticRead = new SyntheticRead(new ObjectArrayList<BaseIndex>(testRead.getBases()), new ByteArrayList(testRead.getCounts()), new ByteArrayList(testRead.getQuals()), new ByteArrayList(testRead.getInsQuals()), new ByteArrayList(testRead.getDelQuals()), artificialMappingQuality, artificialSAMHeader, artificialGATKRG, artificialContig, artificialContigIndex, artificialReadName, artificialRefStart, false, false);
|
||||||
Assert.assertEquals(syntheticRead.convertBaseCounts(), testRead.getExpectedCounts());
|
Assert.assertEquals(syntheticRead.convertBaseCounts(), testRead.getExpectedCounts());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -63,7 +63,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleComplex() {
|
public void testHaplotypeCallerMultiSampleComplex() {
|
||||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "a2232995ca9bec143e664748845a0045");
|
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "b83b53741edb07218045d6f25f20a18b");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||||
|
|
|
||||||
|
|
@ -68,12 +68,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSample() {
|
public void testHaplotypeCallerMultiSample() {
|
||||||
HCTest(CEUTRIO_BAM, "", "8f33e40686443b9a72de45d5a9da1861");
|
HCTest(CEUTRIO_BAM, "", "4a2880f0753e6e813b9e0c35209b3708");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerSingleSample() {
|
public void testHaplotypeCallerSingleSample() {
|
||||||
HCTest(NA12878_BAM, "", "8f2b047cdace0ef122d6ad162e7bc5b9");
|
HCTest(NA12878_BAM, "", "588892934f2e81247bf32e457db88449");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = false)
|
||||||
|
|
@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleGGA() {
|
public void testHaplotypeCallerMultiSampleGGA() {
|
||||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||||
"9d4be26a2c956ba4b7b4044820eab030");
|
"fa1b92373c89d2238542a319ad25c257");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
||||||
|
|
@ -112,7 +112,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void HCTestStructuralIndels() {
|
public void HCTestStructuralIndels() {
|
||||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("03557376242bdf78c5237703b762573b"));
|
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("9296f1af6cf1f1cc4b79494eb366e976"));
|
||||||
executeTest("HCTestStructuralIndels: ", spec);
|
executeTest("HCTestStructuralIndels: ", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -134,7 +134,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void HCTestReducedBam() {
|
public void HCTestReducedBam() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||||
Arrays.asList("adb08cb25e902cfe0129404a682b2169"));
|
Arrays.asList("cf0a1bfded656153578df6cf68aa68a2"));
|
||||||
executeTest("HC calling on a ReducedRead BAM", spec);
|
executeTest("HC calling on a ReducedRead BAM", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -142,7 +142,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void testReducedBamWithReadsNotFullySpanningDeletion() {
|
public void testReducedBamWithReadsNotFullySpanningDeletion() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
|
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
|
||||||
Arrays.asList("a43c595a617589388ff3d7e2ddc661e7"));
|
Arrays.asList("addceb63f5bfa9f11e15335d5bf641e9"));
|
||||||
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
|
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -415,6 +415,24 @@ public class Utils {
|
||||||
return C;
|
return C;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Concatenates byte arrays
|
||||||
|
* @return a concat of all bytes in allBytes in order
|
||||||
|
*/
|
||||||
|
public static byte[] concat(final byte[] ... allBytes) {
|
||||||
|
int size = 0;
|
||||||
|
for ( final byte[] bytes : allBytes ) size += bytes.length;
|
||||||
|
|
||||||
|
final byte[] c = new byte[size];
|
||||||
|
int offset = 0;
|
||||||
|
for ( final byte[] bytes : allBytes ) {
|
||||||
|
System.arraycopy(bytes, 0, c, offset, bytes.length);
|
||||||
|
offset += bytes.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Appends String(s) B to array A.
|
* Appends String(s) B to array A.
|
||||||
* @param A First array.
|
* @param A First array.
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.fragments;
|
package org.broadinstitute.sting.utils.fragments;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
import net.sf.samtools.Cigar;
|
import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
|
|
@ -56,7 +58,8 @@ import java.util.*;
|
||||||
* Date: 3/26/11
|
* Date: 3/26/11
|
||||||
* Time: 10:09 PM
|
* Time: 10:09 PM
|
||||||
*/
|
*/
|
||||||
public class FragmentUtils {
|
public final class FragmentUtils {
|
||||||
|
protected final static byte MIN_QUAL_BAD_OVERLAP = 16;
|
||||||
private FragmentUtils() {} // private constructor
|
private FragmentUtils() {} // private constructor
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -65,18 +68,28 @@ public class FragmentUtils {
|
||||||
* Allows us to write a generic T -> Fragment algorithm that works with any object containing
|
* Allows us to write a generic T -> Fragment algorithm that works with any object containing
|
||||||
* a read.
|
* a read.
|
||||||
*
|
*
|
||||||
* @param <T>
|
* @param <T> The type of the object that contains a GATKSAMRecord
|
||||||
*/
|
*/
|
||||||
public interface ReadGetter<T> {
|
public interface ReadGetter<T> {
|
||||||
|
/**
|
||||||
|
* Get the GATKSAMRecord associated with object
|
||||||
|
*
|
||||||
|
* @param object the thing that contains the read
|
||||||
|
* @return a non-null GATKSAMRecord read
|
||||||
|
*/
|
||||||
public GATKSAMRecord get(T object);
|
public GATKSAMRecord get(T object);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Identify getter for SAMRecords themselves */
|
/**
|
||||||
|
* Identify getter for SAMRecords themselves
|
||||||
|
*/
|
||||||
private final static ReadGetter<GATKSAMRecord> SamRecordGetter = new ReadGetter<GATKSAMRecord>() {
|
private final static ReadGetter<GATKSAMRecord> SamRecordGetter = new ReadGetter<GATKSAMRecord>() {
|
||||||
@Override public GATKSAMRecord get(final GATKSAMRecord object) { return object; }
|
@Override public GATKSAMRecord get(final GATKSAMRecord object) { return object; }
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Gets the SAMRecord in a PileupElement */
|
/**
|
||||||
|
* Gets the SAMRecord in a PileupElement
|
||||||
|
*/
|
||||||
private final static ReadGetter<PileupElement> PileupElementGetter = new ReadGetter<PileupElement>() {
|
private final static ReadGetter<PileupElement> PileupElementGetter = new ReadGetter<PileupElement>() {
|
||||||
@Override public GATKSAMRecord get(final PileupElement object) { return object.getRead(); }
|
@Override public GATKSAMRecord get(final PileupElement object) { return object.getRead(); }
|
||||||
};
|
};
|
||||||
|
|
@ -87,13 +100,20 @@ public class FragmentUtils {
|
||||||
* and returns a FragmentCollection that contains the T objects whose underlying reads either overlap (or
|
* and returns a FragmentCollection that contains the T objects whose underlying reads either overlap (or
|
||||||
* not) with their mate pairs.
|
* not) with their mate pairs.
|
||||||
*
|
*
|
||||||
* @param readContainingObjects
|
* @param readContainingObjects An iterator of objects that contain GATKSAMRecords
|
||||||
* @param nElements
|
* @param nElements the number of elements to be provided by the iterator, which is usually known upfront and
|
||||||
* @param getter
|
* greatly improves the efficiency of the fragment calculation
|
||||||
|
* @param getter a helper function that takes an object of type T and returns is associated GATKSAMRecord
|
||||||
* @param <T>
|
* @param <T>
|
||||||
* @return
|
* @return a fragment collection
|
||||||
*/
|
*/
|
||||||
private final static <T> FragmentCollection<T> create(Iterable<T> readContainingObjects, int nElements, ReadGetter<T> getter) {
|
@Requires({
|
||||||
|
"readContainingObjects != null",
|
||||||
|
"nElements >= 0",
|
||||||
|
"getter != null"
|
||||||
|
})
|
||||||
|
@Ensures("result != null")
|
||||||
|
private static <T> FragmentCollection<T> create(final Iterable<T> readContainingObjects, final int nElements, final ReadGetter<T> getter) {
|
||||||
Collection<T> singletons = null;
|
Collection<T> singletons = null;
|
||||||
Collection<List<T>> overlapping = null;
|
Collection<List<T>> overlapping = null;
|
||||||
Map<String, T> nameMap = null;
|
Map<String, T> nameMap = null;
|
||||||
|
|
@ -145,30 +165,69 @@ public class FragmentUtils {
|
||||||
return new FragmentCollection<T>(singletons, overlapping);
|
return new FragmentCollection<T>(singletons, overlapping);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final static FragmentCollection<PileupElement> create(ReadBackedPileup rbp) {
|
/**
|
||||||
|
* Create a FragmentCollection containing PileupElements from the ReadBackedPileup rbp
|
||||||
|
* @param rbp a non-null read-backed pileup. The elements in this ReadBackedPileup must be ordered
|
||||||
|
* @return a non-null FragmentCollection
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
|
public static FragmentCollection<PileupElement> create(final ReadBackedPileup rbp) {
|
||||||
|
if ( rbp == null ) throw new IllegalArgumentException("Pileup cannot be null");
|
||||||
return create(rbp, rbp.getNumberOfElements(), PileupElementGetter);
|
return create(rbp, rbp.getNumberOfElements(), PileupElementGetter);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final static FragmentCollection<GATKSAMRecord> create(List<GATKSAMRecord> reads) {
|
/**
|
||||||
|
* Create a FragmentCollection containing GATKSAMRecords from a list of reads
|
||||||
|
*
|
||||||
|
* @param reads a non-null list of reads, ordered by their start location
|
||||||
|
* @return a non-null FragmentCollection
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
|
public static FragmentCollection<GATKSAMRecord> create(final List<GATKSAMRecord> reads) {
|
||||||
|
if ( reads == null ) throw new IllegalArgumentException("Pileup cannot be null");
|
||||||
return create(reads, reads.size(), SamRecordGetter);
|
return create(reads, reads.size(), SamRecordGetter);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final static List<GATKSAMRecord> mergeOverlappingPairedFragments( final List<GATKSAMRecord> overlappingPair ) {
|
public static List<GATKSAMRecord> mergeOverlappingPairedFragments( final List<GATKSAMRecord> overlappingPair ) {
|
||||||
final byte MIN_QUAL_BAD_OVERLAP = 16;
|
|
||||||
if( overlappingPair.size() != 2 ) { throw new ReviewedStingException("Found overlapping pair with " + overlappingPair.size() + " reads, but expecting exactly 2."); }
|
if( overlappingPair.size() != 2 ) { throw new ReviewedStingException("Found overlapping pair with " + overlappingPair.size() + " reads, but expecting exactly 2."); }
|
||||||
|
|
||||||
GATKSAMRecord firstRead = overlappingPair.get(0);
|
final GATKSAMRecord firstRead = overlappingPair.get(0);
|
||||||
GATKSAMRecord secondRead = overlappingPair.get(1);
|
final GATKSAMRecord secondRead = overlappingPair.get(1);
|
||||||
|
|
||||||
|
final GATKSAMRecord merged;
|
||||||
|
if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) {
|
||||||
|
merged = mergeOverlappingPairedFragments(secondRead, firstRead);
|
||||||
|
} else {
|
||||||
|
merged = mergeOverlappingPairedFragments(firstRead, secondRead);
|
||||||
|
}
|
||||||
|
|
||||||
|
return merged == null ? overlappingPair : Collections.singletonList(merged);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge two overlapping reads from the same fragment into a single super read, if possible
|
||||||
|
*
|
||||||
|
* firstRead and secondRead must be part of the same fragment (though this isn't checked). Looks
|
||||||
|
* at the bases and alignment, and tries its best to create a meaningful synthetic single super read
|
||||||
|
* that represents the entire sequenced fragment.
|
||||||
|
*
|
||||||
|
* Assumes that firstRead starts before secondRead (according to their soft clipped starts)
|
||||||
|
*
|
||||||
|
* @param firstRead the left most read
|
||||||
|
* @param firstRead the right most read
|
||||||
|
*
|
||||||
|
* @return a strandless merged read of first and second, or null if the algorithm cannot create a meaningful one
|
||||||
|
*/
|
||||||
|
public static GATKSAMRecord mergeOverlappingPairedFragments(final GATKSAMRecord firstRead, final GATKSAMRecord secondRead) {
|
||||||
|
if ( firstRead == null ) throw new IllegalArgumentException("firstRead cannot be null");
|
||||||
|
if ( secondRead == null ) throw new IllegalArgumentException("secondRead cannot be null");
|
||||||
|
if ( ! firstRead.getReadName().equals(secondRead.getReadName()) ) throw new IllegalArgumentException("attempting to merge two reads with different names " + firstRead + " and " + secondRead);
|
||||||
|
|
||||||
if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) {
|
if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) {
|
||||||
firstRead = overlappingPair.get(1); // swap them
|
return null; // can't merge them, yet: AAAAAAAAAAA-BBBBBBBBBBB-AAAAAAAAAAAAAA, B is contained entirely inside A
|
||||||
secondRead = overlappingPair.get(0);
|
|
||||||
}
|
|
||||||
if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) {
|
|
||||||
return overlappingPair; // can't merge them, yet: AAAAAAAAAAA-BBBBBBBBBBB-AAAAAAAAAAAAAA, B is contained entirely inside A
|
|
||||||
}
|
}
|
||||||
if( firstRead.getCigarString().contains("I") || firstRead.getCigarString().contains("D") || secondRead.getCigarString().contains("I") || secondRead.getCigarString().contains("D") ) {
|
if( firstRead.getCigarString().contains("I") || firstRead.getCigarString().contains("D") || secondRead.getCigarString().contains("I") || secondRead.getCigarString().contains("D") ) {
|
||||||
return overlappingPair; // fragments contain indels so don't merge them
|
return null; // fragments contain indels so don't merge them
|
||||||
}
|
}
|
||||||
|
|
||||||
final Pair<Integer, Boolean> pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getSoftStart());
|
final Pair<Integer, Boolean> pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getSoftStart());
|
||||||
|
|
@ -190,10 +249,10 @@ public class FragmentUtils {
|
||||||
}
|
}
|
||||||
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
|
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
|
||||||
if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) {
|
if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) {
|
||||||
return overlappingPair; // high qual bases don't match exactly, probably indel in only one of the fragments, so don't merge them
|
return null; // high qual bases don't match exactly, probably indel in only one of the fragments, so don't merge them
|
||||||
}
|
}
|
||||||
if( firstReadQuals[iii] < MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] < MIN_QUAL_BAD_OVERLAP ) {
|
if( firstReadQuals[iii] < MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] < MIN_QUAL_BAD_OVERLAP ) {
|
||||||
return overlappingPair; // both reads have low qual bases in the overlap region so don't merge them because don't know what is going on
|
return null; // both reads have low qual bases in the overlap region so don't merge them because don't know what is going on
|
||||||
}
|
}
|
||||||
bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] );
|
bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] );
|
||||||
quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] );
|
quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] );
|
||||||
|
|
@ -204,6 +263,7 @@ public class FragmentUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
final GATKSAMRecord returnRead = new GATKSAMRecord( firstRead.getHeader() );
|
final GATKSAMRecord returnRead = new GATKSAMRecord( firstRead.getHeader() );
|
||||||
|
returnRead.setIsStrandless(true);
|
||||||
returnRead.setAlignmentStart( firstRead.getSoftStart() );
|
returnRead.setAlignmentStart( firstRead.getSoftStart() );
|
||||||
returnRead.setReadBases( bases );
|
returnRead.setReadBases( bases );
|
||||||
returnRead.setBaseQualities( quals );
|
returnRead.setBaseQualities( quals );
|
||||||
|
|
@ -237,8 +297,6 @@ public class FragmentUtils {
|
||||||
returnRead.setBaseQualities( deletionQuals, EventType.BASE_DELETION );
|
returnRead.setBaseQualities( deletionQuals, EventType.BASE_DELETION );
|
||||||
}
|
}
|
||||||
|
|
||||||
final ArrayList<GATKSAMRecord> returnList = new ArrayList<GATKSAMRecord>();
|
return returnRead;
|
||||||
returnList.add(returnRead);
|
|
||||||
return returnList;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -74,6 +74,8 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
private int softEnd = UNINITIALIZED;
|
private int softEnd = UNINITIALIZED;
|
||||||
private Integer adapterBoundary = null;
|
private Integer adapterBoundary = null;
|
||||||
|
|
||||||
|
private boolean isStrandlessRead = false;
|
||||||
|
|
||||||
// because some values can be null, we don't want to duplicate effort
|
// because some values can be null, we don't want to duplicate effort
|
||||||
private boolean retrievedReadGroup = false;
|
private boolean retrievedReadGroup = false;
|
||||||
private boolean retrievedReduceReadCounts = false;
|
private boolean retrievedReduceReadCounts = false;
|
||||||
|
|
@ -141,6 +143,45 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
return ArtificialSAMUtils.createArtificialRead(cigar);
|
return ArtificialSAMUtils.createArtificialRead(cigar);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
// *** support for reads without meaningful strand information ***//
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Does this read have a meaningful strandedness value?
|
||||||
|
*
|
||||||
|
* Some advanced types of reads, such as reads coming from merged fragments,
|
||||||
|
* don't have meaningful strandedness values, as they are composites of multiple
|
||||||
|
* other reads. Strandless reads need to be handled specially by code that cares about
|
||||||
|
* stranded information, such as FS.
|
||||||
|
*
|
||||||
|
* @return true if this read doesn't have meaningful strand information
|
||||||
|
*/
|
||||||
|
public boolean isStrandless() {
|
||||||
|
return isStrandlessRead;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the strandless state of this read to isStrandless
|
||||||
|
* @param isStrandless true if this read doesn't have a meaningful strandedness value
|
||||||
|
*/
|
||||||
|
public void setIsStrandless(final boolean isStrandless) {
|
||||||
|
this.isStrandlessRead = isStrandless;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean getReadNegativeStrandFlag() {
|
||||||
|
return ! isStrandless() && super.getReadNegativeStrandFlag();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setReadNegativeStrandFlag(boolean flag) {
|
||||||
|
if ( isStrandless() )
|
||||||
|
throw new IllegalStateException("Cannot set the strand of a strandless read");
|
||||||
|
super.setReadNegativeStrandFlag(flag);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// *** The following methods are overloaded to cache the appropriate data ***//
|
// *** The following methods are overloaded to cache the appropriate data ***//
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
@ -313,6 +354,15 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
return getReducedReadCounts() != null;
|
return getReducedReadCounts() != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the reduced read counts for this record to counts
|
||||||
|
* @param counts the count array
|
||||||
|
*/
|
||||||
|
public void setReducedReadCounts(final byte[] counts) {
|
||||||
|
retrievedReduceReadCounts = false;
|
||||||
|
setAttribute(REDUCED_READ_CONSENSUS_TAG, counts);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The number of bases corresponding the i'th base of the reduced read.
|
* The number of bases corresponding the i'th base of the reduced read.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -112,6 +112,19 @@ public class UtilsUnitTest extends BaseTest {
|
||||||
Assert.assertTrue("one-1;two-2;three-1;four-2;five-1;six-2".equals(joined));
|
Assert.assertTrue("one-1;two-2;three-1;four-2;five-1;six-2".equals(joined));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testConcat() {
|
||||||
|
final String s1 = "A";
|
||||||
|
final String s2 = "CC";
|
||||||
|
final String s3 = "TTT";
|
||||||
|
final String s4 = "GGGG";
|
||||||
|
Assert.assertEquals(new String(Utils.concat()), "");
|
||||||
|
Assert.assertEquals(new String(Utils.concat(s1.getBytes())), s1);
|
||||||
|
Assert.assertEquals(new String(Utils.concat(s1.getBytes(), s2.getBytes())), s1 + s2);
|
||||||
|
Assert.assertEquals(new String(Utils.concat(s1.getBytes(), s2.getBytes(), s3.getBytes())), s1 + s2 + s3);
|
||||||
|
Assert.assertEquals(new String(Utils.concat(s1.getBytes(), s2.getBytes(), s3.getBytes(), s4.getBytes())), s1 + s2 + s3 + s4);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testEscapeExpressions() {
|
public void testEscapeExpressions() {
|
||||||
String[] expected, actual;
|
String[] expected, actual;
|
||||||
|
|
|
||||||
|
|
@ -27,23 +27,30 @@ package org.broadinstitute.sting.utils.fragments;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||||
|
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeTest;
|
import org.testng.annotations.BeforeTest;
|
||||||
import org.testng.annotations.DataProvider;
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test routines for read-backed pileup.
|
* Test routines for read-backed pileup.
|
||||||
*/
|
*/
|
||||||
public class FragmentUtilsUnitTest extends BaseTest {
|
public class FragmentUtilsUnitTest extends BaseTest {
|
||||||
private static SAMFileHeader header;
|
private static SAMFileHeader header;
|
||||||
|
private static GATKSAMReadGroupRecord rgForMerged;
|
||||||
|
private final static boolean DEBUG = false;
|
||||||
|
|
||||||
private class FragmentUtilsTest extends TestDataProvider {
|
private class FragmentUtilsTest extends TestDataProvider {
|
||||||
List<TestState> statesForPileup = new ArrayList<TestState>();
|
List<TestState> statesForPileup = new ArrayList<TestState>();
|
||||||
|
|
@ -119,7 +126,7 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
||||||
return FragmentUtilsTest.getTests(FragmentUtilsTest.class);
|
return FragmentUtilsTest.getTests(FragmentUtilsTest.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true, dataProvider = "fragmentUtilsTest")
|
@Test(enabled = !DEBUG, dataProvider = "fragmentUtilsTest")
|
||||||
public void testAsPileup(FragmentUtilsTest test) {
|
public void testAsPileup(FragmentUtilsTest test) {
|
||||||
for ( TestState testState : test.statesForPileup ) {
|
for ( TestState testState : test.statesForPileup ) {
|
||||||
ReadBackedPileup rbp = testState.pileup;
|
ReadBackedPileup rbp = testState.pileup;
|
||||||
|
|
@ -129,7 +136,7 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true, dataProvider = "fragmentUtilsTest")
|
@Test(enabled = !DEBUG, dataProvider = "fragmentUtilsTest")
|
||||||
public void testAsListOfReadsFromPileup(FragmentUtilsTest test) {
|
public void testAsListOfReadsFromPileup(FragmentUtilsTest test) {
|
||||||
for ( TestState testState : test.statesForPileup ) {
|
for ( TestState testState : test.statesForPileup ) {
|
||||||
FragmentCollection<GATKSAMRecord> fp = FragmentUtils.create(testState.pileup.getReads());
|
FragmentCollection<GATKSAMRecord> fp = FragmentUtils.create(testState.pileup.getReads());
|
||||||
|
|
@ -138,7 +145,7 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true, dataProvider = "fragmentUtilsTest")
|
@Test(enabled = !DEBUG, dataProvider = "fragmentUtilsTest")
|
||||||
public void testAsListOfReads(FragmentUtilsTest test) {
|
public void testAsListOfReads(FragmentUtilsTest test) {
|
||||||
for ( TestState testState : test.statesForReads ) {
|
for ( TestState testState : test.statesForReads ) {
|
||||||
FragmentCollection<GATKSAMRecord> fp = FragmentUtils.create(testState.rawReads);
|
FragmentCollection<GATKSAMRecord> fp = FragmentUtils.create(testState.rawReads);
|
||||||
|
|
@ -147,7 +154,7 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true, expectedExceptions = IllegalArgumentException.class)
|
@Test(enabled = !DEBUG, expectedExceptions = IllegalArgumentException.class)
|
||||||
public void testOutOfOrder() {
|
public void testOutOfOrder() {
|
||||||
final List<GATKSAMRecord> pair = ArtificialSAMUtils.createPair(header, "readpair", 100, 1, 50, true, true);
|
final List<GATKSAMRecord> pair = ArtificialSAMUtils.createPair(header, "readpair", 100, 1, 50, true, true);
|
||||||
final GATKSAMRecord left = pair.get(0);
|
final GATKSAMRecord left = pair.get(0);
|
||||||
|
|
@ -161,5 +168,76 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
||||||
@BeforeTest
|
@BeforeTest
|
||||||
public void setup() {
|
public void setup() {
|
||||||
header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000);
|
header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000);
|
||||||
|
rgForMerged = new GATKSAMReadGroupRecord("RG1");
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "MergeFragmentsTest")
|
||||||
|
public Object[][] createMergeFragmentsTest() throws Exception {
|
||||||
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
|
final String leftFlank = "CCC";
|
||||||
|
final String rightFlank = "AAA";
|
||||||
|
final String allOverlappingBases = "ACGTACGTGGAACCTTAG";
|
||||||
|
for ( int overlapSize = 1; overlapSize < allOverlappingBases.length(); overlapSize++ ) {
|
||||||
|
final String overlappingBases = allOverlappingBases.substring(0, overlapSize);
|
||||||
|
final byte[] overlappingBaseQuals = new byte[overlapSize];
|
||||||
|
for ( int i = 0; i < overlapSize; i++ ) overlappingBaseQuals[i] = (byte)(i + 30);
|
||||||
|
final GATKSAMRecord read1 = makeOverlappingRead(leftFlank, 20, overlappingBases, overlappingBaseQuals, "", 30, 1);
|
||||||
|
final GATKSAMRecord read2 = makeOverlappingRead("", 20, overlappingBases, overlappingBaseQuals, rightFlank, 30, leftFlank.length() + 1);
|
||||||
|
final GATKSAMRecord merged = makeOverlappingRead(leftFlank, 20, overlappingBases, overlappingBaseQuals, rightFlank, 30, 1);
|
||||||
|
tests.add(new Object[]{"equalQuals", read1, read2, merged});
|
||||||
|
|
||||||
|
// test that the merged read base quality is the
|
||||||
|
tests.add(new Object[]{"lowQualLeft", modifyBaseQualities(read1, leftFlank.length(), overlapSize), read2, merged});
|
||||||
|
tests.add(new Object[]{"lowQualRight", read1, modifyBaseQualities(read2, 0, overlapSize), merged});
|
||||||
|
}
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
private GATKSAMRecord modifyBaseQualities(final GATKSAMRecord read, final int startOffset, final int length) throws Exception {
|
||||||
|
final GATKSAMRecord readWithLowQuals = (GATKSAMRecord)read.clone();
|
||||||
|
final byte[] withLowQuals = Arrays.copyOf(read.getBaseQualities(), read.getBaseQualities().length);
|
||||||
|
for ( int i = startOffset; i < startOffset + length; i++ )
|
||||||
|
withLowQuals[i] = (byte)(read.getBaseQualities()[i] + (i % 2 == 0 ? -1 : 0));
|
||||||
|
readWithLowQuals.setBaseQualities(withLowQuals);
|
||||||
|
return readWithLowQuals;
|
||||||
|
}
|
||||||
|
|
||||||
|
private GATKSAMRecord makeOverlappingRead(final String leftFlank, final int leftQual, final String overlapBases,
|
||||||
|
final byte[] overlapQuals, final String rightFlank, final int rightQual,
|
||||||
|
final int alignmentStart) {
|
||||||
|
final String bases = leftFlank + overlapBases + rightFlank;
|
||||||
|
final int readLength = bases.length();
|
||||||
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, alignmentStart, readLength);
|
||||||
|
final byte[] leftQuals = Utils.dupBytes((byte) leftQual, leftFlank.length());
|
||||||
|
final byte[] rightQuals = Utils.dupBytes((byte) rightQual, rightFlank.length());
|
||||||
|
final byte[] quals = Utils.concat(leftQuals, overlapQuals, rightQuals);
|
||||||
|
read.setCigarString(readLength + "M");
|
||||||
|
read.setReadBases(bases.getBytes());
|
||||||
|
for ( final EventType type : EventType.values() )
|
||||||
|
read.setBaseQualities(quals, type);
|
||||||
|
read.setReadGroup(rgForMerged);
|
||||||
|
read.setMappingQuality(60);
|
||||||
|
return read;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true, dataProvider = "MergeFragmentsTest")
|
||||||
|
public void testMergingTwoReads(final String name, final GATKSAMRecord read1, GATKSAMRecord read2, final GATKSAMRecord expectedMerged) {
|
||||||
|
final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2);
|
||||||
|
|
||||||
|
if ( expectedMerged == null ) {
|
||||||
|
Assert.assertNull(actual, "Expected reads not to merge, but got non-null result from merging");
|
||||||
|
} else {
|
||||||
|
Assert.assertTrue(actual.isStrandless(), "Merged reads should be strandless");
|
||||||
|
Assert.assertNotNull(actual, "Expected reads to merge, but got null result from merging");
|
||||||
|
// I really care about the bases, the quals, the CIGAR, and the read group tag
|
||||||
|
Assert.assertEquals(actual.getCigarString(), expectedMerged.getCigarString());
|
||||||
|
Assert.assertEquals(actual.getReadBases(), expectedMerged.getReadBases());
|
||||||
|
Assert.assertEquals(actual.getReadGroup(), expectedMerged.getReadGroup());
|
||||||
|
Assert.assertEquals(actual.getMappingQuality(), expectedMerged.getMappingQuality());
|
||||||
|
for ( final EventType type : EventType.values() )
|
||||||
|
Assert.assertEquals(actual.getBaseQualities(type), expectedMerged.getBaseQualities(type), "Failed base qualities for event type " + type);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -64,6 +64,7 @@ public class GATKSAMRecordUnitTest extends BaseTest {
|
||||||
for (int i = 0; i < reducedRead.getReadLength(); i++) {
|
for (int i = 0; i < reducedRead.getReadLength(); i++) {
|
||||||
Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
|
Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
|
||||||
}
|
}
|
||||||
|
Assert.assertEquals(reducedRead.isStrandless(), false, "Reduced reads don't have meaningful strandedness information");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -103,7 +104,35 @@ public class GATKSAMRecordUnitTest extends BaseTest {
|
||||||
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, null);
|
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, null);
|
||||||
Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart());
|
Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart());
|
||||||
Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd());
|
Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testStrandlessReads() {
|
||||||
|
final byte [] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
|
||||||
|
final byte [] quals = {20 , 20 , 20 , 20 , 20 , 20 , 20 , 20 };
|
||||||
|
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M");
|
||||||
|
Assert.assertEquals(read.isStrandless(), false);
|
||||||
|
|
||||||
|
read.setReadNegativeStrandFlag(false);
|
||||||
|
Assert.assertEquals(read.isStrandless(), false);
|
||||||
|
Assert.assertEquals(read.getReadNegativeStrandFlag(), false);
|
||||||
|
|
||||||
|
read.setReadNegativeStrandFlag(true);
|
||||||
|
Assert.assertEquals(read.isStrandless(), false);
|
||||||
|
Assert.assertEquals(read.getReadNegativeStrandFlag(), true);
|
||||||
|
|
||||||
|
read.setReadNegativeStrandFlag(true);
|
||||||
|
read.setIsStrandless(true);
|
||||||
|
Assert.assertEquals(read.isStrandless(), true);
|
||||||
|
Assert.assertEquals(read.getReadNegativeStrandFlag(), false, "negative strand flag should return false even through its set for a strandless read");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expectedExceptions = IllegalStateException.class)
|
||||||
|
public void testStrandlessReadsFailSetStrand() {
|
||||||
|
final byte [] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
|
||||||
|
final byte [] quals = {20 , 20 , 20 , 20 , 20 , 20 , 20 , 20 };
|
||||||
|
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M");
|
||||||
|
read.setIsStrandless(true);
|
||||||
|
read.setReadNegativeStrandFlag(true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue