Quick QC walkers to look at the error profile of indels in the read
This commit is contained in:
parent
24173d860a
commit
08dbd756f3
|
|
@ -0,0 +1,94 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||||
|
|
||||||
|
import net.sf.samtools.CigarOperator;
|
||||||
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Walks over the input data set, counting the number of reads ending in insertions/deletions or soft-clips
|
||||||
|
*
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* One or more BAM files.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* Number of reads ending in each category.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
|
* -R ref.fasta \
|
||||||
|
* -T ReadEndIndels \
|
||||||
|
* -o output.grp \
|
||||||
|
* -I input.bam \
|
||||||
|
* [-L input.intervals]
|
||||||
|
* </pre>
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||||
|
public class CountReadEventsWalker extends ReadWalker<Map<CigarOperator, ArrayList<Integer>> , Map<Integer, Map<CigarOperator, Long>>> {
|
||||||
|
@Output (doc = "GATKReport table output")
|
||||||
|
PrintStream out;
|
||||||
|
|
||||||
|
public Map<CigarOperator, ArrayList<Integer>> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||||
|
return ReadUtils.getCigarOperatorForAllBases(read);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Integer, Map<CigarOperator, Long>> reduceInit() {
|
||||||
|
return new HashMap<Integer, Map<CigarOperator, Long>>();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Integer, Map<CigarOperator, Long>> reduce(Map<CigarOperator, ArrayList<Integer>> value, Map<Integer, Map<CigarOperator, Long>> sum) {
|
||||||
|
for (Map.Entry<CigarOperator, ArrayList<Integer>> entry : value.entrySet()) {
|
||||||
|
CigarOperator op = entry.getKey();
|
||||||
|
ArrayList<Integer> positions = entry.getValue();
|
||||||
|
|
||||||
|
for (int p : positions) {
|
||||||
|
Map<CigarOperator, Long> operatorCount = sum.get(p);
|
||||||
|
if (operatorCount == null) {
|
||||||
|
operatorCount = new HashMap<CigarOperator, Long>();
|
||||||
|
sum.put(p, operatorCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
Long count = operatorCount.get(op);
|
||||||
|
if (count == null)
|
||||||
|
count = 0L;
|
||||||
|
count++;
|
||||||
|
operatorCount.put(op, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onTraversalDone(Map<Integer, Map<CigarOperator, Long>> result) {
|
||||||
|
GATKReport report = GATKReport.newSimpleReport("Events", "Position", "Event", "Observations");
|
||||||
|
for (Map.Entry<Integer, Map<CigarOperator, Long>> entry : result.entrySet()) {
|
||||||
|
int position = entry.getKey();
|
||||||
|
Map<CigarOperator, Long> operatorCount = entry.getValue();
|
||||||
|
|
||||||
|
for (Map.Entry<CigarOperator, Long> subEntry: operatorCount.entrySet()) {
|
||||||
|
String operator = subEntry.getKey().name();
|
||||||
|
Long observations = subEntry.getValue();
|
||||||
|
report.addRow(position, operator, observations);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
report.print(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,70 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||||
|
|
||||||
|
import net.sf.samtools.CigarElement;
|
||||||
|
import net.sf.samtools.CigarOperator;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Walks over the input data set, counting the number of reads ending in insertions/deletions or soft-clips
|
||||||
|
*
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* One or more BAM files.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* Number of reads ending in each category.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
|
* -R ref.fasta \
|
||||||
|
* -T ReadEndIndels \
|
||||||
|
* -o output.txt \
|
||||||
|
* -I input.bam \
|
||||||
|
* [-L input.intervals]
|
||||||
|
* </pre>
|
||||||
|
*/
|
||||||
|
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||||
|
public class CountTerminusEventWalker extends ReadWalker<Pair<Long, Long>, Pair<Long, Long>> {
|
||||||
|
public Pair<Long, Long> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||||
|
List<CigarElement> cigarElements = read.getCigar().getCigarElements();
|
||||||
|
|
||||||
|
CigarElement lastElement = null;
|
||||||
|
for (CigarElement element : cigarElements) {
|
||||||
|
if (element.getOperator() != CigarOperator.HARD_CLIP)
|
||||||
|
lastElement = element;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lastElement == null)
|
||||||
|
throw new UserException.MalformedBAM(read, "read does not have any bases, it's all hard clips");
|
||||||
|
|
||||||
|
long endsInIndel = lastElement.getOperator() == CigarOperator.INSERTION || lastElement.getOperator() == CigarOperator.DELETION? 1 : 0;
|
||||||
|
long endsInSC = lastElement.getOperator() == CigarOperator.SOFT_CLIP ? 1 : 0;
|
||||||
|
|
||||||
|
return new Pair<Long, Long>(endsInIndel, endsInSC);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Pair<Long, Long> reduceInit() { return new Pair<Long, Long>(0L, 0L); }
|
||||||
|
|
||||||
|
public Pair<Long, Long> reduce(Pair<Long, Long> value, Pair<Long, Long> sum) {
|
||||||
|
sum.set(sum.getFirst() + value.getFirst(), sum.getSecond() + value.getSecond());
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onTraversalDone(Pair<Long, Long> result) {
|
||||||
|
System.out.println(String.format("\tReads ending in indels : %d\n\tReads ending in soft-clips: %d\n", result.getFirst(), result.getSecond()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -783,4 +783,43 @@ public class ReadUtils {
|
||||||
return location;
|
return location;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a map with each event in the read (cigar operator) and the read coordinate where it happened.
|
||||||
|
*
|
||||||
|
* Example:
|
||||||
|
* D -> 2, 34, 75
|
||||||
|
* I -> 55
|
||||||
|
* S -> 0, 101
|
||||||
|
* H -> 101
|
||||||
|
*
|
||||||
|
* @param read the read
|
||||||
|
* @return a map with the properties described above. See example
|
||||||
|
*/
|
||||||
|
public static Map<CigarOperator, ArrayList<Integer>> getCigarOperatorForAllBases (GATKSAMRecord read) {
|
||||||
|
Map<CigarOperator, ArrayList<Integer>> events = new HashMap<CigarOperator, ArrayList<Integer>>();
|
||||||
|
|
||||||
|
int position = 0;
|
||||||
|
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||||
|
CigarOperator op = cigarElement.getOperator();
|
||||||
|
if (op.consumesReadBases()) {
|
||||||
|
ArrayList<Integer> list = events.get(op);
|
||||||
|
if (list == null) {
|
||||||
|
list = new ArrayList<Integer>();
|
||||||
|
events.put(op, list);
|
||||||
|
}
|
||||||
|
for (int i = position; i < cigarElement.getLength(); i++)
|
||||||
|
list.add(position++);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ArrayList<Integer> list = events.get(op);
|
||||||
|
if (list == null) {
|
||||||
|
list = new ArrayList<Integer>();
|
||||||
|
events.put(op, list);
|
||||||
|
}
|
||||||
|
list.add(position);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return events;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue