2009-04-15 06:13:10 +08:00
|
|
|
package org.broadinstitute.sting.utils;
|
|
|
|
|
|
2009-05-12 23:08:24 +08:00
|
|
|
import net.sf.samtools.*;
|
2009-04-15 06:13:10 +08:00
|
|
|
|
|
|
|
|
import java.util.List;
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
|
import java.util.Arrays;
|
2009-05-01 14:27:37 +08:00
|
|
|
import java.util.Random;
|
2009-04-15 06:13:10 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Created by IntelliJ IDEA.
|
|
|
|
|
* User: depristo
|
|
|
|
|
* Date: Apr 14, 2009
|
|
|
|
|
* Time: 8:54:05 AM
|
|
|
|
|
* To change this template use File | Settings | File Templates.
|
|
|
|
|
*/
|
|
|
|
|
abstract public class BasicPileup implements Pileup {
|
2009-09-29 23:58:43 +08:00
|
|
|
|
|
|
|
|
public static final char DELETION_CHAR = 'D';
|
|
|
|
|
|
|
|
|
|
protected boolean includeDeletions = false;
|
|
|
|
|
|
|
|
|
|
public void setIncludeDeletionsInPileupString(boolean value) {
|
2009-10-08 04:03:34 +08:00
|
|
|
includeDeletions = value;
|
2009-09-29 23:58:43 +08:00
|
|
|
}
|
|
|
|
|
|
2009-04-15 06:13:10 +08:00
|
|
|
public String getPileupString()
|
|
|
|
|
{
|
2009-11-19 01:03:48 +08:00
|
|
|
return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString());
|
2009-04-15 06:13:10 +08:00
|
|
|
}
|
|
|
|
|
|
2009-05-08 02:03:13 +08:00
|
|
|
public static List<Integer> constantOffset( List<SAMRecord> reads, int i ) {
|
|
|
|
|
List<Integer> l = new ArrayList<Integer>(reads.size());
|
|
|
|
|
for ( SAMRecord read : reads ) {
|
|
|
|
|
l.add(i);
|
|
|
|
|
}
|
|
|
|
|
return l;
|
|
|
|
|
}
|
|
|
|
|
|
2009-04-15 06:13:10 +08:00
|
|
|
public static String basePileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
2009-10-08 04:03:34 +08:00
|
|
|
return basePileupAsString( reads, offsets, false );
|
2009-09-29 23:58:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static String basePileupAsString( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
2009-04-15 06:13:10 +08:00
|
|
|
StringBuilder bases = new StringBuilder();
|
2009-11-19 01:03:48 +08:00
|
|
|
for ( byte base : getBasesAsArrayList(reads, offsets, includeDeletions)) {
|
2009-04-15 06:13:10 +08:00
|
|
|
bases.append((char)base);
|
|
|
|
|
}
|
|
|
|
|
return bases.toString();
|
|
|
|
|
}
|
|
|
|
|
|
2009-06-11 01:31:00 +08:00
|
|
|
public static String baseWithStrandPileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
2009-10-08 04:03:34 +08:00
|
|
|
return baseWithStrandPileupAsString( reads, offsets, false );
|
2009-09-17 00:44:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static String baseWithStrandPileupAsString( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
2009-06-11 01:31:00 +08:00
|
|
|
StringBuilder bases = new StringBuilder();
|
|
|
|
|
|
|
|
|
|
for ( int i = 0; i < reads.size(); i++ ) {
|
|
|
|
|
SAMRecord read = reads.get(i);
|
|
|
|
|
int offset = offsets.get(i);
|
|
|
|
|
|
2009-10-08 04:03:34 +08:00
|
|
|
char base;
|
|
|
|
|
if ( offset == -1 ) {
|
|
|
|
|
if ( includeDeletions )
|
|
|
|
|
base = DELETION_CHAR;
|
|
|
|
|
else
|
|
|
|
|
continue;
|
|
|
|
|
} else {
|
|
|
|
|
base = (char) read.getReadBases()[offset];
|
|
|
|
|
}
|
2009-06-11 01:31:00 +08:00
|
|
|
|
|
|
|
|
base = Character.toUpperCase(base);
|
|
|
|
|
if (read.getReadNegativeStrandFlag()) {
|
|
|
|
|
base = Character.toLowerCase(base);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bases.append(base);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return bases.toString();
|
|
|
|
|
}
|
|
|
|
|
|
2009-11-19 01:03:48 +08:00
|
|
|
//
|
|
|
|
|
// byte[] methods
|
|
|
|
|
//
|
|
|
|
|
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets ) {
|
2009-11-20 07:06:49 +08:00
|
|
|
return getBasesAsArray(reads,offsets,false);
|
2009-09-17 00:44:57 +08:00
|
|
|
}
|
|
|
|
|
|
2009-11-19 01:03:48 +08:00
|
|
|
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
2009-11-20 07:06:49 +08:00
|
|
|
return getBasesAsArray(reads,offsets,includeDeletions);
|
2009-11-19 01:03:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets ) {
|
2009-11-20 07:06:49 +08:00
|
|
|
return getQualsAsArray( reads, offsets,false);
|
2009-11-19 01:03:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
2009-11-20 07:06:49 +08:00
|
|
|
return getQualsAsArray( reads, offsets, includeDeletions);
|
2009-11-19 01:03:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
// ArrayList<Byte> methods
|
|
|
|
|
//
|
|
|
|
|
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
|
|
|
|
return getBasesAsArrayList( reads, offsets, false );
|
|
|
|
|
}
|
|
|
|
|
|
2009-11-20 07:06:49 +08:00
|
|
|
public static byte[] getBasesAsArray( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
|
|
|
|
byte array[] = new byte[reads.size()];
|
|
|
|
|
int index = 0;
|
2009-04-15 06:13:10 +08:00
|
|
|
for ( int i = 0; i < reads.size(); i++ ) {
|
|
|
|
|
SAMRecord read = reads.get(i);
|
|
|
|
|
int offset = offsets.get(i);
|
2009-10-08 04:03:34 +08:00
|
|
|
if ( offset == -1 ) {
|
|
|
|
|
if ( includeDeletions )
|
2009-11-20 07:06:49 +08:00
|
|
|
array[index++] = ((byte)DELETION_CHAR);
|
2009-10-08 04:03:34 +08:00
|
|
|
} else {
|
2009-11-20 07:06:49 +08:00
|
|
|
array[index++] = read.getReadBases()[offset];
|
2009-10-08 04:03:34 +08:00
|
|
|
}
|
2009-04-15 06:13:10 +08:00
|
|
|
}
|
2009-11-20 07:06:49 +08:00
|
|
|
return array;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
|
|
|
|
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
|
|
|
|
|
for (byte value : getBasesAsArray(reads, offsets, includeDeletions))
|
|
|
|
|
bases.add(value);
|
2009-04-15 06:13:10 +08:00
|
|
|
return bases;
|
2009-10-08 04:03:34 +08:00
|
|
|
}
|
2009-04-15 06:13:10 +08:00
|
|
|
|
2009-11-19 01:03:48 +08:00
|
|
|
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
|
|
|
|
return getQualsAsArrayList( reads, offsets, false );
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
2009-07-06 23:41:30 +08:00
|
|
|
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
2009-11-20 07:06:49 +08:00
|
|
|
for (byte value : getQualsAsArray(reads, offsets, includeDeletions))
|
|
|
|
|
quals.add(value);
|
|
|
|
|
return quals;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static byte[] getQualsAsArray( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
|
|
|
|
byte array[] = new byte[reads.size()];
|
|
|
|
|
int index = 0;
|
2009-04-15 06:13:10 +08:00
|
|
|
for ( int i = 0; i < reads.size(); i++ ) {
|
|
|
|
|
SAMRecord read = reads.get(i);
|
|
|
|
|
int offset = offsets.get(i);
|
2009-11-19 01:03:48 +08:00
|
|
|
|
2009-10-08 04:03:34 +08:00
|
|
|
// skip deletion sites
|
2009-11-19 01:03:48 +08:00
|
|
|
if ( offset == -1 ) {
|
|
|
|
|
if ( includeDeletions ) // we need the qual vector to be the same length as base vector!
|
2009-11-20 07:06:49 +08:00
|
|
|
array[index++] = ((byte)0);
|
2009-11-19 01:03:48 +08:00
|
|
|
} else {
|
2009-11-20 07:06:49 +08:00
|
|
|
array[index++] = read.getBaseQualities()[offset];
|
2009-11-19 01:03:48 +08:00
|
|
|
}
|
2009-04-15 06:13:10 +08:00
|
|
|
}
|
2009-11-20 07:06:49 +08:00
|
|
|
return array;
|
2009-04-15 06:13:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) {
|
2009-07-06 23:41:30 +08:00
|
|
|
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
2009-04-15 06:13:10 +08:00
|
|
|
for ( int i = 0; i < reads.size(); i++ ) {
|
|
|
|
|
SAMRecord read = reads.get(i);
|
|
|
|
|
byte qual = (byte)read.getMappingQuality();
|
|
|
|
|
quals.add(qual);
|
|
|
|
|
}
|
|
|
|
|
return quals;
|
|
|
|
|
}
|
|
|
|
|
|
2009-05-22 06:25:16 +08:00
|
|
|
public static String mappingQualPileupAsString( List<SAMRecord> reads) {
|
|
|
|
|
return quals2String(mappingQualPileup(reads));
|
|
|
|
|
}
|
|
|
|
|
|
2009-11-19 01:03:48 +08:00
|
|
|
private static byte[] ArrayList2byte(ArrayList<Byte> ab) {
|
|
|
|
|
byte[] ret = new byte[ab.size()];
|
|
|
|
|
int i = 0;
|
|
|
|
|
for (byte e : ab)
|
|
|
|
|
ret[i++] = e;
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2009-05-22 06:25:16 +08:00
|
|
|
public static String quals2String( List<Byte> quals ) {
|
|
|
|
|
StringBuilder qualStr = new StringBuilder();
|
|
|
|
|
for ( int qual : quals ) {
|
2009-04-15 06:13:10 +08:00
|
|
|
qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea
|
|
|
|
|
char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63
|
2009-05-22 06:25:16 +08:00
|
|
|
qualStr.append(qualChar);
|
2009-04-15 06:13:10 +08:00
|
|
|
}
|
|
|
|
|
|
2009-05-22 06:25:16 +08:00
|
|
|
return qualStr.toString();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static String qualPileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
2009-11-19 01:03:48 +08:00
|
|
|
return quals2String(getQualsAsArrayList(reads, offsets));
|
2009-04-15 06:13:10 +08:00
|
|
|
}
|
|
|
|
|
|
2009-11-19 01:03:48 +08:00
|
|
|
// todo -- there are probably bugs in here due to includeDeletion flags
|
|
|
|
|
public static ArrayList<Byte> getSecondaryBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
2009-04-15 06:13:10 +08:00
|
|
|
ArrayList<Byte> bases2 = new ArrayList<Byte>(reads.size());
|
2009-11-12 23:18:21 +08:00
|
|
|
boolean hasAtLeastOneSQorE2Field = false;
|
2009-04-15 06:13:10 +08:00
|
|
|
|
|
|
|
|
for ( int i = 0; i < reads.size(); i++ ) {
|
|
|
|
|
SAMRecord read = reads.get(i);
|
|
|
|
|
int offset = offsets.get(i);
|
|
|
|
|
|
2009-11-12 23:18:21 +08:00
|
|
|
if (read.getAttribute("SQ") != null) {
|
|
|
|
|
byte[] compressedQuals = (byte[]) read.getAttribute("SQ");
|
|
|
|
|
byte base2;
|
2009-06-11 01:31:00 +08:00
|
|
|
|
2009-11-12 23:18:21 +08:00
|
|
|
if (offset != -1 && compressedQuals != null && compressedQuals.length == read.getReadLength()) {
|
|
|
|
|
base2 = (byte) BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(compressedQuals[offset]));
|
|
|
|
|
hasAtLeastOneSQorE2Field = true;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
base2 = (byte) '.';
|
|
|
|
|
}
|
|
|
|
|
bases2.add((byte) base2);
|
|
|
|
|
}
|
|
|
|
|
else if (read.getAttribute("E2") != null) {
|
|
|
|
|
String secondaries = (String) read.getAttribute("E2");
|
|
|
|
|
char base2;
|
|
|
|
|
if (offset != -1 && secondaries != null && secondaries.length() == read.getReadLength()) {
|
|
|
|
|
base2 = secondaries.charAt(offset);
|
|
|
|
|
hasAtLeastOneSQorE2Field = true;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
base2 = '.';
|
|
|
|
|
}
|
|
|
|
|
bases2.add((byte) base2);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
byte base2 = 'N';
|
|
|
|
|
bases2.add(base2);
|
2009-04-15 06:13:10 +08:00
|
|
|
}
|
|
|
|
|
}
|
2009-11-12 23:18:21 +08:00
|
|
|
return (hasAtLeastOneSQorE2Field ? bases2 : null);
|
2009-04-15 06:13:10 +08:00
|
|
|
}
|
|
|
|
|
|
2009-11-19 01:03:48 +08:00
|
|
|
public static String getSecondaryBasePileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
2009-04-15 06:13:10 +08:00
|
|
|
StringBuilder bases2 = new StringBuilder();
|
2009-11-19 01:03:48 +08:00
|
|
|
ArrayList<Byte> sbases = getSecondaryBasesAsArrayList(reads, offsets);
|
2009-04-15 06:13:10 +08:00
|
|
|
|
2009-05-01 14:27:37 +08:00
|
|
|
if (sbases == null) { return null; }
|
2009-04-15 06:13:10 +08:00
|
|
|
|
2009-11-19 01:03:48 +08:00
|
|
|
ArrayList<Byte> pbases = getBasesAsArrayList(reads, offsets,true);
|
2009-05-01 14:27:37 +08:00
|
|
|
|
|
|
|
|
Random generator = new Random();
|
|
|
|
|
|
|
|
|
|
for (int pileupIndex = 0; pileupIndex < sbases.size(); pileupIndex++) {
|
|
|
|
|
byte pbase = pbases.get(pileupIndex);
|
|
|
|
|
byte sbase = sbases.get(pileupIndex);
|
|
|
|
|
|
|
|
|
|
while (sbase == pbase) {
|
|
|
|
|
sbase = (byte) BaseUtils.baseIndexToSimpleBase(generator.nextInt(4));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bases2.append((char) sbase);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
2009-04-15 06:13:10 +08:00
|
|
|
for (byte base2 : secondaryBasePileup(reads, offsets)) {
|
|
|
|
|
bases2.append((char) base2);
|
|
|
|
|
}
|
2009-05-01 14:27:37 +08:00
|
|
|
*/
|
2009-04-15 06:13:10 +08:00
|
|
|
|
|
|
|
|
return bases2.toString();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static ArrayList<Byte> secondaryQualPileup( List<SAMRecord> reads, List<Integer> offsets ) {
|
|
|
|
|
ArrayList<Byte> quals2 = new ArrayList<Byte>(reads.size());
|
|
|
|
|
boolean hasAtLeastOneSQField = false;
|
|
|
|
|
|
|
|
|
|
for ( int i = 0; i < reads.size(); i++ ) {
|
|
|
|
|
SAMRecord read = reads.get(i);
|
|
|
|
|
int offset = offsets.get(i);
|
|
|
|
|
|
|
|
|
|
byte[] compressedQuals = (byte[]) read.getAttribute("SQ");
|
|
|
|
|
byte qual2;
|
2009-09-17 00:44:57 +08:00
|
|
|
if (offset != -1 && compressedQuals != null) {
|
2009-04-15 06:13:10 +08:00
|
|
|
qual2 = QualityUtils.probToQual(QualityUtils.compressedQualityToProb(compressedQuals[offset]));
|
|
|
|
|
hasAtLeastOneSQField = true;
|
|
|
|
|
} else {
|
|
|
|
|
qual2 = 0;
|
|
|
|
|
}
|
|
|
|
|
quals2.add(qual2);
|
|
|
|
|
}
|
|
|
|
|
return (hasAtLeastOneSQField ? quals2 : null);
|
|
|
|
|
}
|
|
|
|
|
|
2009-05-22 06:25:16 +08:00
|
|
|
public static String secondaryQualPileupAsString( List<SAMRecord> reads, List<Integer> offsets) {
|
2009-04-15 06:13:10 +08:00
|
|
|
StringBuilder quals2 = new StringBuilder();
|
|
|
|
|
ArrayList<Byte> sqquals = secondaryQualPileup(reads, offsets);
|
|
|
|
|
|
2009-05-22 06:25:16 +08:00
|
|
|
if (sqquals == null) {
|
|
|
|
|
return null;
|
|
|
|
|
} else {
|
|
|
|
|
for (byte qual2 : secondaryQualPileup(reads, offsets)) {
|
|
|
|
|
quals2.append(qual2);
|
|
|
|
|
}
|
|
|
|
|
return quals2.toString();
|
2009-04-15 06:13:10 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2009-04-22 06:26:25 +08:00
|
|
|
public static double[][] probDistPileup( List<SAMRecord> reads, List<Integer> offsets ) {
|
|
|
|
|
double[][] dist = new double[reads.size()][4];
|
|
|
|
|
|
|
|
|
|
for (int readIndex = 0; readIndex < dist.length; readIndex++) {
|
|
|
|
|
SAMRecord read = reads.get(readIndex);
|
|
|
|
|
|
|
|
|
|
String bases = read.getReadString();
|
|
|
|
|
int offset = offsets.get(readIndex);
|
2009-10-08 04:03:34 +08:00
|
|
|
if ( offset == -1 )
|
|
|
|
|
continue;
|
2009-04-22 06:26:25 +08:00
|
|
|
|
|
|
|
|
int bestBaseIndex = BaseUtils.simpleBaseToBaseIndex(bases.charAt(offset));
|
|
|
|
|
|
|
|
|
|
if (bestBaseIndex >= 0 && bestBaseIndex < 4) {
|
|
|
|
|
dist[readIndex][bestBaseIndex] = QualityUtils.qualToProb(read.getBaseQualities()[offset]);
|
|
|
|
|
|
|
|
|
|
byte[] sqs = (byte[]) read.getAttribute("SQ");
|
2009-04-22 07:36:09 +08:00
|
|
|
if (sqs != null && QualityUtils.compressedQualityToBaseIndex(sqs[offset]) != bestBaseIndex) {
|
2009-04-22 08:01:37 +08:00
|
|
|
double epsilon = 1e-4;
|
|
|
|
|
|
2009-04-22 06:26:25 +08:00
|
|
|
int secondBestBaseIndex = QualityUtils.compressedQualityToBaseIndex(sqs[offset]);
|
2009-04-24 11:34:43 +08:00
|
|
|
//dist[readIndex][secondBestBaseIndex] = (1.0 - dist[readIndex][bestBaseIndex] - 2.0*epsilon);
|
|
|
|
|
dist[readIndex][secondBestBaseIndex] = 0.8*(1.0 - dist[readIndex][bestBaseIndex]);
|
2009-04-22 08:01:37 +08:00
|
|
|
|
|
|
|
|
for (int baseIndex = 0; baseIndex < 4; baseIndex++) {
|
|
|
|
|
if (baseIndex != bestBaseIndex && baseIndex != secondBestBaseIndex) {
|
2009-04-24 11:34:43 +08:00
|
|
|
//dist[readIndex][baseIndex] = epsilon;
|
|
|
|
|
dist[readIndex][baseIndex] = 0.1*(1.0 - dist[readIndex][bestBaseIndex]);
|
2009-04-22 08:01:37 +08:00
|
|
|
}
|
|
|
|
|
}
|
2009-04-22 06:26:25 +08:00
|
|
|
} else {
|
|
|
|
|
for (int baseIndex = 0; baseIndex < 4; baseIndex++) {
|
|
|
|
|
if (baseIndex != bestBaseIndex) {
|
2009-04-22 07:36:09 +08:00
|
|
|
dist[readIndex][baseIndex] = (1.0 - dist[readIndex][bestBaseIndex])/3.0;
|
2009-04-22 06:26:25 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
for (int baseIndex = 0; baseIndex < 4; baseIndex++) {
|
|
|
|
|
dist[readIndex][baseIndex] = 0.25;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return dist;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static String probDistPileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
|
|
|
|
double[][] dist = probDistPileup(reads, offsets);
|
|
|
|
|
|
2009-04-24 11:34:43 +08:00
|
|
|
String distString = String.format(" %c %c %c %c\n", 'A', 'C', 'G', 'T');
|
2009-04-22 06:26:25 +08:00
|
|
|
for (int readIndex = 0; readIndex < dist.length; readIndex++) {
|
|
|
|
|
distString += "[ ";
|
|
|
|
|
for (int baseIndex = 0; baseIndex < 4; baseIndex++) {
|
2009-04-24 11:34:43 +08:00
|
|
|
distString += String.format("%4.4f ", dist[readIndex][baseIndex]);
|
2009-04-22 06:26:25 +08:00
|
|
|
}
|
|
|
|
|
distString += "]\n";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return distString;
|
|
|
|
|
}
|
|
|
|
|
|
2009-05-12 23:08:24 +08:00
|
|
|
public static String[] indelPileup( List<SAMRecord> reads, List<Integer> offsets )
|
|
|
|
|
{
|
|
|
|
|
String[] indels = new String[reads.size()];
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < reads.size(); i++)
|
|
|
|
|
{
|
|
|
|
|
SAMRecord read = reads.get(i);
|
|
|
|
|
Cigar cigar = read.getCigar();
|
2009-09-17 00:44:57 +08:00
|
|
|
int offset = offsets.get(i);
|
2009-05-12 23:08:24 +08:00
|
|
|
|
|
|
|
|
String cigar_string = read.getCigarString();
|
|
|
|
|
if (! (cigar_string.contains("I") || cigar_string.contains("D"))) { indels[i] = "null"; continue; }
|
|
|
|
|
|
|
|
|
|
//System.out.printf("%s:%d %s %s %s ", read.getReferenceName(), read.getAlignmentStart(), read.getReadName(), read.getReadString(), cigar_string);
|
|
|
|
|
int k = 0;
|
|
|
|
|
for (int j = 0; j < cigar.numCigarElements(); j++)
|
|
|
|
|
{
|
|
|
|
|
CigarOperator operator = cigar.getCigarElement(j).getOperator();
|
|
|
|
|
int length = cigar.getCigarElement(j).getLength();
|
|
|
|
|
if (operator == CigarOperator.M)
|
|
|
|
|
{
|
|
|
|
|
k += length;
|
|
|
|
|
}
|
|
|
|
|
else if ((k == offset+1) && (operator == CigarOperator.I))
|
|
|
|
|
{
|
|
|
|
|
// this insertion is associated with this offset (kinda ;) ).
|
|
|
|
|
indels[i] = read.getReadString().substring(k, k+length);
|
|
|
|
|
//System.out.printf("(I,%d,%d)", k, offset);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
else if ((k != offset+1) && (operator == CigarOperator.I))
|
|
|
|
|
{
|
|
|
|
|
//System.out.printf("(i,%d,%d)", k, offset);
|
|
|
|
|
k += length;
|
|
|
|
|
}
|
|
|
|
|
else if ((k == offset) && (operator == CigarOperator.D))
|
|
|
|
|
{
|
|
|
|
|
// this deletion is associated with this offset.
|
|
|
|
|
indels[i] = length + "D";
|
|
|
|
|
//System.out.printf("(D,%d,%d)", k, offset);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
else if (k >= offset)
|
|
|
|
|
{
|
|
|
|
|
// no indel here.
|
|
|
|
|
indels[i] = "null";
|
|
|
|
|
//System.out.printf("(N,%d,%d)", k, offset);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (indels[i] == null) { indels[i] = "null"; }
|
|
|
|
|
//System.out.printf("\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return indels;
|
|
|
|
|
}
|
|
|
|
|
|
2009-04-15 06:13:10 +08:00
|
|
|
public static String pileupDiff(final Pileup a, final Pileup b)
|
|
|
|
|
{
|
|
|
|
|
return pileupDiff(a,b,true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static String maybeSorted( final String x, boolean sortMe )
|
|
|
|
|
{
|
|
|
|
|
if ( sortMe ) {
|
|
|
|
|
byte[] bytes = x.getBytes();
|
|
|
|
|
Arrays.sort(bytes);
|
|
|
|
|
return new String(bytes);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
return x;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static String pileupDiff(final Pileup a, final Pileup b, boolean orderDependent)
|
|
|
|
|
{
|
|
|
|
|
if ( a.size() != b.size() )
|
|
|
|
|
return "Sizes not equal";
|
|
|
|
|
if ( a.getLocation().compareTo(b.getLocation()) != 0 )
|
|
|
|
|
return "Locations not equal";
|
|
|
|
|
|
2009-11-19 01:03:48 +08:00
|
|
|
String aBases = maybeSorted(a.getBasesAsString(), ! orderDependent );
|
|
|
|
|
String bBases = maybeSorted(b.getBasesAsString(), ! orderDependent );
|
2009-04-15 06:13:10 +08:00
|
|
|
if ( ! aBases.toUpperCase().equals(bBases.toUpperCase()) )
|
|
|
|
|
return "Bases not equal";
|
|
|
|
|
|
2009-11-19 01:03:48 +08:00
|
|
|
String aQuals = maybeSorted(a.getQualsAsString(), ! orderDependent );
|
|
|
|
|
String bQuals = maybeSorted(b.getQualsAsString(), ! orderDependent );
|
2009-04-15 06:13:10 +08:00
|
|
|
if ( ! aQuals.equals(bQuals) )
|
|
|
|
|
return "Quals not equal";
|
|
|
|
|
|
|
|
|
|
return null;
|
|
|
|
|
}
|
2009-05-08 02:03:13 +08:00
|
|
|
|
2009-07-23 01:54:44 +08:00
|
|
|
public static class BaseCounts {
|
2009-10-17 05:56:56 +08:00
|
|
|
public int a, c, t, g;
|
2009-07-23 01:54:44 +08:00
|
|
|
|
|
|
|
|
public BaseCounts(int a, int c, int t, int g) {
|
|
|
|
|
this.a = a;
|
|
|
|
|
this.c = c;
|
|
|
|
|
this.t = t;
|
|
|
|
|
this.g = g;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static int countBase(final char base, final String bases) {
|
|
|
|
|
return Utils.countOccurrences(base, bases);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static BaseCounts countBases(final String bases) {
|
2009-05-08 02:03:13 +08:00
|
|
|
String canon = bases.toUpperCase();
|
2009-07-23 01:54:44 +08:00
|
|
|
return new BaseCounts(Utils.countOccurrences('A', canon),
|
|
|
|
|
Utils.countOccurrences('C', canon),
|
|
|
|
|
Utils.countOccurrences('T', canon),
|
|
|
|
|
Utils.countOccurrences('G', canon));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static byte consensusBase(String bases) {
|
|
|
|
|
BaseCounts counts = countBases( bases );
|
|
|
|
|
int ACount = counts.a;
|
|
|
|
|
int CCount = counts.c;
|
|
|
|
|
int TCount = counts.t;
|
|
|
|
|
int GCount = counts.g;
|
2009-05-08 02:03:13 +08:00
|
|
|
|
|
|
|
|
int m = Math.max(ACount, Math.max(CCount, Math.max(TCount, GCount)));
|
|
|
|
|
if ( ACount == m ) return 'A';
|
|
|
|
|
if ( CCount == m ) return 'C';
|
|
|
|
|
if ( TCount == m ) return 'T';
|
|
|
|
|
if ( GCount == m ) return 'G';
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2009-04-15 06:13:10 +08:00
|
|
|
}
|
2009-07-23 01:54:44 +08:00
|
|
|
|
|
|
|
|
|