Added some methods to format bases such that read bases on the forward strand are in uppercase, while those on the negative strand are lowercase. This does *not* affect the default functionality of the standard PileupWalker
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@969 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9ebcd6546d
commit
b0cc763eb5
|
|
@ -16,30 +16,10 @@ import java.util.List;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
/**
|
|
||||||
* samtools pileup [-f in.ref.fasta] [-t in.ref_list] [-l in.site_list] [-iscg] [-T theta] [-N nHap] [-r pairDiffRate] <in.alignment>
|
|
||||||
*
|
|
||||||
* Print the alignment in the pileup format. In the pileup format, each line represents a genomic position,
|
|
||||||
* consisting of chromosome name, coordinate, reference base, read bases, read qualities and alignment mapping
|
|
||||||
* qualities. Information on match, mismatch, indel, strand, mapping quality and start and end of a read are all
|
|
||||||
* encoded at the read base column. At this column, a dot stands for a match to the reference base on the forward strand,
|
|
||||||
* a comma for a match on the reverse strand, 'ACGTN' for a mismatch on the forward strand and 'acgtn' for a mismatch on the
|
|
||||||
* reverse strand.
|
|
||||||
*
|
|
||||||
* A pattern '\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between this reference position and the next
|
|
||||||
* reference position. The length of the insertion is given by the integer in the pattern, followed by the inserted sequence.
|
|
||||||
* Similarly, a pattern '-[0-9]+[ACGTNacgtn]+' represents a deletion from the reference.
|
|
||||||
* Also at the read base column, a symbol '^' marks the start of a read segment which is a contiguous subsequence on the read
|
|
||||||
* separated by 'N/S/H' CIGAR operations. The ASCII of the character following '^' minus 33 gives the mapping quality.
|
|
||||||
* A symbol '$' marks the end of a read segment.
|
|
||||||
*/
|
|
||||||
public class PileupWithContextWalker extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> {
|
public class PileupWithContextWalker extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||||
@Argument(fullName="alwaysShowSecondBase",doc="If true, prints dummy bases for the second bases in the BAM file where they are missing",required=false)
|
@Argument(fullName="alwaysShowSecondBase",doc="If true, prints dummy bases for the second bases in the BAM file where they are missing",required=false)
|
||||||
public boolean alwaysShowSecondBase = false;
|
public boolean alwaysShowSecondBase = false;
|
||||||
|
|
||||||
//@Argument(fullName="showSecondBaseQuals",doc="If true, prints out second base qualities in the pileup",required=false)
|
|
||||||
//public boolean showSecondBaseQuals = false;
|
|
||||||
|
|
||||||
@Argument(fullName="qualsAsInts",doc="If true, prints out qualities in the pileup as comma-separated integers",required=false)
|
@Argument(fullName="qualsAsInts",doc="If true, prints out qualities in the pileup as comma-separated integers",required=false)
|
||||||
public boolean qualsAsInts = false;
|
public boolean qualsAsInts = false;
|
||||||
|
|
||||||
|
|
@ -68,7 +48,7 @@ public class PileupWithContextWalker extends LocusWalker<Integer, Integer> imple
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||||
String bases = pileup.getBases();
|
String bases = pileup.getBasesWithStrand();
|
||||||
|
|
||||||
if ( bases.equals("") && FLAG_UNCOVERED_BASES ) {
|
if ( bases.equals("") && FLAG_UNCOVERED_BASES ) {
|
||||||
bases = "***UNCOVERED_SITE***";
|
bases = "***UNCOVERED_SITE***";
|
||||||
|
|
@ -82,14 +62,6 @@ public class PileupWithContextWalker extends LocusWalker<Integer, Integer> imple
|
||||||
}
|
}
|
||||||
if ( secondBasePileup != null ) extras.append(" ").append(secondBasePileup);
|
if ( secondBasePileup != null ) extras.append(" ").append(secondBasePileup);
|
||||||
|
|
||||||
/*
|
|
||||||
if ( showSecondBaseQuals ) {
|
|
||||||
String secondQualPileup = pileup.getSecondaryQualPileup();
|
|
||||||
if ( secondQualPileup == null )
|
|
||||||
secondQualPileup = Utils.dupString((char)(33), bases.length());
|
|
||||||
extras.append(" ").append(secondQualPileup);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
if (contig == null || !context.getContig().equals(contig)) {
|
if (contig == null || !context.getContig().equals(contig)) {
|
||||||
contig = context.getContig();
|
contig = context.getContig();
|
||||||
contigSeq = refSeq.getSequence(contig);
|
contigSeq = refSeq.getSequence(contig);
|
||||||
|
|
@ -109,9 +81,7 @@ public class PileupWithContextWalker extends LocusWalker<Integer, Integer> imple
|
||||||
String rodString = "";
|
String rodString = "";
|
||||||
for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) {
|
for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) {
|
||||||
if ( datum != null && ! (datum instanceof rodDbSNP)) {
|
if ( datum != null && ! (datum instanceof rodDbSNP)) {
|
||||||
//System.out.printf("rod = %s%n", datum.toSimpleString());
|
|
||||||
rodString += datum.toSimpleString();
|
rodString += datum.toSimpleString();
|
||||||
//System.out.printf("Rod string %s%n", rodString);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -122,10 +92,7 @@ public class PileupWithContextWalker extends LocusWalker<Integer, Integer> imple
|
||||||
if ( rodString != "" )
|
if ( rodString != "" )
|
||||||
rodString = "[ROD: " + rodString + "]";
|
rodString = "[ROD: " + rodString + "]";
|
||||||
|
|
||||||
//if ( context.getLocation().getStart() % 1 == 0 ) {
|
out.printf("%s%s %s%n", pileup.getPileupWithStrandString(qualsAsInts), extras, rodString);
|
||||||
//System.out.printf("quals as ints %b%n", qualsAsInts);
|
|
||||||
out.printf("%s%s %s%n", pileup.getPileupString(qualsAsInts), extras, rodString);
|
|
||||||
//}
|
|
||||||
|
|
||||||
if ( EXTENDED ) {
|
if ( EXTENDED ) {
|
||||||
String probDists = pileup.getProbDistPileup();
|
String probDists = pileup.getProbDistPileup();
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,26 @@ abstract public class BasicPileup implements Pileup {
|
||||||
return bases.toString();
|
return bases.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String baseWithStrandPileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||||
|
StringBuilder bases = new StringBuilder();
|
||||||
|
|
||||||
|
for ( int i = 0; i < reads.size(); i++ ) {
|
||||||
|
SAMRecord read = reads.get(i);
|
||||||
|
int offset = offsets.get(i);
|
||||||
|
|
||||||
|
char base = (char) read.getReadBases()[offset];
|
||||||
|
|
||||||
|
base = Character.toUpperCase(base);
|
||||||
|
if (read.getReadNegativeStrandFlag()) {
|
||||||
|
base = Character.toLowerCase(base);
|
||||||
|
}
|
||||||
|
|
||||||
|
bases.append(base);
|
||||||
|
}
|
||||||
|
|
||||||
|
return bases.toString();
|
||||||
|
}
|
||||||
|
|
||||||
public static ArrayList<Byte> basePileup( List<SAMRecord> reads, List<Integer> offsets ) {
|
public static ArrayList<Byte> basePileup( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||||
ArrayList<Byte> bases = new ArrayList(reads.size());
|
ArrayList<Byte> bases = new ArrayList(reads.size());
|
||||||
for ( int i = 0; i < reads.size(); i++ ) {
|
for ( int i = 0; i < reads.size(); i++ ) {
|
||||||
|
|
@ -97,10 +117,9 @@ abstract public class BasicPileup implements Pileup {
|
||||||
|
|
||||||
byte[] compressedQuals = (byte[]) read.getAttribute("SQ");
|
byte[] compressedQuals = (byte[]) read.getAttribute("SQ");
|
||||||
byte base2;
|
byte base2;
|
||||||
//byte qual2;
|
|
||||||
if (compressedQuals != null && compressedQuals.length == read.getReadLength()) {
|
if (compressedQuals != null && compressedQuals.length == read.getReadLength()) {
|
||||||
base2 = (byte) BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(compressedQuals[offset]));
|
base2 = (byte) BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(compressedQuals[offset]));
|
||||||
//qual2 = QualityUtils.probToQual(QualityUtils.compressedQualityToProb(compressedQuals[offset]));
|
|
||||||
hasAtLeastOneSQField = true;
|
hasAtLeastOneSQField = true;
|
||||||
} else {
|
} else {
|
||||||
base2 = (byte) '.';
|
base2 = (byte) '.';
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,10 @@ public class ReadBackedPileup extends BasicPileup {
|
||||||
return basePileupAsString(reads, offsets);
|
return basePileupAsString(reads, offsets);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getBasesWithStrand() {
|
||||||
|
return baseWithStrandPileupAsString(reads, offsets);
|
||||||
|
}
|
||||||
|
|
||||||
public String getQuals() {
|
public String getQuals() {
|
||||||
return qualPileupAsString(reads, offsets);
|
return qualPileupAsString(reads, offsets);
|
||||||
}
|
}
|
||||||
|
|
@ -92,4 +96,13 @@ public class ReadBackedPileup extends BasicPileup {
|
||||||
qualsAsInts ? getQualsAsInts() : getQuals(),
|
qualsAsInts ? getQualsAsInts() : getQuals(),
|
||||||
qualsAsInts ? getMappingQualsAsInts() : getMappingQuals() );
|
qualsAsInts ? getMappingQualsAsInts() : getMappingQuals() );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getPileupWithStrandString(boolean qualsAsInts) {
|
||||||
|
return String.format("%s %s %s %s %s %s",
|
||||||
|
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
|
||||||
|
getRef(), // reference base
|
||||||
|
getBasesWithStrand(),
|
||||||
|
qualsAsInts ? getQualsAsInts() : getQuals(),
|
||||||
|
qualsAsInts ? getMappingQualsAsInts() : getMappingQuals() );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue