a few tweaks to make it more robust: ignore reads with cigars containing anything but I,D,M; don't set up contig ordering manually, rely upon reference sequence and its dictionary; don't die if a record does not have NM tag, but faal back to direct counting instead; now requires reference as a cmdline arg
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@378 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
32e000bbfe
commit
b4136b6d6e
|
|
@ -127,7 +127,7 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian
|
||||||
System.out.printf(" Exception caught during parsing Pileup line: %s%n", Utils.join(" <=> ", parts));
|
System.out.printf(" Exception caught during parsing Pileup line: %s%n", Utils.join(" <=> ", parts));
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
if ( nNonref > 1 ) System.out.println("SAM pileup: WARNING: multi-allelic variant at "+ loc.toString());
|
if ( nNonref > 1 ) System.out.println("SAM pileup: WARNING: multi-allelic variant : ("+refBaseChar+") -->"+toMediumString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -243,10 +243,13 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toMediumString() {
|
public String toMediumString() {
|
||||||
String s = String.format("%s:%s:%s", getLocation().toString(), name, observedString);
|
|
||||||
|
String s = null;
|
||||||
|
if ( refBaseChar == '*' ) s = String.format("%s:%s:%s", getLocation().toString(), name,observedString);
|
||||||
|
else s = String.format("%s:%s:%s/%s", getLocation().toString(), name, observedAlleles.get(0),observedAlleles.get(1));
|
||||||
if ( isSNP() ) s += ": SNP";
|
if ( isSNP() ) s += ": SNP";
|
||||||
else {
|
else {
|
||||||
if ( isInsertion() ) s += ": Insertoin";
|
if ( isInsertion() ) s += ": Insertion";
|
||||||
else {
|
else {
|
||||||
if ( isDeletion() ) s+= ": Deletion";
|
if ( isDeletion() ) s+= ": Deletion";
|
||||||
else {
|
else {
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,13 @@ public class MendelianInheritanceWalker extends RefWalker<TrioConcordanceRecord
|
||||||
AllelicVariant dad = (rodSAMPileup)rodData.lookup("father", null);
|
AllelicVariant dad = (rodSAMPileup)rodData.lookup("father", null);
|
||||||
AllelicVariant kid = (rodSAMPileup)rodData.lookup("daughter", null);
|
AllelicVariant kid = (rodSAMPileup)rodData.lookup("daughter", null);
|
||||||
|
|
||||||
if ( ! hasCall(mom) || ! hasCall(dad) || ! hasCall(kid) ) return t; // at least one person is not called; nothing to do, bail out
|
if ( hasCall(mom)) t.mom_assessed = 1;
|
||||||
|
if ( hasCall(dad)) t.dad_assessed = 1;
|
||||||
|
if ( hasCall(kid)) t.kid_assessed = 1;
|
||||||
|
|
||||||
|
if ( hasCall(mom) && mom.isIndel() ) System.out.println("GOT INDEL: "+mom.toString());
|
||||||
|
|
||||||
|
if (( t.mom_assessed + t.dad_assessed + t.kid_assessed) != 3 ) return t; // at least one person is not called; nothing to do, bail out
|
||||||
|
|
||||||
// proceed only if we got confident calls
|
// proceed only if we got confident calls
|
||||||
|
|
||||||
|
|
@ -62,11 +68,11 @@ public class MendelianInheritanceWalker extends RefWalker<TrioConcordanceRecord
|
||||||
mom_alleles.contains(kid_allele_2) && dad_alleles.contains(kid_allele_1) ) {
|
mom_alleles.contains(kid_allele_2) && dad_alleles.contains(kid_allele_1) ) {
|
||||||
t.consistent_snp = 1;
|
t.consistent_snp = 1;
|
||||||
|
|
||||||
logger.info("consistent SNP at "+context.getLocation() +
|
// logger.info("consistent SNP at "+context.getLocation() +
|
||||||
"("+ref+") " + mom_alleles.get(0)+"/" +mom_alleles.get(1) + " " +
|
// "("+ref+") " + mom_alleles.get(0)+"/" +mom_alleles.get(1) + " " +
|
||||||
dad_alleles.get(0)+"/" +dad_alleles.get(1) + " " +
|
// dad_alleles.get(0)+"/" +dad_alleles.get(1) + " " +
|
||||||
kid_allele_1+"/" +kid_allele_2
|
// kid_allele_1+"/" +kid_allele_2
|
||||||
);
|
// );
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,50 +27,56 @@ public class IndelInspector extends CommandLineProgram {
|
||||||
@Option(doc="Output file (sam or bam) for non-indel related reads and indel reads that were not improved") public String OUT1;
|
@Option(doc="Output file (sam or bam) for non-indel related reads and indel reads that were not improved") public String OUT1;
|
||||||
@Option(doc="Output file (sam or bam) for improved (realigned) indel related reads") public String OUT2;
|
@Option(doc="Output file (sam or bam) for improved (realigned) indel related reads") public String OUT2;
|
||||||
@Option(doc="[paranoid] If true, all reads that would be otherwise picked and processed by this tool will be saved, unmodified, into OUT1", optional=true) public boolean CONTROL_RUN;
|
@Option(doc="[paranoid] If true, all reads that would be otherwise picked and processed by this tool will be saved, unmodified, into OUT1", optional=true) public boolean CONTROL_RUN;
|
||||||
@Option(doc="Error counting mode: MM - count mismatches only, ERR - count errors (arachne style), MG - count mismatches and gaps as one error each") public String ERR_MODE;
|
@Option(doc="Error counting mode: MM - mismatches only (from sam tags), MC - mismatches only doing actual mismatch count on the fly (use this if tags are incorrectly set); ERR - errors (arachne style: mm+gap lengths), MG - count mismatches and gaps as one error each") public String ERR_MODE;
|
||||||
@Option(doc="Maximum number of errors allowed (see ERR_MODE)") public Integer MAX_ERRS;
|
@Option(doc="Maximum number of errors allowed (see ERR_MODE)") public Integer MAX_ERRS;
|
||||||
// @Option(shortName="R", doc="Reference fasta or fasta.gz file") public File REF_FILE;
|
@Option(shortName="R", doc="Reference fasta or fasta.gz file") public File REF_FILE;
|
||||||
|
|
||||||
/** Required main method implementation. */
|
/** Required main method implementation. */
|
||||||
public static void main(final String[] argv) {
|
public static void main(final String[] argv) {
|
||||||
System.exit(new IndelInspector().instanceMain(argv));
|
System.exit(new IndelInspector().instanceMain(argv));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected int doWork() {
|
protected int doWork() {
|
||||||
|
|
||||||
System.out.println("I am at version 0.3");
|
int discarded_count = 0;
|
||||||
|
|
||||||
|
ReferenceSequenceFileWalker reference = new ReferenceSequenceFileWalker(
|
||||||
|
REF_FILE
|
||||||
|
);
|
||||||
|
|
||||||
|
if ( reference.getSequenceDictionary() == null ) {
|
||||||
|
System.out.println("No reference sequence dictionary found. Abort.");
|
||||||
|
}
|
||||||
|
|
||||||
|
GenomeLoc.setupRefContigOrdering(reference.getSequenceDictionary());
|
||||||
GenomeLoc location = null;
|
GenomeLoc location = null;
|
||||||
if ( GENOME_LOCATION != null ) {
|
if ( GENOME_LOCATION != null ) {
|
||||||
location = GenomeLoc.parseGenomeLoc(GENOME_LOCATION);
|
location = GenomeLoc.parseGenomeLoc(GENOME_LOCATION);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ! ERR_MODE.equals("MM") && ! ERR_MODE.equals("MG") && ! ERR_MODE.equals("ERR") ) {
|
if ( ! ERR_MODE.equals("MM") && ! ERR_MODE.equals("MG") && ! ERR_MODE.equals("ERR") && ! ERR_MODE.equals("MC") ) {
|
||||||
System.out.println("Unknown value specified for ERR_MODE");
|
System.out.println("Unknown value specified for ERR_MODE: "+ERR_MODE);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
final SAMFileReader samReader = new SAMFileReader(getInputFile(INPUT_FILE,"/broad/1KG/"));
|
final SAMFileReader samReader = new SAMFileReader(getInputFile(INPUT_FILE,"/broad/1KG/"));
|
||||||
samReader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
samReader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
||||||
|
|
||||||
setContigOrdering(samReader);
|
// setContigOrdering(samReader);
|
||||||
|
|
||||||
ReferenceSequenceFileWalker reference = new ReferenceSequenceFileWalker(
|
|
||||||
new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
ReferenceSequence contig_seq = null;
|
ReferenceSequence contig_seq = null;
|
||||||
|
|
||||||
IndelRecordPileCollector col = null;
|
IndelRecordPileCollector col = null;
|
||||||
PassThroughWriter ptWriter = new PassThroughWriter(OUT1,samReader.getFileHeader());
|
PassThroughWriter ptWriter = new PassThroughWriter(OUT1,samReader.getFileHeader());
|
||||||
PileBuilder pileBuilder = null;
|
PileBuilder pileBuilder = null;
|
||||||
if ( ! CONTROL_RUN ) pileBuilder = new PileBuilder(OUT2,samReader.getFileHeader(),ptWriter);
|
if ( ! CONTROL_RUN ) pileBuilder = new PileBuilder(OUT2,samReader.getFileHeader(),ptWriter);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if ( CONTROL_RUN ) col = new IndelRecordPileCollector(ptWriter, new DiscardingPileReceiver() );
|
if ( CONTROL_RUN ) col = new IndelRecordPileCollector(ptWriter, new DiscardingPileReceiver() );
|
||||||
else col = new IndelRecordPileCollector(ptWriter, pileBuilder );
|
else col = new IndelRecordPileCollector(ptWriter, pileBuilder );
|
||||||
} catch(Exception e) { System.err.println(e.getMessage()); }
|
} catch(Exception e) { System.err.println(e.getMessage()); }
|
||||||
if ( col == null ) return 1;
|
if ( col == null ) return 1;
|
||||||
|
|
||||||
col.setControlRun(CONTROL_RUN);
|
col.setControlRun(CONTROL_RUN);
|
||||||
|
|
||||||
|
|
@ -91,9 +97,9 @@ public class IndelInspector extends CommandLineProgram {
|
||||||
for ( SAMRecord r : samReader ) {
|
for ( SAMRecord r : samReader ) {
|
||||||
|
|
||||||
if ( r.getReadUnmappedFlag() ) continue;
|
if ( r.getReadUnmappedFlag() ) continue;
|
||||||
if ( r.getReferenceName() != cur_contig) {
|
if ( r.getReferenceName() != cur_contig) {
|
||||||
cur_contig = r.getReferenceName();
|
cur_contig = r.getReferenceName();
|
||||||
System.out.println("Contig "+cur_contig);
|
System.out.println("Contig "+cur_contig);
|
||||||
// if contig is specified and we are past that contig, we are done:
|
// if contig is specified and we are past that contig, we are done:
|
||||||
if ( location != null && GenomeLoc.compareContigs(cur_contig, location.getContig()) == 1 ) break;
|
if ( location != null && GenomeLoc.compareContigs(cur_contig, location.getContig()) == 1 ) break;
|
||||||
if ( location == null || GenomeLoc.compareContigs(cur_contig, location.getContig()) == 0 ) {
|
if ( location == null || GenomeLoc.compareContigs(cur_contig, location.getContig()) == 0 ) {
|
||||||
|
|
@ -105,18 +111,33 @@ public class IndelInspector extends CommandLineProgram {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// if contig is specified and wqe did not reach it yet, skip the records until we reach that contig:
|
// if contig is specified and we did not reach it yet, skip the records until we reach that contig:
|
||||||
if ( location != null && GenomeLoc.compareContigs(cur_contig, location.getContig()) == -1 ) continue;
|
if ( location != null && GenomeLoc.compareContigs(cur_contig, location.getContig()) == -1 ) continue;
|
||||||
|
|
||||||
|
|
||||||
if ( location != null && r.getAlignmentEnd() < location.getStart() ) continue;
|
if ( location != null && r.getAlignmentEnd() < location.getStart() ) continue;
|
||||||
|
|
||||||
// if stop position is specified and we are past that, stop reading:
|
// if stop position is specified and we are past that, stop reading:
|
||||||
if ( location != null && r.getAlignmentStart() > location.getStop() ) break;
|
if ( location != null && r.getAlignmentStart() > location.getStop() ) break;
|
||||||
|
|
||||||
if ( cur_contig.equals("chrM") || GenomeLoc.compareContigs(cur_contig,"chrY")==1 ) continue; // skip chrM and unplaced contigs for now
|
// if ( cur_contig.equals("chrM") || GenomeLoc.compareContigs(cur_contig,"chrY") > 0 ) continue; // skip chrM and unplaced contigs for now
|
||||||
|
|
||||||
int err = -1;
|
// we currently do not know how to deal with cigars containing elements other than M,I,D, so
|
||||||
|
// let's just skip the reads that contain those other elements (clipped reads?)
|
||||||
|
Cigar c = r.getCigar();
|
||||||
|
boolean cigar_acceptable = true;
|
||||||
|
for ( int z = 0 ; z < c.numCigarElements() ; z++ ) {
|
||||||
|
CigarElement ce = c.getCigarElement(z);
|
||||||
|
switch ( ce.getOperator() ) {
|
||||||
|
case M:
|
||||||
|
case I:
|
||||||
|
case D: break;
|
||||||
|
default: cigar_acceptable = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( ! cigar_acceptable ) continue;
|
||||||
|
|
||||||
|
int err = -1;
|
||||||
/*
|
/*
|
||||||
System.out.println("MM: "+numMismatches(r));
|
System.out.println("MM: "+numMismatches(r));
|
||||||
System.out.println("direct: "+numMismatchesDirect(r,contig_seq));
|
System.out.println("direct: "+numMismatchesDirect(r,contig_seq));
|
||||||
|
|
@ -130,13 +151,14 @@ public class IndelInspector extends CommandLineProgram {
|
||||||
continue;
|
continue;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if ( ERR_MODE.equals("MM")) err = numMismatches(r);
|
if ( ERR_MODE.equals("MM")) err = numMismatches(r,contig_seq);
|
||||||
else if ( ERR_MODE.equals("ERR")) err = numErrors(r);
|
else if ( ERR_MODE.equals("MC") ) err = AlignmentUtils.numMismatches(r,contig_seq);
|
||||||
else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(r);
|
else if ( ERR_MODE.equals("ERR")) err = numErrors(r,contig_seq);
|
||||||
if ( err > MAX_ERRS.intValue() ) continue;
|
else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(r,contig_seq);
|
||||||
// counter++;
|
if ( err > MAX_ERRS.intValue() ) continue;
|
||||||
// if ( counter % 1000000 == 0 ) System.out.println(counter+" records; "+col.memStatsString());
|
// counter++;
|
||||||
col.receive(r);
|
// if ( counter % 1000000 == 0 ) System.out.println(counter+" records; "+col.memStatsString());
|
||||||
|
col.receive(r);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -149,7 +171,7 @@ public class IndelInspector extends CommandLineProgram {
|
||||||
samReader.close();
|
samReader.close();
|
||||||
ptWriter.close();
|
ptWriter.close();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** This method is a HACK: it is designed to work around the current bug in NM tags created at CRD
|
/** This method is a HACK: it is designed to work around the current bug in NM tags created at CRD
|
||||||
*
|
*
|
||||||
|
|
@ -157,10 +179,10 @@ public class IndelInspector extends CommandLineProgram {
|
||||||
* @return number of errors (number of mismatches plus total length of all insertions/deletions
|
* @return number of errors (number of mismatches plus total length of all insertions/deletions
|
||||||
* @throws RuntimeException
|
* @throws RuntimeException
|
||||||
*/
|
*/
|
||||||
private static int numErrors(SAMRecord r) throws RuntimeException {
|
private static int numErrors(SAMRecord r, ReferenceSequence refseq) throws RuntimeException {
|
||||||
|
|
||||||
// NM currently stores the total number of mismatches in all blocks + 1
|
// NM currently stores the total number of mismatches in all blocks + 1
|
||||||
int errs = numMismatches(r);
|
int errs = numMismatches(r,refseq);
|
||||||
|
|
||||||
// now we have to add the total length of all indels:
|
// now we have to add the total length of all indels:
|
||||||
Cigar c = r.getCigar();
|
Cigar c = r.getCigar();
|
||||||
|
|
@ -185,10 +207,10 @@ public class IndelInspector extends CommandLineProgram {
|
||||||
* deletion will be counted as a single error regardless of the length)
|
* deletion will be counted as a single error regardless of the length)
|
||||||
* @throws RuntimeException
|
* @throws RuntimeException
|
||||||
*/
|
*/
|
||||||
private static int numMismatchesGaps(SAMRecord r) throws RuntimeException {
|
private static int numMismatchesGaps(SAMRecord r,ReferenceSequence refseq) throws RuntimeException {
|
||||||
|
|
||||||
// NM currently stores the total number of mismatches in all blocks + 1
|
// NM currently stores the total number of mismatches in all blocks + 1
|
||||||
int errs = numMismatches(r);
|
int errs = numMismatches(r,refseq);
|
||||||
|
|
||||||
// now we have to add the total length of all indels:
|
// now we have to add the total length of all indels:
|
||||||
Cigar c = r.getCigar();
|
Cigar c = r.getCigar();
|
||||||
|
|
@ -207,12 +229,14 @@ public class IndelInspector extends CommandLineProgram {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** This method is a HACK: it is designed to work around the current bug in NM tags created at CRD */
|
/** This method is a HACK: it is designed to work around the current bug in NM tags created at CRD */
|
||||||
private static int numMismatches(SAMRecord r) throws RuntimeException {
|
private static int numMismatches(SAMRecord r, ReferenceSequence refseq) throws RuntimeException {
|
||||||
|
|
||||||
// NM currently stores the total number of mismatches in all blocks + 1
|
// NM currently stores the total number of mismatches in all blocks + 1
|
||||||
return ((Integer)r.getAttribute("NM")).intValue() - 1;
|
Integer i = (Integer)r.getAttribute("NM");
|
||||||
|
if ( i == null ) return AlignmentUtils.numMismatches(r,refseq);
|
||||||
|
return ((Integer)r.getAttribute("NM")).intValue() - 1;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Trivial utility method that goes some distance trying to ensure that the input file is there;
|
/** Trivial utility method that goes some distance trying to ensure that the input file is there;
|
||||||
* the only purpose is reducing clutter in main(). Receives a default
|
* the only purpose is reducing clutter in main(). Receives a default
|
||||||
|
|
@ -265,7 +289,7 @@ public class IndelInspector extends CommandLineProgram {
|
||||||
private void setDefaultContigOrdering() {
|
private void setDefaultContigOrdering() {
|
||||||
Map<String,Integer> rco = new HashMap<String,Integer>();
|
Map<String,Integer> rco = new HashMap<String,Integer>();
|
||||||
rco.put("chrM",0);
|
rco.put("chrM",0);
|
||||||
for ( int i = 1 ; i <= 22 ; i++ ) rco.put("chr"+i,i);
|
for ( int i = 1 ; i <= 22 ; i++ ) rco.put(Integer.toString(i),i);//rco.put("chr"+i,i);
|
||||||
rco.put("chrX",23);
|
rco.put("chrX",23);
|
||||||
rco.put("chrY",24);
|
rco.put("chrY",24);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -65,26 +65,26 @@ import net.sf.samtools.*;
|
||||||
*/
|
*/
|
||||||
public class IndelRecordPileCollector implements RecordReceiver {
|
public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
|
|
||||||
private final int WAIT_STATE = 0;
|
private final int WAIT_STATE = 0;
|
||||||
private final int ACTIVE_STATE = 1;
|
private final int ACTIVE_STATE = 1;
|
||||||
|
|
||||||
private boolean avoiding_region; // some regions are too funky (contain very long indel trains)-
|
private boolean avoiding_region; // some regions are too funky (contain very long indel trains)-
|
||||||
// we will determine their span and report them,
|
// we will determine their span and report them,
|
||||||
// but we won't be counting any indels there or building piles
|
// but we won't be counting any indels there or building piles
|
||||||
|
|
||||||
private List<SAMRecord> mRecordPile; // here we keep the records before we decide how we want to emit them
|
private List<SAMRecord> mRecordPile; // here we keep the records before we decide how we want to emit them
|
||||||
private TreeSet<CountedObject<Indel> > mAllIndels; ///< individual indels encountered, with observation counts
|
private TreeSet<CountedObject<Indel> > mAllIndels; ///< individual indels encountered, with observation counts
|
||||||
private int mLastContig ; ///< keeps the index of the contig last alignment was on
|
private int mLastContig ; ///< keeps the index of the contig last alignment was on
|
||||||
private int mLastStartOnRef; ///< keeps the start position of the last alignment
|
private int mLastStartOnRef; ///< keeps the start position of the last alignment
|
||||||
private int mState; ///< WAIT_STATE or ACTIVE_STATE
|
private int mState; ///< WAIT_STATE or ACTIVE_STATE
|
||||||
private int mIndelSeparation; ///< Indels that are farther away from one another than this value
|
private int mIndelSeparation; ///< Indels that are farther away from one another than this value
|
||||||
///< will be emitted separately; trains of indels with less then
|
///< will be emitted separately; trains of indels with less then
|
||||||
///< mIndelSeparation bases between each adjacent pair will be emitted
|
///< mIndelSeparation bases between each adjacent pair will be emitted
|
||||||
///< as one pile.
|
///< as one pile.
|
||||||
|
|
||||||
// we will build histograms (distributions) of encountered indel lengths on the fly
|
// we will build histograms (distributions) of encountered indel lengths on the fly
|
||||||
private List<Integer> mIndelLengthHistI;
|
private List<Integer> mIndelLengthHistI;
|
||||||
private List<Integer> mIndelLengthHistD;
|
private List<Integer> mIndelLengthHistD;
|
||||||
|
|
||||||
private RecordReceiver defaultReceiver; // we will send there records that do not overlap with regions of interest
|
private RecordReceiver defaultReceiver; // we will send there records that do not overlap with regions of interest
|
||||||
private RecordPileReceiver indelPileReceiver; // piles over indel regions will be sent there
|
private RecordPileReceiver indelPileReceiver; // piles over indel regions will be sent there
|
||||||
|
|
@ -97,27 +97,27 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
String s = "mRecordPile: ";
|
String s = "mRecordPile: ";
|
||||||
return s+mRecordPile.size() + " mAllIndels: "+mAllIndels.size() + " mLastContig=" +mLastContig + " mLastStartOnref="+mLastStartOnRef;
|
return s+mRecordPile.size() + " mAllIndels: "+mAllIndels.size() + " mLastContig=" +mLastContig + " mLastStartOnref="+mLastStartOnRef;
|
||||||
//+" Bndries="+mIndelRegionStart +":"+ mIndelRegionStop;
|
//+" Bndries="+mIndelRegionStart +":"+ mIndelRegionStop;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public IndelRecordPileCollector(RecordReceiver rr, RecordPileReceiver rp) throws java.io.IOException {
|
public IndelRecordPileCollector(RecordReceiver rr, RecordPileReceiver rp) throws java.io.IOException {
|
||||||
mRecordPile = new LinkedList<SAMRecord>();
|
mRecordPile = new LinkedList<SAMRecord>();
|
||||||
mAllIndels = new TreeSet<CountedObject<Indel> >(
|
mAllIndels = new TreeSet<CountedObject<Indel> >(
|
||||||
new CountedObjectComparatorAdapter<Indel>(new IntervalComparator()));
|
new CountedObjectComparatorAdapter<Indel>(new IntervalComparator()));
|
||||||
mLastContig = -1;
|
mLastContig = -1;
|
||||||
mLastStartOnRef = -1;
|
mLastStartOnRef = -1;
|
||||||
mIndelSeparation = 51;
|
mIndelSeparation = 51;
|
||||||
mIndelLengthHistI = new ArrayList<Integer>();
|
mIndelLengthHistI = new ArrayList<Integer>();
|
||||||
mIndelLengthHistD = new ArrayList<Integer>();
|
mIndelLengthHistD = new ArrayList<Integer>();
|
||||||
for ( int i = 0 ; i < 5 ; i++ ) {
|
for ( int i = 0 ; i < 5 ; i++ ) {
|
||||||
mIndelLengthHistI.add(0);
|
mIndelLengthHistI.add(0);
|
||||||
mIndelLengthHistD.add(0);
|
mIndelLengthHistD.add(0);
|
||||||
}
|
}
|
||||||
defaultReceiver = rr;
|
defaultReceiver = rr;
|
||||||
indelPileReceiver = rp;
|
indelPileReceiver = rp;
|
||||||
referenceSequence = null;
|
referenceSequence = null;
|
||||||
setWaitState();
|
setWaitState();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Fully reinitializes wait state: clears record pile and indel list, resets flags and states.
|
/** Fully reinitializes wait state: clears record pile and indel list, resets flags and states.
|
||||||
* Does not emit records, just clears/resets the variables.
|
* Does not emit records, just clears/resets the variables.
|
||||||
|
|
@ -137,15 +137,15 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
referenceSequence = contig;
|
referenceSequence = contig;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** A utility method: emits into nonindelReceiver and purges from the currently held SAM record pile
|
/** A utility method: emits into nonindelReceiver and purges from the currently held SAM record pile
|
||||||
* all the consequtive records with alignment end positions less than or equal to the specified
|
* all the consequtive records with alignment end positions less than or equal to the specified
|
||||||
* position <code>pos</code>, until the first record is encountered that does not meet this condition. Note that
|
* position <code>pos</code>, until the first record is encountered that does not meet this condition. Note that
|
||||||
* there might be more alignments that end at or before <code>pos</code> later on in the pile, but
|
* there might be more alignments that end at or before <code>pos</code> later on in the pile, but
|
||||||
* they will <i> nit</i> be emitted/removed by this method.
|
* they will <i> nit</i> be emitted/removed by this method.
|
||||||
* @param pos all leading records with alignments ending before or at this position will be purged from the pile,
|
* @param pos all leading records with alignments ending before or at this position will be purged from the pile,
|
||||||
* up to the first record that does not end at or before pos.
|
* up to the first record that does not end at or before pos.
|
||||||
*/
|
*/
|
||||||
protected void purgeRecordsEndingAtOrBefore(final long pos) {
|
protected void purgeRecordsEndingAtOrBefore(final long pos) {
|
||||||
Iterator<SAMRecord> i = mRecordPile.iterator();
|
Iterator<SAMRecord> i = mRecordPile.iterator();
|
||||||
while ( i.hasNext() ) {
|
while ( i.hasNext() ) {
|
||||||
SAMRecord r = i.next();
|
SAMRecord r = i.next();
|
||||||
|
|
@ -154,13 +154,13 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
i.remove();
|
i.remove();
|
||||||
} else break;
|
} else break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** A utility method: purges from the currently held SAM record pile all the records with alignment
|
/** A utility method: purges from the currently held SAM record pile all the records with alignment
|
||||||
* start positions greater than or equal to the specified position <code>pos</code>
|
* start positions greater than or equal to the specified position <code>pos</code>
|
||||||
* @param pos all records with alignments starting at or after this position will be purged from the pile
|
* @param pos all records with alignments starting at or after this position will be purged from the pile
|
||||||
*/
|
*/
|
||||||
protected void purgeRecordsStartingAtOrAfter(final int pos) {
|
protected void purgeRecordsStartingAtOrAfter(final int pos) {
|
||||||
Iterator<SAMRecord> i = mRecordPile.iterator();
|
Iterator<SAMRecord> i = mRecordPile.iterator();
|
||||||
while ( i.hasNext() ) {
|
while ( i.hasNext() ) {
|
||||||
SAMRecord r = i.next();
|
SAMRecord r = i.next();
|
||||||
|
|
@ -169,7 +169,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
i.remove();
|
i.remove();
|
||||||
} else break;
|
} else break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** This is the main interface method of the collector: it receives alignments, inspects them, detects indels,
|
/** This is the main interface method of the collector: it receives alignments, inspects them, detects indels,
|
||||||
* updates and purges the read pile it keeps and emits alignments as needed.
|
* updates and purges the read pile it keeps and emits alignments as needed.
|
||||||
|
|
@ -198,46 +198,46 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
*/
|
*/
|
||||||
public void receive(final SAMRecord r) throws RuntimeException {
|
public void receive(final SAMRecord r) throws RuntimeException {
|
||||||
|
|
||||||
if ( r.getReadUnmappedFlag() ) return; // read did not align, nothing to do
|
if ( r.getReadUnmappedFlag() ) return; // read did not align, nothing to do
|
||||||
|
|
||||||
if ( controlRun ) {
|
if ( controlRun ) {
|
||||||
defaultReceiver.receive(r);
|
defaultReceiver.receive(r);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int currContig = r.getReferenceIndex();
|
int currContig = r.getReferenceIndex();
|
||||||
int currPos = r.getAlignmentStart();
|
int currPos = r.getAlignmentStart();
|
||||||
|
|
||||||
if ( currContig < mLastContig ) throw new RuntimeException("SAM file is not ordered by contigs");
|
if ( currContig < mLastContig ) throw new RuntimeException("SAM file is not ordered by contigs");
|
||||||
if ( currContig == mLastContig && currPos < mLastStartOnRef ) throw new RuntimeException("SAM file is not ordered by start positions");
|
if ( currContig == mLastContig && currPos < mLastStartOnRef ) throw new RuntimeException("SAM file is not ordered by start positions");
|
||||||
|
|
||||||
if ( currContig > mLastContig ) {
|
if ( currContig > mLastContig ) {
|
||||||
// we jumped onto a new contig; emit everything we might have been building and purge the piles:
|
// we jumped onto a new contig; emit everything we might have been building and purge the piles:
|
||||||
emit();
|
emit();
|
||||||
} else { // still on the same contig:
|
} else { // still on the same contig:
|
||||||
|
|
||||||
switch (mState) {
|
switch (mState) {
|
||||||
// everything ending up to currPos is guaranteed to have no overlaps with indels yet to come
|
// everything ending up to currPos is guaranteed to have no overlaps with indels yet to come
|
||||||
case WAIT_STATE: purgeRecordsEndingAtOrBefore(currPos); break;
|
case WAIT_STATE: purgeRecordsEndingAtOrBefore(currPos); break;
|
||||||
|
|
||||||
// next indel can start only after currPos (whether it is in the current read or in the
|
// next indel can start only after currPos (whether it is in the current read or in the
|
||||||
// reads yet to come). If it is far enough from the last indel we have seen, we can emit
|
// reads yet to come). If it is far enough from the last indel we have seen, we can emit
|
||||||
case ACTIVE_STATE: if ( currPos - mAllIndels.last().getObject().getStop() > mIndelSeparation ) emit(); break;
|
case ACTIVE_STATE: if ( currPos - mAllIndels.last().getObject().getStop() > mIndelSeparation ) emit(); break;
|
||||||
default: throw new RuntimeException("Unknown state");
|
default: throw new RuntimeException("Unknown state");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// does nothing if alignment has no indels, otherwise adds the indels to the list and (re)sets state to 'active'
|
// does nothing if alignment has no indels, otherwise adds the indels to the list and (re)sets state to 'active'
|
||||||
extractIndelsAndUpdateState(r.getCigar(),currPos);
|
extractIndelsAndUpdateState(r.getCigar(),currPos);
|
||||||
|
|
||||||
if ( ! avoiding_region && mAllIndels.size() > 20 ) avoiding_region = true;
|
if ( mState == ACTIVE_STATE && ( ! avoiding_region ) && ( mAllIndels.size() > 20 || mRecordPile.size() > 1000 ) ) avoiding_region = true;
|
||||||
|
|
||||||
if ( ! avoiding_region ) mRecordPile.add(r); // add new record if this is not some crazy region
|
if ( ! avoiding_region ) mRecordPile.add(r); // add new record if this is not some crazy region
|
||||||
|
|
||||||
mLastContig = currContig;
|
mLastContig = currContig;
|
||||||
mLastStartOnRef = currPos;
|
mLastStartOnRef = currPos;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Emits all reads from the currently held pile, cleans the pile and fully reinitializes wait state
|
/** Emits all reads from the currently held pile, cleans the pile and fully reinitializes wait state
|
||||||
* (clears indel list etc).
|
* (clears indel list etc).
|
||||||
|
|
@ -247,35 +247,37 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
* splits the train (and the pile) into multiple trains/piles as needed (i.e. if there are pairs of adjacent
|
* splits the train (and the pile) into multiple trains/piles as needed (i.e. if there are pairs of adjacent
|
||||||
* indels that are not overlapped by any read), and emits the final piles of records into indelReceiver.
|
* indels that are not overlapped by any read), and emits the final piles of records into indelReceiver.
|
||||||
*/
|
*/
|
||||||
private void emit() {
|
private void emit() {
|
||||||
|
|
||||||
if ( mState == WAIT_STATE || avoiding_region ) {
|
if ( mState == WAIT_STATE || avoiding_region ) {
|
||||||
if ( avoiding_region ) {
|
if ( avoiding_region ) {
|
||||||
long start = mAllIndels.first().getObject().getStart();
|
long start = mAllIndels.first().getObject().getStart();
|
||||||
long stop = mAllIndels.last().getObject().getStop();
|
long stop = mAllIndels.last().getObject().getStop();
|
||||||
System.out.println("Genomic region "+mLastContig+":"+ start + "-"+ stop +
|
System.out.println("Genomic region "+mLastContig+":"+ start + "-"+ stop +
|
||||||
" was ignored: "+mAllIndels.size() +" unique indels with average distance of "+
|
" was ignored: ");
|
||||||
|
System.out.println(" "+mAllIndels.size() +" unique indels with average distance of "+
|
||||||
((double)(stop - start))/((double)mAllIndels.size()-1) +
|
((double)(stop - start))/((double)mAllIndels.size()-1) +
|
||||||
" bases between indels");
|
" bases between indels");
|
||||||
|
System.out.println(" "+mRecordPile.size() +" reads collected before aborting");
|
||||||
}
|
}
|
||||||
|
|
||||||
// no indels or avoiding indels in bad region: send all records to defaultReceiver and clear the pile
|
// no indels or avoiding indels in bad region: send all records to defaultReceiver and clear the pile
|
||||||
for ( SAMRecord r : mRecordPile ) {
|
for ( SAMRecord r : mRecordPile ) {
|
||||||
defaultReceiver.receive(r);
|
defaultReceiver.receive(r);
|
||||||
}
|
}
|
||||||
setWaitState();
|
setWaitState();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// last minute cleanup:
|
// last minute cleanup:
|
||||||
// at this stage we have all the indels collected conservatively (in a sense
|
// at this stage we have all the indels collected conservatively (in a sense
|
||||||
// that they can be farther away than it is needed) - this means that there actually
|
// that they can be farther away than it is needed) - this means that there actually
|
||||||
// can be more than one pile in what we have stored. Also, we can still have gapless reads
|
// can be more than one pile in what we have stored. Also, we can still have gapless reads
|
||||||
// at the ends of the piles that do not really overlap with indel sites.
|
// at the ends of the piles that do not really overlap with indel sites.
|
||||||
|
|
||||||
if ( mAllIndels.size() == 0 ) throw new RuntimeException("Attempt to emit pile with no indels");
|
if ( mAllIndels.size() == 0 ) throw new RuntimeException("Attempt to emit pile with no indels");
|
||||||
|
|
||||||
HistogramAsNeeded(mAllIndels);
|
HistogramAsNeeded(mAllIndels);
|
||||||
|
|
||||||
|
|
||||||
// indels are in a sorted map, and reads were added to the pile in the order they were received (also sorted).
|
// indels are in a sorted map, and reads were added to the pile in the order they were received (also sorted).
|
||||||
|
|
@ -350,19 +352,19 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
}
|
}
|
||||||
|
|
||||||
setWaitState();
|
setWaitState();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Looks for indels in the cigar and, if finds any, updates list of indels in the current train ans sets
|
/** Looks for indels in the cigar and, if finds any, updates list of indels in the current train ans sets
|
||||||
* the state to 'active'. If cigar contains no indels, this method does not do anything (it does <i>not</i>
|
* the state to 'active'. If cigar contains no indels, this method does not do anything (it does <i>not</i>
|
||||||
* set state back to 'wait' either!). If this method finds any indels in the cigar, it first tries to find them
|
* set state back to 'wait' either!). If this method finds any indels in the cigar, it first tries to find them
|
||||||
* in the list of previously seen indels. If the indel was already seen before, its counter is updated (indels
|
* in the list of previously seen indels. If the indel was already seen before, its counter is updated (indels
|
||||||
* are stored in the list as counted objects), oherwise indel is added to the list with initial count of 1.
|
* are stored in the list as counted objects), oherwise indel is added to the list with initial count of 1.
|
||||||
*
|
*
|
||||||
* @param c alignment cigar; if it contains no indels, nothing will be done
|
* @param c alignment cigar; if it contains no indels, nothing will be done
|
||||||
* @param start position, at which the alignment represented by cigar <code>c</code> starts on the reference
|
* @param start position, at which the alignment represented by cigar <code>c</code> starts on the reference
|
||||||
*/
|
*/
|
||||||
private void extractIndelsAndUpdateState(final Cigar c, final int start) {
|
private void extractIndelsAndUpdateState(final Cigar c, final int start) {
|
||||||
//
|
//
|
||||||
// firstpos,lastpos span of the indel will be interpreted as follows:
|
// firstpos,lastpos span of the indel will be interpreted as follows:
|
||||||
// any alignment that ends strictly before firstpos or starts strictly after lastpos
|
// any alignment that ends strictly before firstpos or starts strictly after lastpos
|
||||||
|
|
@ -387,12 +389,12 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
|
|
||||||
if ( c.numCigarElements() == 1 ) return; // most of the reads have no indels, save a few cycles by returning early
|
if ( c.numCigarElements() == 1 ) return; // most of the reads have no indels, save a few cycles by returning early
|
||||||
|
|
||||||
for ( int i = 0 ; i < c.numCigarElements() ; i++ ) {
|
for ( int i = 0 ; i < c.numCigarElements() ; i++ ) {
|
||||||
|
|
||||||
final CigarElement ce = c.getCigarElement(i);
|
final CigarElement ce = c.getCigarElement(i);
|
||||||
Indel indel = null;
|
Indel indel = null;
|
||||||
|
|
||||||
switch(ce.getOperator()) {
|
switch(ce.getOperator()) {
|
||||||
case I: indel = new Indel(runninglength, ce.getLength(), IndelType.I); break;
|
case I: indel = new Indel(runninglength, ce.getLength(), IndelType.I); break;
|
||||||
case D: indel = new Indel(runninglength, ce.getLength(), IndelType.D);
|
case D: indel = new Indel(runninglength, ce.getLength(), IndelType.D);
|
||||||
runninglength += ce.getLength();
|
runninglength += ce.getLength();
|
||||||
|
|
@ -400,9 +402,9 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
case M: runninglength += ce.getLength(); break; // advance along the gapless block in the alignment
|
case M: runninglength += ce.getLength(); break; // advance along the gapless block in the alignment
|
||||||
default :
|
default :
|
||||||
throw new IllegalArgumentException("Unexpected operator in cigar string");
|
throw new IllegalArgumentException("Unexpected operator in cigar string");
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( indel == null ) continue; // element was not an indel, go grab next element...
|
if ( indel == null ) continue; // element was not an indel, go grab next element...
|
||||||
|
|
||||||
mState = ACTIVE_STATE; // this is redundant and will be executed unnecessarily many times, but it's cheap...
|
mState = ACTIVE_STATE; // this is redundant and will be executed unnecessarily many times, but it's cheap...
|
||||||
|
|
||||||
|
|
@ -411,82 +413,82 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
|
|
||||||
if ( indelWithCount.equals( found ) ) found.increment(); // we did find our indel, advance the counter
|
if ( indelWithCount.equals( found ) ) found.increment(); // we did find our indel, advance the counter
|
||||||
else mAllIndels.add(indelWithCount); // this is a new indel. Add it.
|
else mAllIndels.add(indelWithCount); // this is a new indel. Add it.
|
||||||
} // end for loop over all alignment cigar elements
|
} // end for loop over all alignment cigar elements
|
||||||
|
|
||||||
} // end extractIndels() method
|
} // end extractIndels() method
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** Counts the size of the passed <indel> argument into the appropriate size histogram
|
/** Counts the size of the passed <indel> argument into the appropriate size histogram
|
||||||
*
|
*
|
||||||
* @param indel size of this indel will be counted in
|
* @param indel size of this indel will be counted in
|
||||||
*/
|
*/
|
||||||
private void addToSizeHistogram(Indel indel) {
|
private void addToSizeHistogram(Indel indel) {
|
||||||
// count this indel's size into the appropriate bin of the appropriate histogram
|
// count this indel's size into the appropriate bin of the appropriate histogram
|
||||||
// (we count insertions and deletions separately), resizing the histogram array if needed:
|
// (we count insertions and deletions separately), resizing the histogram array if needed:
|
||||||
List<Integer> histogram;
|
List<Integer> histogram;
|
||||||
if ( indel.getType() == Indel.IndelType.D ) {
|
if ( indel.getType() == Indel.IndelType.D ) {
|
||||||
histogram = mIndelLengthHistD;
|
histogram = mIndelLengthHistD;
|
||||||
} else if ( indel.getType() == Indel.IndelType.I ) {
|
} else if ( indel.getType() == Indel.IndelType.I ) {
|
||||||
histogram = mIndelLengthHistI;
|
histogram = mIndelLengthHistI;
|
||||||
} else {
|
} else {
|
||||||
throw new RuntimeException("Indel of unknown type");
|
throw new RuntimeException("Indel of unknown type");
|
||||||
}
|
}
|
||||||
if( indel.getIndelLength() > histogram.size() ) {
|
if( indel.getIndelLength() > histogram.size() ) {
|
||||||
for ( int j = histogram.size() ; j < indel.getIndelLength() ; j++ ) histogram.add(0);
|
for ( int j = histogram.size() ; j < indel.getIndelLength() ; j++ ) histogram.add(0);
|
||||||
histogram.set((int)indel.getIndelLength()-1, 1); // we are seeing this length for the first time, so count == 1
|
histogram.set((int)indel.getIndelLength()-1, 1); // we are seeing this length for the first time, so count == 1
|
||||||
} else {
|
} else {
|
||||||
int n = histogram.get((int)indel.getIndelLength()-1);
|
int n = histogram.get((int)indel.getIndelLength()-1);
|
||||||
histogram.set((int)indel.getIndelLength()-1, n+1);
|
histogram.set((int)indel.getIndelLength()-1, n+1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Adds sizes of the indels from the list that pass some filters to the histograms
|
/** Adds sizes of the indels from the list that pass some filters to the histograms
|
||||||
*
|
*
|
||||||
* @param indels collection of indels with counts
|
* @param indels collection of indels with counts
|
||||||
*/
|
*/
|
||||||
private void HistogramAsNeeded(Collection<CountedObject<Indel>> indels) {
|
private void HistogramAsNeeded(Collection<CountedObject<Indel>> indels) {
|
||||||
for ( CountedObject<Indel> o : indels ) {
|
for ( CountedObject<Indel> o : indels ) {
|
||||||
if ( o.getCount() >= 2 ) addToSizeHistogram(o.getObject());
|
if ( o.getCount() >= 2 ) addToSizeHistogram(o.getObject());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Retruns true if the indel run has to be printed into output; currently, indel run is acceptable
|
/** Retruns true if the indel run has to be printed into output; currently, indel run is acceptable
|
||||||
* if it contains at least one indel onbserved more than once.
|
* if it contains at least one indel onbserved more than once.
|
||||||
* @param indels list of indels with counts to check for being acceptable
|
* @param indels list of indels with counts to check for being acceptable
|
||||||
* @return true if the indel run has to be printed
|
* @return true if the indel run has to be printed
|
||||||
*/
|
*/
|
||||||
private boolean shouldAcceptForOutput(List<CountedObject<Indel>> indels) {
|
private boolean shouldAcceptForOutput(List<CountedObject<Indel>> indels) {
|
||||||
for ( CountedObject<Indel> o : indels ) {
|
for ( CountedObject<Indel> o : indels ) {
|
||||||
if ( o.getCount() >= 2 ) return true;
|
if ( o.getCount() >= 2 ) return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String formatRange(List<CountedObject<Indel>> indels) {
|
private String formatRange(List<CountedObject<Indel>> indels) {
|
||||||
StringBuffer b = new StringBuffer();
|
StringBuffer b = new StringBuffer();
|
||||||
StringBuffer all = new StringBuffer();
|
StringBuffer all = new StringBuffer();
|
||||||
|
|
||||||
long min = 1000000000;
|
long min = 1000000000;
|
||||||
long max = 0;
|
long max = 0;
|
||||||
|
|
||||||
all.append("; passing indels:");
|
all.append("; passing indels:");
|
||||||
for ( CountedObject<Indel> o : indels ) {
|
for ( CountedObject<Indel> o : indels ) {
|
||||||
if ( o.getCount() < 2 ) continue;
|
if ( o.getCount() < 2 ) continue;
|
||||||
all.append(" ");
|
all.append(" ");
|
||||||
all.append(o.getObject().getIndelLength());
|
all.append(o.getObject().getIndelLength());
|
||||||
if ( o.getObject().getIndelLength() < min ) min = o.getObject().getIndelLength();
|
if ( o.getObject().getIndelLength() < min ) min = o.getObject().getIndelLength();
|
||||||
if ( o.getObject().getIndelLength() > max ) max = o.getObject().getIndelLength();
|
if ( o.getObject().getIndelLength() > max ) max = o.getObject().getIndelLength();
|
||||||
}
|
}
|
||||||
if ( max == 0 ) return new String(); // no passinf indels, return empty string
|
if ( max == 0 ) return new String(); // no passinf indels, return empty string
|
||||||
|
|
||||||
b.append(" passing min length: ");
|
b.append(" passing min length: ");
|
||||||
b.append(min);
|
b.append(min);
|
||||||
b.append("; passing max length: ");
|
b.append("; passing max length: ");
|
||||||
b.append(max);
|
b.append(max);
|
||||||
b.append(all);
|
b.append(all);
|
||||||
return b.toString();
|
return b.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void printLengthHistograms() {
|
public void printLengthHistograms() {
|
||||||
if ( mIndelLengthHistD.size() < mIndelLengthHistI.size() ) {
|
if ( mIndelLengthHistD.size() < mIndelLengthHistI.size() ) {
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,15 @@ public class TrioConcordanceRecord {
|
||||||
public int inconsistent_indels_in_parent = 0 ;
|
public int inconsistent_indels_in_parent = 0 ;
|
||||||
public int inconsistent_indels_in_kid = 0 ;
|
public int inconsistent_indels_in_kid = 0 ;
|
||||||
public int non_biallelic = 0; // number of variant calls that are not biallelic
|
public int non_biallelic = 0; // number of variant calls that are not biallelic
|
||||||
|
public long mom_assessed = 0; // number of assessed loci for mother (i.e. passing confidence threshold filter)
|
||||||
|
public long dad_assessed = 0;
|
||||||
|
public long kid_assessed = 0;
|
||||||
|
public long mom_ref = 0; // number of reference calls (out of total assessed)
|
||||||
|
public long dad_ref = 0;
|
||||||
|
public long kid_ref = 0;
|
||||||
|
public long mom_snp = 0; // number of snp calls (out of total assessed)
|
||||||
|
public long dad_snp = 0;
|
||||||
|
public long kid_snp = 0;
|
||||||
|
|
||||||
public TrioConcordanceRecord add(TrioConcordanceRecord other) {
|
public TrioConcordanceRecord add(TrioConcordanceRecord other) {
|
||||||
this.assessed_loci += other.assessed_loci;
|
this.assessed_loci += other.assessed_loci;
|
||||||
|
|
@ -32,6 +41,15 @@ public class TrioConcordanceRecord {
|
||||||
this.inconsistent_indels_in_parent += other.inconsistent_indels_in_parent;
|
this.inconsistent_indels_in_parent += other.inconsistent_indels_in_parent;
|
||||||
this.inconsistent_indels_in_kid += other.inconsistent_indels_in_kid;
|
this.inconsistent_indels_in_kid += other.inconsistent_indels_in_kid;
|
||||||
this.non_biallelic += other.non_biallelic;
|
this.non_biallelic += other.non_biallelic;
|
||||||
|
this.mom_assessed += other.mom_assessed;
|
||||||
|
this.dad_assessed += other.dad_assessed;
|
||||||
|
this.kid_assessed += other.kid_assessed;
|
||||||
|
this.mom_ref += other.mom_ref;
|
||||||
|
this.dad_ref += other.dad_ref;
|
||||||
|
this.kid_ref += other.kid_ref;
|
||||||
|
this.mom_snp += other.mom_snp;
|
||||||
|
this.dad_snp += other.dad_snp;
|
||||||
|
this.kid_snp += other.kid_snp;
|
||||||
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
@ -39,8 +57,11 @@ public class TrioConcordanceRecord {
|
||||||
public int totalSNP() { return consistent_snp + inconsistent_snp + non_biallelic; }
|
public int totalSNP() { return consistent_snp + inconsistent_snp + non_biallelic; }
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("assessed: %d; reference: %d (%3.2f); total snp: %d; consistent snp: %d (%3.2f); multiallelic: %d (%3.2f); " ,
|
return String.format("%ntotal assessed in trio: %d%n reference: %d (%3.2f)%n total snp: %d%n consistent snp: %d (%3.2f)%n multiallelic: %d (%3.2f)%nper trio individual:%n assessed:%n mother: %d%n father: %d%n daughter: %d%n" ,
|
||||||
assessed_loci, consistent_ref, ((double)consistent_ref*100.00)/assessed_loci,totalSNP(), consistent_snp, ((double)consistent_snp*100.0)/totalSNP(),
|
assessed_loci, consistent_ref, ((double)consistent_ref*100.00)/assessed_loci,totalSNP(), consistent_snp, ((double)consistent_snp*100.0)/totalSNP(),
|
||||||
non_biallelic, ((double)non_biallelic*100.0)/totalSNP());
|
non_biallelic, ((double)non_biallelic*100.0)/totalSNP(),mom_assessed,dad_assessed,kid_assessed);
|
||||||
|
// return String.format("total assessed in trio: %d%n reference: %d (%3.2f)%n total snp: %d%n consistent snp: %d (%3.2f)%n multiallelic: %d (%3.2f)" ,
|
||||||
|
// assessed_loci, consistent_ref, ((double)consistent_ref*100.00)/assessed_loci,totalSNP(), consistent_snp, ((double)consistent_snp*100.0)/totalSNP(),
|
||||||
|
// non_biallelic, ((double)non_biallelic*100.0)/totalSNP());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue