From b4136b6d6e40175b34dffeba560c9f4cb2d64b79 Mon Sep 17 00:00:00 2001 From: asivache Date: Mon, 13 Apr 2009 04:49:19 +0000 Subject: [PATCH] a few tweaks to make it more robust: ignore reads with cigars containing anything but I,D,M; don't set up contig ordering manually, rely upon reference sequence and its dictionary; don't die if a record does not have NM tag, but faal back to direct counting instead; now requires reference as a cmdline arg git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@378 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/rodSAMPileup.java | 9 +- .../walkers/MendelianInheritanceWalker.java | 18 +- .../playground/indels/IndelInspector.java | 116 ++++--- .../indels/IndelRecordPileCollector.java | 312 +++++++++--------- .../utils/TrioConcordanceRecord.java | 25 +- 5 files changed, 268 insertions(+), 212 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodSAMPileup.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodSAMPileup.java index 3a6823c19..67b7db757 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodSAMPileup.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodSAMPileup.java @@ -127,7 +127,7 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian System.out.printf(" Exception caught during parsing Pileup line: %s%n", Utils.join(" <=> ", parts)); throw e; } - if ( nNonref > 1 ) System.out.println("SAM pileup: WARNING: multi-allelic variant at "+ loc.toString()); + if ( nNonref > 1 ) System.out.println("SAM pileup: WARNING: multi-allelic variant : ("+refBaseChar+") -->"+toMediumString()); } @@ -243,10 +243,13 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian } public String toMediumString() { - String s = String.format("%s:%s:%s", getLocation().toString(), name, observedString); + + String s = null; + if ( refBaseChar == '*' ) s = String.format("%s:%s:%s", getLocation().toString(), name,observedString); + else s = String.format("%s:%s:%s/%s", getLocation().toString(), name, observedAlleles.get(0),observedAlleles.get(1)); if ( isSNP() ) s += ": SNP"; else { - if ( isInsertion() ) s += ": Insertoin"; + if ( isInsertion() ) s += ": Insertion"; else { if ( isDeletion() ) s+= ": Deletion"; else { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/MendelianInheritanceWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/MendelianInheritanceWalker.java index fde51845f..0b28416e4 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/MendelianInheritanceWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/MendelianInheritanceWalker.java @@ -31,7 +31,13 @@ public class MendelianInheritanceWalker extends RefWalker location.getStop() ) break; + if ( location != null && r.getAlignmentStart() > location.getStop() ) break; - if ( cur_contig.equals("chrM") || GenomeLoc.compareContigs(cur_contig,"chrY")==1 ) continue; // skip chrM and unplaced contigs for now - - int err = -1; + // if ( cur_contig.equals("chrM") || GenomeLoc.compareContigs(cur_contig,"chrY") > 0 ) continue; // skip chrM and unplaced contigs for now + + // we currently do not know how to deal with cigars containing elements other than M,I,D, so + // let's just skip the reads that contain those other elements (clipped reads?) + Cigar c = r.getCigar(); + boolean cigar_acceptable = true; + for ( int z = 0 ; z < c.numCigarElements() ; z++ ) { + CigarElement ce = c.getCigarElement(z); + switch ( ce.getOperator() ) { + case M: + case I: + case D: break; + default: cigar_acceptable = false; + } + } + if ( ! cigar_acceptable ) continue; + + int err = -1; /* System.out.println("MM: "+numMismatches(r)); System.out.println("direct: "+numMismatchesDirect(r,contig_seq)); @@ -130,13 +151,14 @@ public class IndelInspector extends CommandLineProgram { continue; */ - if ( ERR_MODE.equals("MM")) err = numMismatches(r); - else if ( ERR_MODE.equals("ERR")) err = numErrors(r); - else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(r); - if ( err > MAX_ERRS.intValue() ) continue; -// counter++; -// if ( counter % 1000000 == 0 ) System.out.println(counter+" records; "+col.memStatsString()); - col.receive(r); + if ( ERR_MODE.equals("MM")) err = numMismatches(r,contig_seq); + else if ( ERR_MODE.equals("MC") ) err = AlignmentUtils.numMismatches(r,contig_seq); + else if ( ERR_MODE.equals("ERR")) err = numErrors(r,contig_seq); + else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(r,contig_seq); + if ( err > MAX_ERRS.intValue() ) continue; + // counter++; + // if ( counter % 1000000 == 0 ) System.out.println(counter+" records; "+col.memStatsString()); + col.receive(r); } @@ -149,7 +171,7 @@ public class IndelInspector extends CommandLineProgram { samReader.close(); ptWriter.close(); return 0; - } + } /** This method is a HACK: it is designed to work around the current bug in NM tags created at CRD * @@ -157,10 +179,10 @@ public class IndelInspector extends CommandLineProgram { * @return number of errors (number of mismatches plus total length of all insertions/deletions * @throws RuntimeException */ - private static int numErrors(SAMRecord r) throws RuntimeException { + private static int numErrors(SAMRecord r, ReferenceSequence refseq) throws RuntimeException { // NM currently stores the total number of mismatches in all blocks + 1 - int errs = numMismatches(r); + int errs = numMismatches(r,refseq); // now we have to add the total length of all indels: Cigar c = r.getCigar(); @@ -185,10 +207,10 @@ public class IndelInspector extends CommandLineProgram { * deletion will be counted as a single error regardless of the length) * @throws RuntimeException */ - private static int numMismatchesGaps(SAMRecord r) throws RuntimeException { + private static int numMismatchesGaps(SAMRecord r,ReferenceSequence refseq) throws RuntimeException { // NM currently stores the total number of mismatches in all blocks + 1 - int errs = numMismatches(r); + int errs = numMismatches(r,refseq); // now we have to add the total length of all indels: Cigar c = r.getCigar(); @@ -207,12 +229,14 @@ public class IndelInspector extends CommandLineProgram { } /** This method is a HACK: it is designed to work around the current bug in NM tags created at CRD */ - private static int numMismatches(SAMRecord r) throws RuntimeException { + private static int numMismatches(SAMRecord r, ReferenceSequence refseq) throws RuntimeException { - // NM currently stores the total number of mismatches in all blocks + 1 - return ((Integer)r.getAttribute("NM")).intValue() - 1; + // NM currently stores the total number of mismatches in all blocks + 1 + Integer i = (Integer)r.getAttribute("NM"); + if ( i == null ) return AlignmentUtils.numMismatches(r,refseq); + return ((Integer)r.getAttribute("NM")).intValue() - 1; - } + } /** Trivial utility method that goes some distance trying to ensure that the input file is there; * the only purpose is reducing clutter in main(). Receives a default @@ -265,7 +289,7 @@ public class IndelInspector extends CommandLineProgram { private void setDefaultContigOrdering() { Map rco = new HashMap(); rco.put("chrM",0); - for ( int i = 1 ; i <= 22 ; i++ ) rco.put("chr"+i,i); + for ( int i = 1 ; i <= 22 ; i++ ) rco.put(Integer.toString(i),i);//rco.put("chr"+i,i); rco.put("chrX",23); rco.put("chrY",24); } diff --git a/java/src/org/broadinstitute/sting/playground/indels/IndelRecordPileCollector.java b/java/src/org/broadinstitute/sting/playground/indels/IndelRecordPileCollector.java index 5d98c5fca..56be41780 100755 --- a/java/src/org/broadinstitute/sting/playground/indels/IndelRecordPileCollector.java +++ b/java/src/org/broadinstitute/sting/playground/indels/IndelRecordPileCollector.java @@ -65,26 +65,26 @@ import net.sf.samtools.*; */ public class IndelRecordPileCollector implements RecordReceiver { - private final int WAIT_STATE = 0; - private final int ACTIVE_STATE = 1; - - private boolean avoiding_region; // some regions are too funky (contain very long indel trains)- - // we will determine their span and report them, + private final int WAIT_STATE = 0; + private final int ACTIVE_STATE = 1; + + private boolean avoiding_region; // some regions are too funky (contain very long indel trains)- + // we will determine their span and report them, // but we won't be counting any indels there or building piles - private List mRecordPile; // here we keep the records before we decide how we want to emit them - private TreeSet > mAllIndels; ///< individual indels encountered, with observation counts - private int mLastContig ; ///< keeps the index of the contig last alignment was on - private int mLastStartOnRef; ///< keeps the start position of the last alignment - private int mState; ///< WAIT_STATE or ACTIVE_STATE - private int mIndelSeparation; ///< Indels that are farther away from one another than this value - ///< will be emitted separately; trains of indels with less then - ///< mIndelSeparation bases between each adjacent pair will be emitted - ///< as one pile. + private List mRecordPile; // here we keep the records before we decide how we want to emit them + private TreeSet > mAllIndels; ///< individual indels encountered, with observation counts + private int mLastContig ; ///< keeps the index of the contig last alignment was on + private int mLastStartOnRef; ///< keeps the start position of the last alignment + private int mState; ///< WAIT_STATE or ACTIVE_STATE + private int mIndelSeparation; ///< Indels that are farther away from one another than this value + ///< will be emitted separately; trains of indels with less then + ///< mIndelSeparation bases between each adjacent pair will be emitted + ///< as one pile. - // we will build histograms (distributions) of encountered indel lengths on the fly - private List mIndelLengthHistI; - private List mIndelLengthHistD; + // we will build histograms (distributions) of encountered indel lengths on the fly + private List mIndelLengthHistI; + private List mIndelLengthHistD; private RecordReceiver defaultReceiver; // we will send there records that do not overlap with regions of interest private RecordPileReceiver indelPileReceiver; // piles over indel regions will be sent there @@ -97,27 +97,27 @@ public class IndelRecordPileCollector implements RecordReceiver { String s = "mRecordPile: "; return s+mRecordPile.size() + " mAllIndels: "+mAllIndels.size() + " mLastContig=" +mLastContig + " mLastStartOnref="+mLastStartOnRef; //+" Bndries="+mIndelRegionStart +":"+ mIndelRegionStop; - } + } - public IndelRecordPileCollector(RecordReceiver rr, RecordPileReceiver rp) throws java.io.IOException { - mRecordPile = new LinkedList(); - mAllIndels = new TreeSet >( - new CountedObjectComparatorAdapter(new IntervalComparator())); - mLastContig = -1; - mLastStartOnRef = -1; - mIndelSeparation = 51; - mIndelLengthHistI = new ArrayList(); - mIndelLengthHistD = new ArrayList(); - for ( int i = 0 ; i < 5 ; i++ ) { - mIndelLengthHistI.add(0); - mIndelLengthHistD.add(0); - } + public IndelRecordPileCollector(RecordReceiver rr, RecordPileReceiver rp) throws java.io.IOException { + mRecordPile = new LinkedList(); + mAllIndels = new TreeSet >( + new CountedObjectComparatorAdapter(new IntervalComparator())); + mLastContig = -1; + mLastStartOnRef = -1; + mIndelSeparation = 51; + mIndelLengthHistI = new ArrayList(); + mIndelLengthHistD = new ArrayList(); + for ( int i = 0 ; i < 5 ; i++ ) { + mIndelLengthHistI.add(0); + mIndelLengthHistD.add(0); + } defaultReceiver = rr; indelPileReceiver = rp; referenceSequence = null; setWaitState(); - } + } /** Fully reinitializes wait state: clears record pile and indel list, resets flags and states. * Does not emit records, just clears/resets the variables. @@ -136,16 +136,16 @@ public class IndelRecordPileCollector implements RecordReceiver { public void setReferenceSequence(String contig) { referenceSequence = contig; } - - /** A utility method: emits into nonindelReceiver and purges from the currently held SAM record pile + + /** A utility method: emits into nonindelReceiver and purges from the currently held SAM record pile * all the consequtive records with alignment end positions less than or equal to the specified * position pos, until the first record is encountered that does not meet this condition. Note that * there might be more alignments that end at or before pos later on in the pile, but * they will nit be emitted/removed by this method. - * @param pos all leading records with alignments ending before or at this position will be purged from the pile, + * @param pos all leading records with alignments ending before or at this position will be purged from the pile, * up to the first record that does not end at or before pos. - */ - protected void purgeRecordsEndingAtOrBefore(final long pos) { + */ + protected void purgeRecordsEndingAtOrBefore(final long pos) { Iterator i = mRecordPile.iterator(); while ( i.hasNext() ) { SAMRecord r = i.next(); @@ -154,13 +154,13 @@ public class IndelRecordPileCollector implements RecordReceiver { i.remove(); } else break; } - } + } /** A utility method: purges from the currently held SAM record pile all the records with alignment * start positions greater than or equal to the specified position pos * @param pos all records with alignments starting at or after this position will be purged from the pile */ - protected void purgeRecordsStartingAtOrAfter(final int pos) { + protected void purgeRecordsStartingAtOrAfter(final int pos) { Iterator i = mRecordPile.iterator(); while ( i.hasNext() ) { SAMRecord r = i.next(); @@ -169,7 +169,7 @@ public class IndelRecordPileCollector implements RecordReceiver { i.remove(); } else break; } - } + } /** This is the main interface method of the collector: it receives alignments, inspects them, detects indels, * updates and purges the read pile it keeps and emits alignments as needed. @@ -198,46 +198,46 @@ public class IndelRecordPileCollector implements RecordReceiver { */ public void receive(final SAMRecord r) throws RuntimeException { - if ( r.getReadUnmappedFlag() ) return; // read did not align, nothing to do + if ( r.getReadUnmappedFlag() ) return; // read did not align, nothing to do if ( controlRun ) { defaultReceiver.receive(r); return; } - int currContig = r.getReferenceIndex(); - int currPos = r.getAlignmentStart(); + int currContig = r.getReferenceIndex(); + int currPos = r.getAlignmentStart(); - if ( currContig < mLastContig ) throw new RuntimeException("SAM file is not ordered by contigs"); - if ( currContig == mLastContig && currPos < mLastStartOnRef ) throw new RuntimeException("SAM file is not ordered by start positions"); - - if ( currContig > mLastContig ) { - // we jumped onto a new contig; emit everything we might have been building and purge the piles: - emit(); - } else { // still on the same contig: + if ( currContig < mLastContig ) throw new RuntimeException("SAM file is not ordered by contigs"); + if ( currContig == mLastContig && currPos < mLastStartOnRef ) throw new RuntimeException("SAM file is not ordered by start positions"); + + if ( currContig > mLastContig ) { + // we jumped onto a new contig; emit everything we might have been building and purge the piles: + emit(); + } else { // still on the same contig: switch (mState) { - // everything ending up to currPos is guaranteed to have no overlaps with indels yet to come - case WAIT_STATE: purgeRecordsEndingAtOrBefore(currPos); break; - - // next indel can start only after currPos (whether it is in the current read or in the - // reads yet to come). If it is far enough from the last indel we have seen, we can emit - case ACTIVE_STATE: if ( currPos - mAllIndels.last().getObject().getStop() > mIndelSeparation ) emit(); break; - default: throw new RuntimeException("Unknown state"); + // everything ending up to currPos is guaranteed to have no overlaps with indels yet to come + case WAIT_STATE: purgeRecordsEndingAtOrBefore(currPos); break; + + // next indel can start only after currPos (whether it is in the current read or in the + // reads yet to come). If it is far enough from the last indel we have seen, we can emit + case ACTIVE_STATE: if ( currPos - mAllIndels.last().getObject().getStop() > mIndelSeparation ) emit(); break; + default: throw new RuntimeException("Unknown state"); } } // does nothing if alignment has no indels, otherwise adds the indels to the list and (re)sets state to 'active' extractIndelsAndUpdateState(r.getCigar(),currPos); - if ( ! avoiding_region && mAllIndels.size() > 20 ) avoiding_region = true; + if ( mState == ACTIVE_STATE && ( ! avoiding_region ) && ( mAllIndels.size() > 20 || mRecordPile.size() > 1000 ) ) avoiding_region = true; - if ( ! avoiding_region ) mRecordPile.add(r); // add new record if this is not some crazy region + if ( ! avoiding_region ) mRecordPile.add(r); // add new record if this is not some crazy region - mLastContig = currContig; - mLastStartOnRef = currPos; + mLastContig = currContig; + mLastStartOnRef = currPos; - } + } /** Emits all reads from the currently held pile, cleans the pile and fully reinitializes wait state * (clears indel list etc). @@ -247,35 +247,37 @@ public class IndelRecordPileCollector implements RecordReceiver { * splits the train (and the pile) into multiple trains/piles as needed (i.e. if there are pairs of adjacent * indels that are not overlapped by any read), and emits the final piles of records into indelReceiver. */ - private void emit() { + private void emit() { - if ( mState == WAIT_STATE || avoiding_region ) { + if ( mState == WAIT_STATE || avoiding_region ) { if ( avoiding_region ) { long start = mAllIndels.first().getObject().getStart(); long stop = mAllIndels.last().getObject().getStop(); System.out.println("Genomic region "+mLastContig+":"+ start + "-"+ stop + - " was ignored: "+mAllIndels.size() +" unique indels with average distance of "+ + " was ignored: "); + System.out.println(" "+mAllIndels.size() +" unique indels with average distance of "+ ((double)(stop - start))/((double)mAllIndels.size()-1) + " bases between indels"); + System.out.println(" "+mRecordPile.size() +" reads collected before aborting"); } - // no indels or avoiding indels in bad region: send all records to defaultReceiver and clear the pile + // no indels or avoiding indels in bad region: send all records to defaultReceiver and clear the pile for ( SAMRecord r : mRecordPile ) { defaultReceiver.receive(r); } setWaitState(); - return; - } - - // last minute cleanup: - // at this stage we have all the indels collected conservatively (in a sense - // that they can be farther away than it is needed) - this means that there actually - // can be more than one pile in what we have stored. Also, we can still have gapless reads - // at the ends of the piles that do not really overlap with indel sites. - - if ( mAllIndels.size() == 0 ) throw new RuntimeException("Attempt to emit pile with no indels"); - - HistogramAsNeeded(mAllIndels); + return; + } + + // last minute cleanup: + // at this stage we have all the indels collected conservatively (in a sense + // that they can be farther away than it is needed) - this means that there actually + // can be more than one pile in what we have stored. Also, we can still have gapless reads + // at the ends of the piles that do not really overlap with indel sites. + + if ( mAllIndels.size() == 0 ) throw new RuntimeException("Attempt to emit pile with no indels"); + + HistogramAsNeeded(mAllIndels); // indels are in a sorted map, and reads were added to the pile in the order they were received (also sorted). @@ -350,19 +352,19 @@ public class IndelRecordPileCollector implements RecordReceiver { } setWaitState(); - } + } - /** Looks for indels in the cigar and, if finds any, updates list of indels in the current train ans sets + /** Looks for indels in the cigar and, if finds any, updates list of indels in the current train ans sets * the state to 'active'. If cigar contains no indels, this method does not do anything (it does not * set state back to 'wait' either!). If this method finds any indels in the cigar, it first tries to find them * in the list of previously seen indels. If the indel was already seen before, its counter is updated (indels * are stored in the list as counted objects), oherwise indel is added to the list with initial count of 1. - * - * @param c alignment cigar; if it contains no indels, nothing will be done - * @param start position, at which the alignment represented by cigar c starts on the reference - */ - private void extractIndelsAndUpdateState(final Cigar c, final int start) { + * + * @param c alignment cigar; if it contains no indels, nothing will be done + * @param start position, at which the alignment represented by cigar c starts on the reference + */ + private void extractIndelsAndUpdateState(final Cigar c, final int start) { // // firstpos,lastpos span of the indel will be interpreted as follows: // any alignment that ends strictly before firstpos or starts strictly after lastpos @@ -387,12 +389,12 @@ public class IndelRecordPileCollector implements RecordReceiver { if ( c.numCigarElements() == 1 ) return; // most of the reads have no indels, save a few cycles by returning early - for ( int i = 0 ; i < c.numCigarElements() ; i++ ) { + for ( int i = 0 ; i < c.numCigarElements() ; i++ ) { - final CigarElement ce = c.getCigarElement(i); + final CigarElement ce = c.getCigarElement(i); Indel indel = null; - switch(ce.getOperator()) { + switch(ce.getOperator()) { case I: indel = new Indel(runninglength, ce.getLength(), IndelType.I); break; case D: indel = new Indel(runninglength, ce.getLength(), IndelType.D); runninglength += ce.getLength(); @@ -400,9 +402,9 @@ public class IndelRecordPileCollector implements RecordReceiver { case M: runninglength += ce.getLength(); break; // advance along the gapless block in the alignment default : throw new IllegalArgumentException("Unexpected operator in cigar string"); - } + } - if ( indel == null ) continue; // element was not an indel, go grab next element... + if ( indel == null ) continue; // element was not an indel, go grab next element... mState = ACTIVE_STATE; // this is redundant and will be executed unnecessarily many times, but it's cheap... @@ -411,82 +413,82 @@ public class IndelRecordPileCollector implements RecordReceiver { if ( indelWithCount.equals( found ) ) found.increment(); // we did find our indel, advance the counter else mAllIndels.add(indelWithCount); // this is a new indel. Add it. - } // end for loop over all alignment cigar elements + } // end for loop over all alignment cigar elements - } // end extractIndels() method + } // end extractIndels() method - /** Counts the size of the passed argument into the appropriate size histogram - * - * @param indel size of this indel will be counted in - */ - private void addToSizeHistogram(Indel indel) { - // count this indel's size into the appropriate bin of the appropriate histogram - // (we count insertions and deletions separately), resizing the histogram array if needed: - List histogram; - if ( indel.getType() == Indel.IndelType.D ) { - histogram = mIndelLengthHistD; - } else if ( indel.getType() == Indel.IndelType.I ) { - histogram = mIndelLengthHistI; - } else { - throw new RuntimeException("Indel of unknown type"); - } - if( indel.getIndelLength() > histogram.size() ) { - for ( int j = histogram.size() ; j < indel.getIndelLength() ; j++ ) histogram.add(0); - histogram.set((int)indel.getIndelLength()-1, 1); // we are seeing this length for the first time, so count == 1 - } else { - int n = histogram.get((int)indel.getIndelLength()-1); - histogram.set((int)indel.getIndelLength()-1, n+1); - } - } + /** Counts the size of the passed argument into the appropriate size histogram + * + * @param indel size of this indel will be counted in + */ + private void addToSizeHistogram(Indel indel) { + // count this indel's size into the appropriate bin of the appropriate histogram + // (we count insertions and deletions separately), resizing the histogram array if needed: + List histogram; + if ( indel.getType() == Indel.IndelType.D ) { + histogram = mIndelLengthHistD; + } else if ( indel.getType() == Indel.IndelType.I ) { + histogram = mIndelLengthHistI; + } else { + throw new RuntimeException("Indel of unknown type"); + } + if( indel.getIndelLength() > histogram.size() ) { + for ( int j = histogram.size() ; j < indel.getIndelLength() ; j++ ) histogram.add(0); + histogram.set((int)indel.getIndelLength()-1, 1); // we are seeing this length for the first time, so count == 1 + } else { + int n = histogram.get((int)indel.getIndelLength()-1); + histogram.set((int)indel.getIndelLength()-1, n+1); + } + } - /** Adds sizes of the indels from the list that pass some filters to the histograms - * - * @param indels collection of indels with counts - */ - private void HistogramAsNeeded(Collection> indels) { - for ( CountedObject o : indels ) { - if ( o.getCount() >= 2 ) addToSizeHistogram(o.getObject()); - } - } + /** Adds sizes of the indels from the list that pass some filters to the histograms + * + * @param indels collection of indels with counts + */ + private void HistogramAsNeeded(Collection> indels) { + for ( CountedObject o : indels ) { + if ( o.getCount() >= 2 ) addToSizeHistogram(o.getObject()); + } + } - /** Retruns true if the indel run has to be printed into output; currently, indel run is acceptable - * if it contains at least one indel onbserved more than once. - * @param indels list of indels with counts to check for being acceptable - * @return true if the indel run has to be printed - */ - private boolean shouldAcceptForOutput(List> indels) { - for ( CountedObject o : indels ) { - if ( o.getCount() >= 2 ) return true; - } - return false; - } - - private String formatRange(List> indels) { - StringBuffer b = new StringBuffer(); - StringBuffer all = new StringBuffer(); - - long min = 1000000000; - long max = 0; + /** Retruns true if the indel run has to be printed into output; currently, indel run is acceptable + * if it contains at least one indel onbserved more than once. + * @param indels list of indels with counts to check for being acceptable + * @return true if the indel run has to be printed + */ + private boolean shouldAcceptForOutput(List> indels) { + for ( CountedObject o : indels ) { + if ( o.getCount() >= 2 ) return true; + } + return false; + } + private String formatRange(List> indels) { + StringBuffer b = new StringBuffer(); + StringBuffer all = new StringBuffer(); + + long min = 1000000000; + long max = 0; + all.append("; passing indels:"); - for ( CountedObject o : indels ) { - if ( o.getCount() < 2 ) continue; + for ( CountedObject o : indels ) { + if ( o.getCount() < 2 ) continue; all.append(" "); all.append(o.getObject().getIndelLength()); - if ( o.getObject().getIndelLength() < min ) min = o.getObject().getIndelLength(); - if ( o.getObject().getIndelLength() > max ) max = o.getObject().getIndelLength(); - } + if ( o.getObject().getIndelLength() < min ) min = o.getObject().getIndelLength(); + if ( o.getObject().getIndelLength() > max ) max = o.getObject().getIndelLength(); + } if ( max == 0 ) return new String(); // no passinf indels, return empty string - - b.append(" passing min length: "); - b.append(min); - b.append("; passing max length: "); - b.append(max); - b.append(all); - return b.toString(); - } + + b.append(" passing min length: "); + b.append(min); + b.append("; passing max length: "); + b.append(max); + b.append(all); + return b.toString(); + } public void printLengthHistograms() { if ( mIndelLengthHistD.size() < mIndelLengthHistI.size() ) { diff --git a/java/src/org/broadinstitute/sting/playground/utils/TrioConcordanceRecord.java b/java/src/org/broadinstitute/sting/playground/utils/TrioConcordanceRecord.java index bada25249..a9a099bdb 100644 --- a/java/src/org/broadinstitute/sting/playground/utils/TrioConcordanceRecord.java +++ b/java/src/org/broadinstitute/sting/playground/utils/TrioConcordanceRecord.java @@ -19,6 +19,15 @@ public class TrioConcordanceRecord { public int inconsistent_indels_in_parent = 0 ; public int inconsistent_indels_in_kid = 0 ; public int non_biallelic = 0; // number of variant calls that are not biallelic + public long mom_assessed = 0; // number of assessed loci for mother (i.e. passing confidence threshold filter) + public long dad_assessed = 0; + public long kid_assessed = 0; + public long mom_ref = 0; // number of reference calls (out of total assessed) + public long dad_ref = 0; + public long kid_ref = 0; + public long mom_snp = 0; // number of snp calls (out of total assessed) + public long dad_snp = 0; + public long kid_snp = 0; public TrioConcordanceRecord add(TrioConcordanceRecord other) { this.assessed_loci += other.assessed_loci; @@ -32,6 +41,15 @@ public class TrioConcordanceRecord { this.inconsistent_indels_in_parent += other.inconsistent_indels_in_parent; this.inconsistent_indels_in_kid += other.inconsistent_indels_in_kid; this.non_biallelic += other.non_biallelic; + this.mom_assessed += other.mom_assessed; + this.dad_assessed += other.dad_assessed; + this.kid_assessed += other.kid_assessed; + this.mom_ref += other.mom_ref; + this.dad_ref += other.dad_ref; + this.kid_ref += other.kid_ref; + this.mom_snp += other.mom_snp; + this.dad_snp += other.dad_snp; + this.kid_snp += other.kid_snp; return this; } @@ -39,8 +57,11 @@ public class TrioConcordanceRecord { public int totalSNP() { return consistent_snp + inconsistent_snp + non_biallelic; } public String toString() { - return String.format("assessed: %d; reference: %d (%3.2f); total snp: %d; consistent snp: %d (%3.2f); multiallelic: %d (%3.2f); " , + return String.format("%ntotal assessed in trio: %d%n reference: %d (%3.2f)%n total snp: %d%n consistent snp: %d (%3.2f)%n multiallelic: %d (%3.2f)%nper trio individual:%n assessed:%n mother: %d%n father: %d%n daughter: %d%n" , assessed_loci, consistent_ref, ((double)consistent_ref*100.00)/assessed_loci,totalSNP(), consistent_snp, ((double)consistent_snp*100.0)/totalSNP(), - non_biallelic, ((double)non_biallelic*100.0)/totalSNP()); + non_biallelic, ((double)non_biallelic*100.0)/totalSNP(),mom_assessed,dad_assessed,kid_assessed); +// return String.format("total assessed in trio: %d%n reference: %d (%3.2f)%n total snp: %d%n consistent snp: %d (%3.2f)%n multiallelic: %d (%3.2f)" , +// assessed_loci, consistent_ref, ((double)consistent_ref*100.00)/assessed_loci,totalSNP(), consistent_snp, ((double)consistent_snp*100.0)/totalSNP(), +// non_biallelic, ((double)non_biallelic*100.0)/totalSNP()); } }