From 4efe26c59a94e2dd0bad788086261b36f2279a5b Mon Sep 17 00:00:00 2001 From: ebanks Date: Thu, 23 Jul 2009 19:53:51 +0000 Subject: [PATCH] Major: allow genotyper to optionally output in 1KG format, including outputting the samples in which indels are found. Minor: refactor 454 filtering git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1300 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/filters/Platform454Filter.java | 2 +- .../walkers/indels/IndelGenotyperWalker.java | 67 +++++++++++++++---- .../walkers/indels/IndelIntervalWalker.java | 2 +- .../walkers/indels/IntervalCleanerWalker.java | 2 +- .../walkers/indels/IntervalMergerWalker.java | 2 +- .../indels/MismatchIntervalWalker.java | 2 +- .../org/broadinstitute/sting/utils/Utils.java | 4 +- 7 files changed, 60 insertions(+), 21 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java b/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java index ac39fe56a..e5b78e8a6 100755 --- a/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java +++ b/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java @@ -38,6 +38,6 @@ import org.broadinstitute.sting.utils.Utils; public class Platform454Filter implements SamRecordFilter { public boolean filterOut(SAMRecord rec) { - return (Utils.is454Read(rec, rec.getHeader())); + return (Utils.is454Read(rec)); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelGenotyperWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelGenotyperWalker.java index da05815a8..11d017c2f 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelGenotyperWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelGenotyperWalker.java @@ -28,9 +28,11 @@ import java.util.Set; @ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class}) public class IndelGenotyperWalker extends ReadWalker { - @Argument(fullName="bed", shortName="bed", doc="BED output file name", required=true) - java.io.File bed_file; - @Argument(fullName="somatic", shortName="somatic", + @Argument(fullName="outputFile", shortName="O", doc="output file name (defaults to BED format)", required=true) + java.io.File bed_file; + @Argument(fullName="1kg_format", shortName="1kg", doc="output in 1000 genomes format", required=false) + boolean FORMAT_1KG; + @Argument(fullName="somatic", shortName="somatic", doc="Perform somatic calls; two input alignment files must be specified", required=false) boolean call_somatic = false; @Argument(fullName="verbose", shortName="verbose", @@ -318,19 +320,23 @@ public class IndelGenotyperWalker extends ReadWalker { private String makeBedLine(Pair p, int coverage, long pos, java.io.Writer bedOutput) { int event_length = p.first.lengthOnRef(); if ( event_length < 0 ) event_length = 0; - String message = refName+"\t"+(pos-1)+"\t"+(pos-1+event_length)+ - "\t"+(event_length>0? "-":"+")+p.first.getBases() +":"+p.second+"/"+coverage; + StringBuffer message = new StringBuffer(); + message.append(refName+"\t"+(pos-1)+"\t"); + if ( FORMAT_1KG ) + message.append(p.first.getBases().length() + "\t" + (event_length > 0 ? "D" : "I") + "\t" + p.first.getBases() + "\t" + p.first.getSamples()); + else + message.append((pos-1+event_length)+"\t"+(event_length>0? "-":"+")+p.first.getBases() +":"+p.second+"/"+coverage); if ( bedOutput != null ) { try { - bedOutput.write(message+"\n"); + bedOutput.write(message.toString()+"\n"); } catch (IOException e) { System.out.println(e.getMessage()); e.printStackTrace(); throw new StingException("Error encountered while writing into output BED file"); } } - return message; + return message.toString(); } /** Same as makeBedLine(Pair,int,long,Writer), but only builds and returns the line without writing it anywhere. @@ -556,7 +562,8 @@ public class IndelGenotyperWalker extends ReadWalker { private String bases; private Type type; private int count; - + private HashSet samples = new HashSet(); + public IndelVariant(Type type, String bases) { this.type = type; this.bases = bases; @@ -578,6 +585,22 @@ public class IndelGenotyperWalker extends ReadWalker { public void increment() { count+=1; } + public void addSample(String sample) { + if ( sample != null ) + samples.add(sample); + } + + public String getSamples() { + StringBuffer sb = new StringBuffer(); + Iterator i = samples.iterator(); + while ( i.hasNext() ) { + sb.append(i.next()); + if ( i.hasNext() ) + sb.append(","); + } + return sb.toString(); + } + public int getCount() { return count; } public String getBases() { return bases; } @@ -723,7 +746,7 @@ public class IndelGenotyperWalker extends ReadWalker { try { // note that here we will be assigning indels to the first deleted base or to the first // base after insertion, not to the last base before the event! - updateCount(localStart+eventPosition, type, bases); + updateCount(localStart+eventPosition, type, bases, r); } catch (IndexOutOfBoundsException e) { System.out.println("Read "+r.getReadName()+": out of coverage window bounds.Probably window is too small.\n"+ "Read length="+r.getReadLength()+"; cigar="+r.getCigarString()+"; start="+ @@ -748,25 +771,41 @@ public class IndelGenotyperWalker extends ReadWalker { * @param type * @param bases */ - private void updateCount(int pos, IndelVariant.Type type, String bases) { + private void updateCount(int pos, IndelVariant.Type type, String bases, SAMRecord r) { List indelsAtSite = indels.get(pos); if ( indelsAtSite == null ) { indelsAtSite = new ArrayList(); indels.set(pos, indelsAtSite); } + String sample = null; + Object readGroupAttr = r.getAttribute("RG"); + if ( readGroupAttr != null ) { + SAMReadGroupRecord readGroup = r.getHeader().getReadGroup(readGroupAttr.toString()); + if ( readGroup != null ) { + Object readSampleAttr = readGroup.getAttribute("SM"); + if ( readSampleAttr != null ) + sample = readSampleAttr.toString(); + } + } + boolean found = false; for ( IndelVariant v : indelsAtSite ) { if ( ! v.equals(type, bases) ) continue; - v.increment(); + v.increment(); + v.addSample(sample); found = true; break; } - if ( ! found ) indelsAtSite.add(new IndelVariant(type, bases)); - - } + if ( ! found ) { + IndelVariant v = new IndelVariant(type, bases); + v.addSample(sample); + indelsAtSite.add(v); + + } + } /** Resets reference start position to 0 and sets all coverage counts in the window to 0. * diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelIntervalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelIntervalWalker.java index a35ac496d..410e9ff62 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelIntervalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IndelIntervalWalker.java @@ -31,7 +31,7 @@ public class IndelIntervalWalker extends ReadWalker 1 && // indel - (allow454 || !Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) ); + (allow454 || !Utils.is454Read(read)) ); } public Interval map(char[] ref, SAMRecord read) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java index 695dbee32..a9d4dc053 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java @@ -111,7 +111,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker !read.getDuplicateReadFlag() && read.getMappingQuality() != 0 && read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START && - (allow454 || !Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) ) + (allow454 || !Utils.is454Read(read)) ) goodReads.add(read); else if ( writer != null && !cleanedReadsOnly ) readsToWrite.add(new ComparableSAMRecord(read)); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalMergerWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalMergerWalker.java index 41de072b4..51cb2e95e 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalMergerWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalMergerWalker.java @@ -60,7 +60,7 @@ public class IntervalMergerWalker extends ReadWalker { @Override public Integer map(char[] ref, SAMRecord read) { if ( firstInterval == null || - (!allow454 && Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) ) + (!allow454 && Utils.is454Read(read)) ) return 0; GenomeLoc loc = GenomeLocParser.createGenomeLoc(read); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/MismatchIntervalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/MismatchIntervalWalker.java index 0941e0d6d..efa3fb3af 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/MismatchIntervalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/MismatchIntervalWalker.java @@ -40,7 +40,7 @@ public class MismatchIntervalWalker extends LocusWalker SAMRecord read = reads.get(i); if ( read.getMappingQuality() == 0 || read.getAlignmentBlocks().size() > 1 || - (!allow454 && Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) ) + (!allow454 && Utils.is454Read(read)) ) continue; goodReads++; diff --git a/java/src/org/broadinstitute/sting/utils/Utils.java b/java/src/org/broadinstitute/sting/utils/Utils.java index 66817cb2c..6ce45efc0 100755 --- a/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/java/src/org/broadinstitute/sting/utils/Utils.java @@ -190,10 +190,10 @@ public class Utils { return new String(basesAsbytes); } - public static boolean is454Read(SAMRecord read, SAMFileHeader header) { + public static boolean is454Read(SAMRecord read) { Object readGroupAttr = read.getAttribute("RG"); if ( readGroupAttr != null ) { - SAMReadGroupRecord readGroup = header.getReadGroup(readGroupAttr.toString()); + SAMReadGroupRecord readGroup = read.getHeader().getReadGroup(readGroupAttr.toString()); if ( readGroup != null ) { Object readPlatformAttr = readGroup.getAttribute("PL"); if ( readPlatformAttr != null )