Major: allow genotyper to optionally output in 1KG format, including outputting the samples in which indels are found.

Minor: refactor 454 filtering


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1300 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-07-23 19:53:51 +00:00
parent f7168bd7cf
commit 4efe26c59a
7 changed files with 60 additions and 21 deletions

View File

@ -38,6 +38,6 @@ import org.broadinstitute.sting.utils.Utils;
public class Platform454Filter implements SamRecordFilter {
public boolean filterOut(SAMRecord rec) {
return (Utils.is454Read(rec, rec.getHeader()));
return (Utils.is454Read(rec));
}
}

View File

@ -28,9 +28,11 @@ import java.util.Set;
@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class})
public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
@Argument(fullName="bed", shortName="bed", doc="BED output file name", required=true)
java.io.File bed_file;
@Argument(fullName="somatic", shortName="somatic",
@Argument(fullName="outputFile", shortName="O", doc="output file name (defaults to BED format)", required=true)
java.io.File bed_file;
@Argument(fullName="1kg_format", shortName="1kg", doc="output in 1000 genomes format", required=false)
boolean FORMAT_1KG;
@Argument(fullName="somatic", shortName="somatic",
doc="Perform somatic calls; two input alignment files must be specified", required=false)
boolean call_somatic = false;
@Argument(fullName="verbose", shortName="verbose",
@ -318,19 +320,23 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
private String makeBedLine(Pair<IndelVariant,Integer> p, int coverage, long pos, java.io.Writer bedOutput) {
int event_length = p.first.lengthOnRef();
if ( event_length < 0 ) event_length = 0;
String message = refName+"\t"+(pos-1)+"\t"+(pos-1+event_length)+
"\t"+(event_length>0? "-":"+")+p.first.getBases() +":"+p.second+"/"+coverage;
StringBuffer message = new StringBuffer();
message.append(refName+"\t"+(pos-1)+"\t");
if ( FORMAT_1KG )
message.append(p.first.getBases().length() + "\t" + (event_length > 0 ? "D" : "I") + "\t" + p.first.getBases() + "\t" + p.first.getSamples());
else
message.append((pos-1+event_length)+"\t"+(event_length>0? "-":"+")+p.first.getBases() +":"+p.second+"/"+coverage);
if ( bedOutput != null ) {
try {
bedOutput.write(message+"\n");
bedOutput.write(message.toString()+"\n");
} catch (IOException e) {
System.out.println(e.getMessage());
e.printStackTrace();
throw new StingException("Error encountered while writing into output BED file");
}
}
return message;
return message.toString();
}
/** Same as makeBedLine(Pair,int,long,Writer), but only builds and returns the line without writing it anywhere.
@ -556,7 +562,8 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
private String bases;
private Type type;
private int count;
private HashSet<String> samples = new HashSet<String>();
public IndelVariant(Type type, String bases) {
this.type = type;
this.bases = bases;
@ -578,6 +585,22 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
public void increment() { count+=1; }
public void addSample(String sample) {
if ( sample != null )
samples.add(sample);
}
public String getSamples() {
StringBuffer sb = new StringBuffer();
Iterator i = samples.iterator();
while ( i.hasNext() ) {
sb.append(i.next());
if ( i.hasNext() )
sb.append(",");
}
return sb.toString();
}
public int getCount() { return count; }
public String getBases() { return bases; }
@ -723,7 +746,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
try {
// note that here we will be assigning indels to the first deleted base or to the first
// base after insertion, not to the last base before the event!
updateCount(localStart+eventPosition, type, bases);
updateCount(localStart+eventPosition, type, bases, r);
} catch (IndexOutOfBoundsException e) {
System.out.println("Read "+r.getReadName()+": out of coverage window bounds.Probably window is too small.\n"+
"Read length="+r.getReadLength()+"; cigar="+r.getCigarString()+"; start="+
@ -748,25 +771,41 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
* @param type
* @param bases
*/
private void updateCount(int pos, IndelVariant.Type type, String bases) {
private void updateCount(int pos, IndelVariant.Type type, String bases, SAMRecord r) {
List<IndelVariant> indelsAtSite = indels.get(pos);
if ( indelsAtSite == null ) {
indelsAtSite = new ArrayList<IndelVariant>();
indels.set(pos, indelsAtSite);
}
String sample = null;
Object readGroupAttr = r.getAttribute("RG");
if ( readGroupAttr != null ) {
SAMReadGroupRecord readGroup = r.getHeader().getReadGroup(readGroupAttr.toString());
if ( readGroup != null ) {
Object readSampleAttr = readGroup.getAttribute("SM");
if ( readSampleAttr != null )
sample = readSampleAttr.toString();
}
}
boolean found = false;
for ( IndelVariant v : indelsAtSite ) {
if ( ! v.equals(type, bases) ) continue;
v.increment();
v.increment();
v.addSample(sample);
found = true;
break;
}
if ( ! found ) indelsAtSite.add(new IndelVariant(type, bases));
}
if ( ! found ) {
IndelVariant v = new IndelVariant(type, bases);
v.addSample(sample);
indelsAtSite.add(v);
}
}
/** Resets reference start position to 0 and sets all coverage counts in the window to 0.
*

View File

@ -31,7 +31,7 @@ public class IndelIntervalWalker extends ReadWalker<IndelIntervalWalker.Interval
return ( !read.getReadUnmappedFlag() && // mapped
read.getMappingQuality() != 0 && // positive mapping quality
read.getAlignmentBlocks().size() > 1 && // indel
(allow454 || !Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) );
(allow454 || !Utils.is454Read(read)) );
}
public Interval map(char[] ref, SAMRecord read) {

View File

@ -111,7 +111,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
!read.getDuplicateReadFlag() &&
read.getMappingQuality() != 0 &&
read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START &&
(allow454 || !Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) )
(allow454 || !Utils.is454Read(read)) )
goodReads.add(read);
else if ( writer != null && !cleanedReadsOnly )
readsToWrite.add(new ComparableSAMRecord(read));

View File

@ -60,7 +60,7 @@ public class IntervalMergerWalker extends ReadWalker<Integer,Integer> {
@Override
public Integer map(char[] ref, SAMRecord read) {
if ( firstInterval == null ||
(!allow454 && Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) )
(!allow454 && Utils.is454Read(read)) )
return 0;
GenomeLoc loc = GenomeLocParser.createGenomeLoc(read);

View File

@ -40,7 +40,7 @@ public class MismatchIntervalWalker extends LocusWalker<Pair<GenomeLoc, Boolean>
SAMRecord read = reads.get(i);
if ( read.getMappingQuality() == 0 ||
read.getAlignmentBlocks().size() > 1 ||
(!allow454 && Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) )
(!allow454 && Utils.is454Read(read)) )
continue;
goodReads++;

View File

@ -190,10 +190,10 @@ public class Utils {
return new String(basesAsbytes);
}
public static boolean is454Read(SAMRecord read, SAMFileHeader header) {
public static boolean is454Read(SAMRecord read) {
Object readGroupAttr = read.getAttribute("RG");
if ( readGroupAttr != null ) {
SAMReadGroupRecord readGroup = header.getReadGroup(readGroupAttr.toString());
SAMReadGroupRecord readGroup = read.getHeader().getReadGroup(readGroupAttr.toString());
if ( readGroup != null ) {
Object readPlatformAttr = readGroup.getAttribute("PL");
if ( readPlatformAttr != null )