Major: allow the indel genotyper to optionally write its output in 1000 Genomes (1KG) format, which includes the samples in which each indel is found.
Minor: refactor 454 read filtering so that Utils.is454Read() obtains the header from the read itself.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1300 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f7168bd7cf
commit
4efe26c59a
|
|
@ -38,6 +38,6 @@ import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
public class Platform454Filter implements SamRecordFilter {
|
public class Platform454Filter implements SamRecordFilter {
|
||||||
public boolean filterOut(SAMRecord rec) {
|
public boolean filterOut(SAMRecord rec) {
|
||||||
return (Utils.is454Read(rec, rec.getHeader()));
|
return (Utils.is454Read(rec));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -28,8 +28,10 @@ import java.util.Set;
|
||||||
|
|
||||||
@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class})
|
@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class})
|
||||||
public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
@Argument(fullName="bed", shortName="bed", doc="BED output file name", required=true)
|
@Argument(fullName="outputFile", shortName="O", doc="output file name (defaults to BED format)", required=true)
|
||||||
java.io.File bed_file;
|
java.io.File bed_file;
|
||||||
|
@Argument(fullName="1kg_format", shortName="1kg", doc="output in 1000 genomes format", required=false)
|
||||||
|
boolean FORMAT_1KG;
|
||||||
@Argument(fullName="somatic", shortName="somatic",
|
@Argument(fullName="somatic", shortName="somatic",
|
||||||
doc="Perform somatic calls; two input alignment files must be specified", required=false)
|
doc="Perform somatic calls; two input alignment files must be specified", required=false)
|
||||||
boolean call_somatic = false;
|
boolean call_somatic = false;
|
||||||
|
|
@ -318,19 +320,23 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
private String makeBedLine(Pair<IndelVariant,Integer> p, int coverage, long pos, java.io.Writer bedOutput) {
|
private String makeBedLine(Pair<IndelVariant,Integer> p, int coverage, long pos, java.io.Writer bedOutput) {
|
||||||
int event_length = p.first.lengthOnRef();
|
int event_length = p.first.lengthOnRef();
|
||||||
if ( event_length < 0 ) event_length = 0;
|
if ( event_length < 0 ) event_length = 0;
|
||||||
String message = refName+"\t"+(pos-1)+"\t"+(pos-1+event_length)+
|
StringBuffer message = new StringBuffer();
|
||||||
"\t"+(event_length>0? "-":"+")+p.first.getBases() +":"+p.second+"/"+coverage;
|
message.append(refName+"\t"+(pos-1)+"\t");
|
||||||
|
if ( FORMAT_1KG )
|
||||||
|
message.append(p.first.getBases().length() + "\t" + (event_length > 0 ? "D" : "I") + "\t" + p.first.getBases() + "\t" + p.first.getSamples());
|
||||||
|
else
|
||||||
|
message.append((pos-1+event_length)+"\t"+(event_length>0? "-":"+")+p.first.getBases() +":"+p.second+"/"+coverage);
|
||||||
|
|
||||||
if ( bedOutput != null ) {
|
if ( bedOutput != null ) {
|
||||||
try {
|
try {
|
||||||
bedOutput.write(message+"\n");
|
bedOutput.write(message.toString()+"\n");
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
System.out.println(e.getMessage());
|
System.out.println(e.getMessage());
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
throw new StingException("Error encountered while writing into output BED file");
|
throw new StingException("Error encountered while writing into output BED file");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return message;
|
return message.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Same as makeBedLine(Pair,int,long,Writer), but only builds and returns the line without writing it anywhere.
|
/** Same as makeBedLine(Pair,int,long,Writer), but only builds and returns the line without writing it anywhere.
|
||||||
|
|
@ -556,6 +562,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
private String bases;
|
private String bases;
|
||||||
private Type type;
|
private Type type;
|
||||||
private int count;
|
private int count;
|
||||||
|
private HashSet<String> samples = new HashSet<String>();
|
||||||
|
|
||||||
public IndelVariant(Type type, String bases) {
|
public IndelVariant(Type type, String bases) {
|
||||||
this.type = type;
|
this.type = type;
|
||||||
|
|
@ -578,6 +585,22 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
public void increment() { count+=1; }
|
public void increment() { count+=1; }
|
||||||
|
|
||||||
|
public void addSample(String sample) {
|
||||||
|
if ( sample != null )
|
||||||
|
samples.add(sample);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSamples() {
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
Iterator i = samples.iterator();
|
||||||
|
while ( i.hasNext() ) {
|
||||||
|
sb.append(i.next());
|
||||||
|
if ( i.hasNext() )
|
||||||
|
sb.append(",");
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
public int getCount() { return count; }
|
public int getCount() { return count; }
|
||||||
|
|
||||||
public String getBases() { return bases; }
|
public String getBases() { return bases; }
|
||||||
|
|
@ -723,7 +746,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
try {
|
try {
|
||||||
// note that here we will be assigning indels to the first deleted base or to the first
|
// note that here we will be assigning indels to the first deleted base or to the first
|
||||||
// base after insertion, not to the last base before the event!
|
// base after insertion, not to the last base before the event!
|
||||||
updateCount(localStart+eventPosition, type, bases);
|
updateCount(localStart+eventPosition, type, bases, r);
|
||||||
} catch (IndexOutOfBoundsException e) {
|
} catch (IndexOutOfBoundsException e) {
|
||||||
System.out.println("Read "+r.getReadName()+": out of coverage window bounds.Probably window is too small.\n"+
|
System.out.println("Read "+r.getReadName()+": out of coverage window bounds.Probably window is too small.\n"+
|
||||||
"Read length="+r.getReadLength()+"; cigar="+r.getCigarString()+"; start="+
|
"Read length="+r.getReadLength()+"; cigar="+r.getCigarString()+"; start="+
|
||||||
|
|
@ -748,25 +771,41 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
||||||
* @param type
|
* @param type
|
||||||
* @param bases
|
* @param bases
|
||||||
*/
|
*/
|
||||||
private void updateCount(int pos, IndelVariant.Type type, String bases) {
|
private void updateCount(int pos, IndelVariant.Type type, String bases, SAMRecord r) {
|
||||||
List<IndelVariant> indelsAtSite = indels.get(pos);
|
List<IndelVariant> indelsAtSite = indels.get(pos);
|
||||||
if ( indelsAtSite == null ) {
|
if ( indelsAtSite == null ) {
|
||||||
indelsAtSite = new ArrayList<IndelVariant>();
|
indelsAtSite = new ArrayList<IndelVariant>();
|
||||||
indels.set(pos, indelsAtSite);
|
indels.set(pos, indelsAtSite);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String sample = null;
|
||||||
|
Object readGroupAttr = r.getAttribute("RG");
|
||||||
|
if ( readGroupAttr != null ) {
|
||||||
|
SAMReadGroupRecord readGroup = r.getHeader().getReadGroup(readGroupAttr.toString());
|
||||||
|
if ( readGroup != null ) {
|
||||||
|
Object readSampleAttr = readGroup.getAttribute("SM");
|
||||||
|
if ( readSampleAttr != null )
|
||||||
|
sample = readSampleAttr.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
boolean found = false;
|
boolean found = false;
|
||||||
for ( IndelVariant v : indelsAtSite ) {
|
for ( IndelVariant v : indelsAtSite ) {
|
||||||
if ( ! v.equals(type, bases) ) continue;
|
if ( ! v.equals(type, bases) ) continue;
|
||||||
|
|
||||||
v.increment();
|
v.increment();
|
||||||
|
v.addSample(sample);
|
||||||
found = true;
|
found = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ! found ) indelsAtSite.add(new IndelVariant(type, bases));
|
if ( ! found ) {
|
||||||
|
IndelVariant v = new IndelVariant(type, bases);
|
||||||
|
v.addSample(sample);
|
||||||
|
indelsAtSite.add(v);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Resets reference start position to 0 and sets all coverage counts in the window to 0.
|
/** Resets reference start position to 0 and sets all coverage counts in the window to 0.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ public class IndelIntervalWalker extends ReadWalker<IndelIntervalWalker.Interval
|
||||||
return ( !read.getReadUnmappedFlag() && // mapped
|
return ( !read.getReadUnmappedFlag() && // mapped
|
||||||
read.getMappingQuality() != 0 && // positive mapping quality
|
read.getMappingQuality() != 0 && // positive mapping quality
|
||||||
read.getAlignmentBlocks().size() > 1 && // indel
|
read.getAlignmentBlocks().size() > 1 && // indel
|
||||||
(allow454 || !Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) );
|
(allow454 || !Utils.is454Read(read)) );
|
||||||
}
|
}
|
||||||
|
|
||||||
public Interval map(char[] ref, SAMRecord read) {
|
public Interval map(char[] ref, SAMRecord read) {
|
||||||
|
|
|
||||||
|
|
@ -111,7 +111,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
!read.getDuplicateReadFlag() &&
|
!read.getDuplicateReadFlag() &&
|
||||||
read.getMappingQuality() != 0 &&
|
read.getMappingQuality() != 0 &&
|
||||||
read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START &&
|
read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START &&
|
||||||
(allow454 || !Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) )
|
(allow454 || !Utils.is454Read(read)) )
|
||||||
goodReads.add(read);
|
goodReads.add(read);
|
||||||
else if ( writer != null && !cleanedReadsOnly )
|
else if ( writer != null && !cleanedReadsOnly )
|
||||||
readsToWrite.add(new ComparableSAMRecord(read));
|
readsToWrite.add(new ComparableSAMRecord(read));
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,7 @@ public class IntervalMergerWalker extends ReadWalker<Integer,Integer> {
|
||||||
@Override
|
@Override
|
||||||
public Integer map(char[] ref, SAMRecord read) {
|
public Integer map(char[] ref, SAMRecord read) {
|
||||||
if ( firstInterval == null ||
|
if ( firstInterval == null ||
|
||||||
(!allow454 && Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) )
|
(!allow454 && Utils.is454Read(read)) )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(read);
|
GenomeLoc loc = GenomeLocParser.createGenomeLoc(read);
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,7 @@ public class MismatchIntervalWalker extends LocusWalker<Pair<GenomeLoc, Boolean>
|
||||||
SAMRecord read = reads.get(i);
|
SAMRecord read = reads.get(i);
|
||||||
if ( read.getMappingQuality() == 0 ||
|
if ( read.getMappingQuality() == 0 ||
|
||||||
read.getAlignmentBlocks().size() > 1 ||
|
read.getAlignmentBlocks().size() > 1 ||
|
||||||
(!allow454 && Utils.is454Read(read, getToolkit().getEngine().getSAMHeader())) )
|
(!allow454 && Utils.is454Read(read)) )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
goodReads++;
|
goodReads++;
|
||||||
|
|
|
||||||
|
|
@ -190,10 +190,10 @@ public class Utils {
|
||||||
return new String(basesAsbytes);
|
return new String(basesAsbytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean is454Read(SAMRecord read, SAMFileHeader header) {
|
public static boolean is454Read(SAMRecord read) {
|
||||||
Object readGroupAttr = read.getAttribute("RG");
|
Object readGroupAttr = read.getAttribute("RG");
|
||||||
if ( readGroupAttr != null ) {
|
if ( readGroupAttr != null ) {
|
||||||
SAMReadGroupRecord readGroup = header.getReadGroup(readGroupAttr.toString());
|
SAMReadGroupRecord readGroup = read.getHeader().getReadGroup(readGroupAttr.toString());
|
||||||
if ( readGroup != null ) {
|
if ( readGroup != null ) {
|
||||||
Object readPlatformAttr = readGroup.getAttribute("PL");
|
Object readPlatformAttr = readGroup.getAttribute("PL");
|
||||||
if ( readPlatformAttr != null )
|
if ( readPlatformAttr != null )
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue