Porting SAM pileup ROD to Tribble as a case study.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3356 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6839c194cb
commit
3e9ad4bbd0
|
|
@ -1,693 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFileWalker;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
|
||||
/**
|
||||
* This class wraps Maq/samtools allele calls from pileup format and presents them as a ROD.<br>
|
||||
*
|
||||
* Example format:<br>
|
||||
* for SNP:<br>
|
||||
* [chr] [pos] [ref] [consensus allele(s)] [consensus confidence] [snp confidence] [max mapping qual] [num reads in the pile] <br>
|
||||
* chrX 466 T Y 170 170 88 32 ... (piles of read bases and quals follow) <br>
|
||||
* <br>
|
||||
* for indel: <br>
|
||||
* [chr] [pos] [always *] [consensus alleles] [consensus conf.?] [indel conf.?] [max mapping qual] [num reads in the pile] [indel] [always *?] [reads haveindel] [reads may have indel] [other reads?]<br>
|
||||
* chrX 141444 * +CA/+CA 32 468 255 25 +CA * 5 2 12 <br>
|
||||
* User: asivache
|
||||
* Date: Apr 03, 2009
|
||||
* Time: 2:58:33 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
|
||||
public class SAMPileupRecord {
|
||||
private static final int NO_VARIANT = -1;
|
||||
private static final int SNP_VARIANT = 0;
|
||||
private static final int INSERTION_VARIANT = 1;
|
||||
private static final int DELETION_VARIANT = 2;
|
||||
private static final int INDEL_VARIANT = 3;
|
||||
|
||||
// allocate once and don't ever bother creating them again:
|
||||
private static final String baseA = new String("A");
|
||||
private static final String baseC = new String("C");
|
||||
private static final String baseG = new String("G");
|
||||
private static final String baseT = new String("T");
|
||||
private static final String emptyStr = new String(); // we will use this for "reference" allele in insertions
|
||||
|
||||
protected GenomeLoc loc; // genomic location of this genotyped site
|
||||
// Reference sequence chromosome or scaffold
|
||||
// Start and stop positions in chrom
|
||||
|
||||
protected char refBaseChar; // what we have set for the reference base (is set to a '*' for indel!)
|
||||
protected String refBases; // the reference base sequence according to NCBI; single base for point mutations, deleted bases for deletions, empty string for insertions
|
||||
protected String observedString; // stores the actual string representation of observed alleles (for point mutations stores as A/C, not in extended alphabet!!)
|
||||
|
||||
protected String pileupQuals; // the read base qualities
|
||||
protected String pileupBases; // the read bases themselves
|
||||
|
||||
protected List<String> observedAlleles = null; // The sequences of the observed alleles (e.g. {"A","C"} for point mutation or {"","+CC"} for het. insertion
|
||||
protected int varType = NO_VARIANT;
|
||||
protected int ploidy = 2; // how many allelic variants we have?
|
||||
protected int nNonref = 0; // number of non-reference alleles observed
|
||||
protected int eventLength = 0; // number of inserted or deleted bases
|
||||
|
||||
protected double consensusScore = 0;
|
||||
protected double variantScore = 0;
|
||||
|
||||
private char fldDelim = '\t';
|
||||
private String name = emptyStr;
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Constructors
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public SAMPileupRecord(String name) {
|
||||
this.name = name; // keeps track name
|
||||
}
|
||||
|
||||
public boolean parseLine(final String line) {
|
||||
// 0 1 2 3 4 5 6 7
|
||||
//* chrX 466 T Y 170 170 88 32 ... (piles of read bases and quals follow)
|
||||
//* chrX 141444 * +CA/+CA 32 468 255 25 +CA * 5 2 12 6
|
||||
try {
|
||||
|
||||
int[] pos = Utils.indexOfAll(line, fldDelim );
|
||||
String contig = line.substring(0,pos[0] ); // field 0
|
||||
long start = Long.parseLong(line.substring(pos[0]+1, pos[1])) ; // field 1
|
||||
|
||||
refBaseChar = Character.toUpperCase(line.charAt(pos[1]+1)); // field 2 ( single char)
|
||||
|
||||
observedString = line.substring(pos[2]+1, pos[3]).toUpperCase(); // field 3
|
||||
observedAlleles = new ArrayList<String>(2);
|
||||
|
||||
consensusScore = Double.parseDouble(line.substring(pos[3]+1,pos[4]));
|
||||
variantScore = Double.parseDouble(line.substring(pos[4]+1, pos[5]));
|
||||
|
||||
if ( refBaseChar == '*' ) {
|
||||
|
||||
parseIndels(observedString) ;
|
||||
if ( varType == DELETION_VARIANT ) loc = GenomeLocParser.createGenomeLoc(contig, start, start+eventLength-1);
|
||||
else loc = GenomeLocParser.createGenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!!
|
||||
} else {
|
||||
parseBasesAndQuals(line,pos[7]+1,pos[8], pos[8]+1, ( pos.length > 9 ? pos[9] : line.length()) );
|
||||
// parseBasesAndQuals(line.substring(pos[7]+1,pos[8]), line.substring(pos[8]+1, ( pos.length > 9 ? pos[9] : line.length()) ) );
|
||||
// if the variant is a SNP or a reference base (i.e. no variant at all)
|
||||
if ( observedString.length() != 1 ) throw new RuntimeException( "point mutation genotype is expected to be represented by a single letter");
|
||||
|
||||
refBases = line.substring(pos[1]+1, pos[2]).toUpperCase();
|
||||
eventLength = 1;
|
||||
//loc = new GenomeLoc(contig, start, start+1);
|
||||
loc = GenomeLocParser.createGenomeLoc(contig, start, start);
|
||||
|
||||
char ch = observedString.charAt(0);
|
||||
|
||||
switch ( ch ) {
|
||||
case 'A': observedAlleles.add(baseA); observedAlleles.add(baseA); break;
|
||||
case 'C': observedAlleles.add(baseC); observedAlleles.add(baseC); break;
|
||||
case 'G': observedAlleles.add(baseG); observedAlleles.add(baseG); break;
|
||||
case 'T': observedAlleles.add(baseT); observedAlleles.add(baseT); break;
|
||||
case 'M': observedAlleles.add(baseA); observedAlleles.add(baseC); break;
|
||||
case 'R': observedAlleles.add(baseA); observedAlleles.add(baseG); break;
|
||||
case 'W': observedAlleles.add(baseA); observedAlleles.add(baseT); break;
|
||||
case 'S': observedAlleles.add(baseC); observedAlleles.add(baseG); break;
|
||||
case 'Y': observedAlleles.add(baseC); observedAlleles.add(baseT); break;
|
||||
case 'K': observedAlleles.add(baseG); observedAlleles.add(baseT); break;
|
||||
}
|
||||
if ( observedAlleles.get(0).charAt(0) == refBaseChar && observedAlleles.get(1).charAt(0) == refBaseChar ) varType = NO_VARIANT;
|
||||
else {
|
||||
// we know that at least one allele is non-ref;
|
||||
// if one is ref and the other is non-ref, or if both are non ref but they are the same (i.e.
|
||||
// homozygous non-ref), we still have 2 allelic variants at the site (e.g. one ref and one nonref)
|
||||
varType = SNP_VARIANT;
|
||||
if ( observedAlleles.get(0).charAt(0) == refBaseChar ||
|
||||
observedAlleles.get(1).charAt(0) == refBaseChar ||
|
||||
observedAlleles.get(0) == observedAlleles.get(1)
|
||||
) nNonref = 1;
|
||||
else nNonref = 2; // if both observations differ from ref and they are not equal to one another, then we get multiallelic site...
|
||||
}
|
||||
}
|
||||
} catch ( RuntimeException e ) {
|
||||
System.out.printf(" Exception caught during parsing BasicPileup line: %s%n", line);
|
||||
throw e;
|
||||
}
|
||||
return true;
|
||||
// if ( nNonref > 1 ) System.out.println("SAM pileup: WARNING: multi-allelic variant : ("+refBaseChar+") -->"+toMediumString());
|
||||
}
|
||||
|
||||
|
||||
/** Parses a line from SAM pileup file into this SAMPileupRecord object..
|
||||
* @param parts line from SAM pileup file, split on delimiter character (tab)
|
||||
*/
|
||||
public boolean parseLine(final String[] parts) {
|
||||
// 0 1 2 3 4 5 6 7
|
||||
// * chrX 466 T Y 170 170 88 32 ... (piles of read bases and quals follow)
|
||||
// * chrX 141444 * +CA/+CA 32 468 255 25 +CA * 5 2 12 6
|
||||
try {
|
||||
|
||||
String contig = parts[0];
|
||||
long start = Long.parseLong(parts[1]) ;
|
||||
|
||||
refBaseChar = Character.toUpperCase(parts[2].charAt(0));
|
||||
|
||||
parts[3] = parts[3].toUpperCase();
|
||||
observedString = parts[3];
|
||||
observedAlleles = new ArrayList<String>(2);
|
||||
|
||||
consensusScore = Double.parseDouble(parts[4]);
|
||||
variantScore = Double.parseDouble(parts[5]);
|
||||
|
||||
if ( refBaseChar == '*' ) {
|
||||
|
||||
parseIndels(parts[3]) ;
|
||||
if ( varType == DELETION_VARIANT ) loc = GenomeLocParser.parseGenomeLoc(contig, start, start+eventLength-1);
|
||||
else loc = GenomeLocParser.parseGenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!!
|
||||
}
|
||||
else {
|
||||
parseBasesAndQuals(parts[8], parts[9]);
|
||||
// if the variant is a SNP or a reference base (i.e. no variant at all)
|
||||
if ( parts[3].length() != 1 ) throw new RuntimeException( "point mutation genotype is expected to be represented by a single letter");
|
||||
|
||||
refBases = parts[2].toUpperCase();
|
||||
eventLength = 1;
|
||||
//loc = GenomeLoc.parseGenomeLoc(contig, start, start+1);
|
||||
loc = GenomeLocParser.parseGenomeLoc(contig, start, start);
|
||||
|
||||
char ch = parts[3].charAt(0);
|
||||
|
||||
switch ( ch ) {
|
||||
case 'A': observedAlleles.add(baseA); observedAlleles.add(baseA); break;
|
||||
case 'C': observedAlleles.add(baseC); observedAlleles.add(baseC); break;
|
||||
case 'G': observedAlleles.add(baseG); observedAlleles.add(baseG); break;
|
||||
case 'T': observedAlleles.add(baseT); observedAlleles.add(baseT); break;
|
||||
case 'M': observedAlleles.add(baseA); observedAlleles.add(baseC); break;
|
||||
case 'R': observedAlleles.add(baseA); observedAlleles.add(baseG); break;
|
||||
case 'W': observedAlleles.add(baseA); observedAlleles.add(baseT); break;
|
||||
case 'S': observedAlleles.add(baseC); observedAlleles.add(baseG); break;
|
||||
case 'Y': observedAlleles.add(baseC); observedAlleles.add(baseT); break;
|
||||
case 'K': observedAlleles.add(baseG); observedAlleles.add(baseT); break;
|
||||
}
|
||||
if ( observedAlleles.get(0).charAt(0) == refBaseChar && observedAlleles.get(1).charAt(0) == refBaseChar ) varType = NO_VARIANT;
|
||||
else {
|
||||
// we know that at least one allele is non-ref;
|
||||
// if one is ref and the other is non-ref, or if both are non ref but they are the same (i.e.
|
||||
// homozygous non-ref), we still have 2 allelic variants at the site (e.g. one ref and one nonref)
|
||||
varType = SNP_VARIANT;
|
||||
if ( observedAlleles.get(0).charAt(0) == refBaseChar ||
|
||||
observedAlleles.get(1).charAt(0) == refBaseChar ||
|
||||
observedAlleles.get(0) == observedAlleles.get(1)
|
||||
) nNonref = 1;
|
||||
else nNonref = 2; // if both observations differ from ref and they are not equal to one another, then we get multiallelic site...
|
||||
}
|
||||
}
|
||||
} catch ( RuntimeException e ) {
|
||||
System.out.printf(" Exception caught during parsing BasicPileup line: %s%n", Utils.join(" <=> ", parts));
|
||||
throw e;
|
||||
}
|
||||
return true;
|
||||
// if ( nNonref > 1 ) System.out.println("SAM pileup: WARNING: multi-allelic variant : ("+refBaseChar+") -->"+toMediumString());
|
||||
}
|
||||
|
||||
|
||||
private void parseIndels(String genotype) {
|
||||
String [] obs = genotype.split("/"); // get observations, now need to tinker with them a bit
|
||||
|
||||
// if reference allele is among the observed alleles, we will need to take special care of it since we do not have direct access to the reference;
|
||||
// if we have an insertion, the "reference" allele is going to be empty; if it it is a deletion, we will deduce the "reference allele" bases
|
||||
// from what we have recorded for the deletion allele (e.g. "-CAC")
|
||||
boolean hasRefAllele = false;
|
||||
|
||||
for ( int i = 0 ; i < obs.length ; i++ ) {
|
||||
if ( obs[i].length() == 1 && obs[i].charAt(0) == '*' ) {
|
||||
hasRefAllele = true;
|
||||
observedAlleles.add(emptyStr);
|
||||
continue;
|
||||
}
|
||||
|
||||
String varBases = obs[i].toUpperCase();
|
||||
|
||||
switch ( obs[i].charAt(0) ) {
|
||||
case '+':
|
||||
if ( varType != NO_VARIANT && varType != INSERTION_VARIANT ) varType = INDEL_VARIANT;
|
||||
else varType = INSERTION_VARIANT;
|
||||
refBases = emptyStr;
|
||||
break;
|
||||
case '-' :
|
||||
if ( varType != NO_VARIANT && varType != DELETION_VARIANT ) varType = INDEL_VARIANT;
|
||||
else varType = DELETION_VARIANT;
|
||||
refBases = varBases; // remember what was deleted, this will be saved as "reference allele"
|
||||
break;
|
||||
default: throw new RuntimeException("Can not interpret observed indel allele record: "+genotype);
|
||||
}
|
||||
observedAlleles.add(varBases);
|
||||
eventLength = obs[i].length() - 1; // inconsistent for non-biallelic indels!!
|
||||
}
|
||||
if ( hasRefAllele ) {
|
||||
// we got at least one ref. allele (out of two recorded)
|
||||
if ( varType == NO_VARIANT ) { // both top theories are actually ref allele;
|
||||
nNonref = 0; // no observations of non-reference allele at all
|
||||
refBases = emptyStr;
|
||||
} else {
|
||||
nNonref = 1; // hasRefAllele = true, so one allele was definitely ref, hence there is only one left
|
||||
}
|
||||
} else {
|
||||
// we observe two non-ref alleles; they better be the same variant, otherwise the site is not bi-allelic and at the moment we
|
||||
// fail to set data in a consistent way.
|
||||
if ( observedAlleles.get(0).equals(observedAlleles.get(1)) ) nNonref = 1;
|
||||
else nNonref = 2;
|
||||
}
|
||||
// DONE with indels
|
||||
|
||||
}
|
||||
|
||||
private void parseBasesAndQuals(final String bases, final String quals)
|
||||
{
|
||||
//System.out.printf("%s%n%s%n", bases, quals);
|
||||
|
||||
// needs to convert the base string with it's . and , to the ref base
|
||||
StringBuilder baseBuilder = new StringBuilder();
|
||||
StringBuilder qualBuilder = new StringBuilder();
|
||||
boolean done = false;
|
||||
for ( int i = 0, j = 0; i < bases.length() && ! done; i++ ) {
|
||||
//System.out.printf("%d %d%n", i, j);
|
||||
char c = (char)bases.charAt(i);
|
||||
|
||||
switch ( c ) {
|
||||
case '.': // matches reference
|
||||
case ',': // matches reference
|
||||
baseBuilder.append(refBaseChar);
|
||||
qualBuilder.append((char)quals.charAt(j++));
|
||||
break;
|
||||
case '$': // end of read
|
||||
break;
|
||||
case '*': // end of indel?
|
||||
j++;
|
||||
break;
|
||||
case '^': // mapping quality
|
||||
i++;
|
||||
break;
|
||||
case '+': // start of indel
|
||||
case '-': // start of indel
|
||||
final Pattern regex = Pattern.compile("([0-9]+).*"); // matches case 1
|
||||
final String rest = bases.substring(i+1);
|
||||
//System.out.printf("sub is %s%n", rest);
|
||||
Matcher match = regex.matcher(rest);
|
||||
if ( ! match.matches() ) {
|
||||
if ( refBaseChar != '*' )
|
||||
throw new RuntimeException("Bad pileup format: " + bases + " at position " + i);
|
||||
done = true;
|
||||
}
|
||||
else {
|
||||
String g = match.group(1);
|
||||
//System.out.printf("group is %d, match is %s%n", match.groupCount(), g);
|
||||
int l = Integer.parseInt(g);
|
||||
i += l + g.length(); // length of number + that many bases + +/- at the start (included in the next i++)
|
||||
//System.out.printf("remaining is %d => %s%n", l, bases.substring(i+1));
|
||||
}
|
||||
break;
|
||||
default: // non reference base
|
||||
baseBuilder.append(c);
|
||||
qualBuilder.append((char)quals.charAt(j++));
|
||||
}
|
||||
}
|
||||
|
||||
pileupBases = baseBuilder.toString();
|
||||
pileupQuals = qualBuilder.toString();
|
||||
}
|
||||
|
||||
private void parseBasesAndQuals(final String line, int basesStart, int basesStop, int qualsStart, int qualsStop)
|
||||
{
|
||||
//System.out.printf("%s%n%s%n", bases, quals);
|
||||
|
||||
// needs to convert the base string with it's . and , to the ref base
|
||||
StringBuilder baseBuilder = new StringBuilder();
|
||||
StringBuilder qualBuilder = new StringBuilder();
|
||||
boolean done = false;
|
||||
for ( int i = basesStart, j = qualsStart; i < basesStop && ! done; i++ ) {
|
||||
//System.out.printf("%d %d%n", i, j);
|
||||
char c = (char)line.charAt(i);
|
||||
|
||||
switch ( c ) {
|
||||
case '.': // matches reference
|
||||
case ',': // matches reference
|
||||
baseBuilder.append(refBaseChar);
|
||||
qualBuilder.append((char)line.charAt(j++));
|
||||
break;
|
||||
case '$': // end of read
|
||||
break;
|
||||
case '*': // end of indel?
|
||||
j++;
|
||||
break;
|
||||
case '^': // mapping quality
|
||||
i++;
|
||||
break;
|
||||
case '+': // start of indel
|
||||
case '-': // start of indel
|
||||
final Pattern regex = Pattern.compile("([0-9]+).*"); // matches case 1
|
||||
final String rest = line.substring(i+1,basesStop);
|
||||
//System.out.printf("sub is %s%n", rest);
|
||||
Matcher match = regex.matcher(rest);
|
||||
if ( ! match.matches() ) {
|
||||
if ( refBaseChar != '*' )
|
||||
throw new RuntimeException("Bad pileup format: " + line.substring(basesStart, basesStop) + " at position " + (i-basesStart));
|
||||
done = true;
|
||||
}
|
||||
else {
|
||||
String g = match.group(1);
|
||||
//System.out.printf("group is %d, match is %s%n", match.groupCount(), g);
|
||||
int l = Integer.parseInt(g);
|
||||
i += l + g.length(); // length of number + that many bases + +/- at the start (included in the next i++)
|
||||
//System.out.printf("remaining is %d => %s%n", l, bases.substring(i+1));
|
||||
}
|
||||
break;
|
||||
default: // non reference base
|
||||
baseBuilder.append(c);
|
||||
qualBuilder.append((char)line.charAt(j++));
|
||||
}
|
||||
}
|
||||
|
||||
pileupBases = baseBuilder.toString();
|
||||
pileupQuals = qualBuilder.toString();
|
||||
}
|
||||
|
||||
|
||||
public GenomeLoc getLocation() { return loc; }
|
||||
public String getQualsAsString() { return pileupQuals; }
|
||||
|
||||
/** Returns reference base for point genotypes or '*' for indel genotypes, as a char.
|
||||
*
|
||||
*/
|
||||
public char getRef() { return refBaseChar; }
|
||||
public int size() { return pileupQuals.length(); }
|
||||
|
||||
/** Returns pile of observed bases over the current genomic location.
|
||||
*
|
||||
*/
|
||||
public String getBasesAsString() { return pileupBases; }
|
||||
|
||||
/** Returns formatted pileup string for the current genomic location as
|
||||
* "location: reference_base observed_base_pile observed_qual_pile"
|
||||
*/
|
||||
public String getPileupString()
|
||||
{
|
||||
return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString());
|
||||
}
|
||||
|
||||
public ArrayList<Byte> getBasesAsArrayList() { throw new StingException("Not implemented"); }
|
||||
public ArrayList<Byte> getQualsAsArrayList() { throw new StingException("Not implemented"); }
|
||||
|
||||
/**
|
||||
* Gets the bases in byte array form.
|
||||
* @return byte array of the available bases.
|
||||
*/
|
||||
public byte[] getBases() {
|
||||
return StringUtil.stringToBytes(getBasesAsString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the Phred base qualities without ASCII offset.
|
||||
* @return Phred base qualities.
|
||||
*/
|
||||
public byte[] getQuals() {
|
||||
byte[] quals = StringUtil.stringToBytes(getQualsAsString());
|
||||
for(int i = 0; i < quals.length; i++) quals[i] -= 33;
|
||||
return quals;
|
||||
}
|
||||
|
||||
/** Returns bases in the reference allele as a String. For point genotypes, the string consists of a single
|
||||
* character (reference base). For indel genotypes, the string is empty for insertions into
|
||||
* the reference, or consists of deleted bases for deletions.
|
||||
*
|
||||
* @return reference allele, forward strand
|
||||
*/
|
||||
public String getFWDRefBases() {
|
||||
return refBases;
|
||||
}
|
||||
|
||||
|
||||
public List<String> getFWDAlleles() {
|
||||
return observedAlleles;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// What kind of variant are we?
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public boolean isSNP() { return varType == SNP_VARIANT ; }
|
||||
public boolean isInsertion() { return varType == INSERTION_VARIANT; }
|
||||
public boolean isDeletion() { return varType == DELETION_VARIANT ; }
|
||||
public boolean isIndel() { return isInsertion() || isDeletion() || varType == INDEL_VARIANT; }
|
||||
public boolean isReference() { return varType == NO_VARIANT; }
|
||||
|
||||
public boolean isHom() {
|
||||
// implementation-dependent: here we use the fact that for ref and snps we actually use fixed static strings to remember the genotype
|
||||
if ( ! isIndel() ) return ( observedAlleles.get(0) == observedAlleles.get(1) );
|
||||
return ( isInsertion() || isDeletion() ) && observedAlleles.get(0).equals(observedAlleles.get(1) );
|
||||
}
|
||||
|
||||
public boolean isHet() {
|
||||
// implementation-dependent: here we use the fact that for ref and snps we actually use fixed static strings to remember the genotype
|
||||
if ( ! isIndel() ) return ( observedAlleles.get(0) != observedAlleles.get(1) );
|
||||
return isIndel() || ( ! observedAlleles.get(0).equals(observedAlleles.get(1) ) );
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// formatting
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public String toString() {
|
||||
return String.format("%s\t%d\t%d\t%s\t%s\t%s",
|
||||
getLocation().getContig(), getLocation().getStart(), getLocation().getStop(), name, refBases, observedString);
|
||||
}
|
||||
|
||||
public String toSimpleString() {
|
||||
return String.format("%s:%s", name, observedString);
|
||||
}
|
||||
|
||||
public String toMediumString() {
|
||||
|
||||
String s = null;
|
||||
if ( refBaseChar == '*' ) s = String.format("%s:%s:%s", getLocation().toString(), name,observedString);
|
||||
else s = String.format("%s:%s:%s/%s", getLocation().toString(), name, observedAlleles.get(0),observedAlleles.get(1));
|
||||
if ( isSNP() ) s += ": SNP";
|
||||
else {
|
||||
if ( isInsertion() ) s += ": Insertion";
|
||||
else {
|
||||
if ( isDeletion() ) s+= ": Deletion";
|
||||
else {
|
||||
if ( isIndel() ) s+=": Indel";
|
||||
else s+=": Reference";
|
||||
}
|
||||
}
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
public String repl() {
|
||||
return String.format("REPL not implemented yet");
|
||||
}
|
||||
|
||||
|
||||
public double getVariantConfidence() {
|
||||
return variantScore;
|
||||
}
|
||||
|
||||
|
||||
public boolean isBiallelic() {
|
||||
return nNonref < 2;
|
||||
}
|
||||
|
||||
public double getConsensusConfidence() {
|
||||
return consensusScore;
|
||||
}
|
||||
|
||||
public int length() {
|
||||
return eventLength;
|
||||
}
|
||||
|
||||
public boolean isIndelGenotype() {
|
||||
return refBaseChar == '*';
|
||||
}
|
||||
|
||||
|
||||
public boolean isPointGenotype() {
|
||||
return ! isIndelGenotype();
|
||||
}
|
||||
|
||||
/** Implements method required by GenotypeList interface. If this object represents
|
||||
* an indel genotype, then it returns itself through this method. If this object is a
|
||||
* point genotype, this method returns null.
|
||||
* @return
|
||||
*/
|
||||
public SAMPileupRecord getIndelGenotype() {
|
||||
if ( isIndelGenotype() ) return this;
|
||||
else return null;
|
||||
}
|
||||
|
||||
/** Implements method required by GenotypeList interface. If this object represents
|
||||
* a point genotype, then it returns itself through this method. If this object is an
|
||||
* indel genotype, this method returns null.
|
||||
* @return
|
||||
*/
|
||||
public SAMPileupRecord getPointGenotype() {
|
||||
if ( isPointGenotype() ) return this;
|
||||
else return null;
|
||||
}
|
||||
|
||||
/** Returns true if this object \em is an indel genotype (and thus
|
||||
* indel genotype is what it only has).
|
||||
* @return
|
||||
*/
|
||||
public boolean hasIndelGenotype() {
|
||||
return isIndelGenotype();
|
||||
}
|
||||
|
||||
/** Returns true if this object \em is a point genotype (and thus
|
||||
* point genotype is what it only has.
|
||||
* @return
|
||||
*/
|
||||
public boolean hasPointGenotype() {
|
||||
return isPointGenotype();
|
||||
}
|
||||
|
||||
public int compareTo(ReferenceOrderedDatum o) {
|
||||
return getLocation().compareTo(o.getLocation());
|
||||
}
|
||||
|
||||
protected static class pileupFileIterator implements Iterator<SAMPileupRecord> {
|
||||
// private String fieldDelimiter = new String("\t");
|
||||
private String rodName = null;
|
||||
private XReadLines parser = null;
|
||||
private String lastProcessedLine = null;
|
||||
|
||||
// private long z = 0;
|
||||
// private long t = 0;
|
||||
|
||||
pileupFileIterator(String name, java.io.File f) {
|
||||
try {
|
||||
parser = new XReadLines(f);
|
||||
} catch ( FileNotFoundException e ) {
|
||||
Utils.scareUser("Couldn't open file: " + f);
|
||||
}
|
||||
rodName = name;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return parser.hasNext();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the next element in the iteration.
|
||||
* @throws NoSuchElementException - iterator has no more elements.
|
||||
*/
|
||||
public SAMPileupRecord next() {
|
||||
if (!this.hasNext()) throw new NoSuchElementException("SAMPileupRecord next called on iterator with no more elements");
|
||||
// if ( z == 0 ) t = System.currentTimeMillis();
|
||||
lastProcessedLine = parser.next();
|
||||
SAMPileupRecord n = new SAMPileupRecord(rodName);
|
||||
//System.out.printf("Line is %s%n", line);
|
||||
// String parts[] = lastProcessedLine.split(fieldDelimiter);
|
||||
// n.parseLine(parts);
|
||||
|
||||
n.parseLine(lastProcessedLine);
|
||||
// if ( ++z == 1000000 ) { System.out.println(rodName + ": 1,000,000 records read in "+((double)(System.currentTimeMillis()-t)/1000.0) +" s"); z = 0; }
|
||||
return n ;
|
||||
}
|
||||
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("'remove' operation is not supported for file-backed SAM pileups");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public static Iterator<SAMPileupRecord> createIterator(String name, java.io.File f) {
|
||||
return new pileupFileIterator(name,f);
|
||||
}
|
||||
|
||||
|
||||
|
||||
public static void main(String argv[]) {
|
||||
// String testFile = "/humgen/gsa-scr1/asivache/TCGA/Ovarian/C2K/0805/normal.pileup";
|
||||
String testFile = "/humgen/gsa-scr1/asivache/trios/CEU/NA12891.12892.12878/mother.chr1.pileup.indel";
|
||||
|
||||
Iterator<SAMPileupRecord> it = createIterator("test-normal", new java.io.File(testFile));
|
||||
|
||||
ReferenceSequenceFileWalker reference = new ReferenceSequenceFileWalker(
|
||||
//new java.io.File( "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")
|
||||
new java.io.File( "/humgen/gsa-scr1/asivache/trios/CEU/NA12891.12892.12878/human_b36_both.fasta")
|
||||
);
|
||||
|
||||
if ( reference.getSequenceDictionary() == null ) {
|
||||
System.out.println("No reference sequence dictionary found. Abort.");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary());
|
||||
|
||||
int counter = 0;
|
||||
|
||||
while ( it.hasNext() && counter < 430 ) {
|
||||
SAMPileupRecord p = it.next();
|
||||
|
||||
System.out.print(p.getLocation().toString());
|
||||
System.out.print('\t');
|
||||
|
||||
if ( p.isIndel() && p.isSNP() ) { System.out.print("Indel+SNP"); }
|
||||
else {
|
||||
if ( p.isSNP() ) { System.out.print("SNP"); }
|
||||
else {
|
||||
if ( p.isIndel() ) { System.out.print("Indel"); }
|
||||
else { System.out.print("REF"); }
|
||||
}
|
||||
}
|
||||
|
||||
System.out.print('\t');
|
||||
System.out.print(p.getFWDAlleles().get(0)+"/"+p.getFWDAlleles().get(1));
|
||||
System.out.print('\t');
|
||||
System.out.println(p.getConsensusConfidence()+"\t"+p.getVariantConfidence());
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,250 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.features.sampileup;
|
||||
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.exception.CodecLineParsingException;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
import static org.broadinstitute.sting.gatk.refdata.features.sampileup.SAMPileupFeature.VariantType;
|
||||
|
||||
/**
|
||||
* A Tribble encoder / decoder for SAM pileup data.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class SAMPileupCodec implements FeatureCodec {
|
||||
// the number of tokens we expect to parse from a dbSNP line
|
||||
private static final int expectedTokenCount = 10;
|
||||
private static final char fldDelim = '\t';
|
||||
|
||||
// allocate once and don't ever bother creating them again:
|
||||
private static final String baseA = "A";
|
||||
private static final String baseC = "C";
|
||||
private static final String baseG = "G";
|
||||
private static final String baseT = "T";
|
||||
private static final String emptyStr = ""; // we will use this for "reference" allele in insertions
|
||||
|
||||
/**
|
||||
* Return the # of header lines for this file.
|
||||
*
|
||||
* @param f the file
|
||||
* @return 0 in this case, we assume no header lines. The DbSNP file may have a
|
||||
* header line beginning with '#', but we can ignore that in the decode function.
|
||||
*/
|
||||
public int headerLineCount(File f) {
|
||||
// we don't require a header line, but it may exist. We'll deal with that above.
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Feature decode(String line) {
|
||||
// 0 1 2 3 4 5 6 7
|
||||
//* chrX 466 T Y 170 170 88 32 ... (piles of read bases and quals follow)
|
||||
//* chrX 141444 * +CA/+CA 32 468 255 25 +CA * 5 2 12 6
|
||||
String[] tokens = new String[expectedTokenCount];
|
||||
|
||||
// split the line
|
||||
int count = ParsingUtils.split(line,tokens,fldDelim);
|
||||
|
||||
// check to see if we've parsed the string into the right number of tokens (expectedTokenCount)
|
||||
if (count != expectedTokenCount)
|
||||
throw new CodecLineParsingException("the SAM pileup line didn't have the expected number of tokens " +
|
||||
"(expected = " + expectedTokenCount + ", saw = " + count + " on " +
|
||||
"line = " + line + ")");
|
||||
|
||||
SAMPileupFeature feature = new SAMPileupFeature(tokens[0],
|
||||
0,
|
||||
0);
|
||||
|
||||
feature.setStart(Integer.parseInt(tokens[1]));
|
||||
|
||||
if(tokens[2].length() != 1)
|
||||
throw new CodecLineParsingException("The SAM pileup line had unexpected base " + tokens[2] + " on line = " + line);
|
||||
feature.setRef(Character.toUpperCase(tokens[2].charAt(0)));
|
||||
|
||||
String observedString = tokens[3].toUpperCase(); // field 3
|
||||
feature.setFWDAlleles(new ArrayList<String>(2));
|
||||
|
||||
feature.setConsensusConfidence(Double.parseDouble(tokens[4]));
|
||||
feature.setVariantConfidence(Double.parseDouble(tokens[5]));
|
||||
|
||||
if ( feature.getRef() == '*' ) {
|
||||
parseIndels(observedString,feature) ;
|
||||
if ( feature.isDeletion() ) feature.setEnd(feature.getStart()+feature.length()-1);
|
||||
else feature.setEnd(feature.getStart()-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!!
|
||||
} else {
|
||||
parseBasesAndQuals(feature,tokens[8],tokens[9]);
|
||||
// if the variant is a SNP or a reference base (i.e. no variant at all)
|
||||
if ( observedString.length() != 1 ) throw new RuntimeException( "point mutation genotype is expected to be represented by a single letter");
|
||||
feature.setRefBases(tokens[2].toUpperCase());
|
||||
feature.setEnd(feature.getStart());
|
||||
|
||||
char ch = observedString.charAt(0);
|
||||
|
||||
switch ( ch ) {
|
||||
case 'A': feature.getFWDAlleles().add(baseA); feature.getFWDAlleles().add(baseA); break;
|
||||
case 'C': feature.getFWDAlleles().add(baseC); feature.getFWDAlleles().add(baseC); break;
|
||||
case 'G': feature.getFWDAlleles().add(baseG); feature.getFWDAlleles().add(baseG); break;
|
||||
case 'T': feature.getFWDAlleles().add(baseT); feature.getFWDAlleles().add(baseT); break;
|
||||
case 'M': feature.getFWDAlleles().add(baseA); feature.getFWDAlleles().add(baseC); break;
|
||||
case 'R': feature.getFWDAlleles().add(baseA); feature.getFWDAlleles().add(baseG); break;
|
||||
case 'W': feature.getFWDAlleles().add(baseA); feature.getFWDAlleles().add(baseT); break;
|
||||
case 'S': feature.getFWDAlleles().add(baseC); feature.getFWDAlleles().add(baseG); break;
|
||||
case 'Y': feature.getFWDAlleles().add(baseC); feature.getFWDAlleles().add(baseT); break;
|
||||
case 'K': feature.getFWDAlleles().add(baseG); feature.getFWDAlleles().add(baseT); break;
|
||||
}
|
||||
if ( feature.getFWDAlleles().get(0).charAt(0) == feature.getRef() && feature.getFWDAlleles().get(1).charAt(0) == feature.getRef() ) feature.setVariantType(VariantType.NONE);
|
||||
else {
|
||||
// we know that at least one allele is non-ref;
|
||||
// if one is ref and the other is non-ref, or if both are non ref but they are the same (i.e.
|
||||
// homozygous non-ref), we still have 2 allelic variants at the site (e.g. one ref and one nonref)
|
||||
feature.setVariantType(VariantType.SNP);
|
||||
if ( feature.getFWDAlleles().get(0).charAt(0) == feature.getRef() ||
|
||||
feature.getFWDAlleles().get(1).charAt(0) == feature.getRef() ||
|
||||
feature.getFWDAlleles().get(0).equals(feature.getFWDAlleles().get(1))
|
||||
) feature.setNumNonRef(1);
|
||||
else feature.setNumNonRef(2); // if both observations differ from ref and they are not equal to one another, then we get multiallelic site...
|
||||
}
|
||||
}
|
||||
|
||||
return feature;
|
||||
}
|
||||
|
||||
private void parseIndels(String genotype,SAMPileupFeature feature) {
|
||||
String [] obs = genotype.split("/"); // get observations, now need to tinker with them a bit
|
||||
|
||||
// if reference allele is among the observed alleles, we will need to take special care of it since we do not have direct access to the reference;
|
||||
// if we have an insertion, the "reference" allele is going to be empty; if it it is a deletion, we will deduce the "reference allele" bases
|
||||
// from what we have recorded for the deletion allele (e.g. "-CAC")
|
||||
boolean hasRefAllele = false;
|
||||
|
||||
for ( int i = 0 ; i < obs.length ; i++ ) {
|
||||
if ( obs[i].length() == 1 && obs[i].charAt(0) == '*' ) {
|
||||
hasRefAllele = true;
|
||||
feature.getFWDAlleles().add(emptyStr);
|
||||
continue;
|
||||
}
|
||||
|
||||
String varBases = obs[i].toUpperCase();
|
||||
|
||||
switch ( obs[i].charAt(0) ) {
|
||||
case '+':
|
||||
if (!feature.isReference() && !feature.isInsertion()) feature.setVariantType(VariantType.INDEL);
|
||||
else feature.setVariantType(VariantType.INSERTION);
|
||||
feature.setRefBases(emptyStr);
|
||||
break;
|
||||
case '-' :
|
||||
if (!feature.isReference() && !feature.isDeletion()) feature.setVariantType(VariantType.INDEL);
|
||||
else feature.setVariantType(VariantType.DELETION);
|
||||
feature.setRefBases(varBases); // remember what was deleted, this will be saved as "reference allele"
|
||||
break;
|
||||
default: throw new RuntimeException("Can not interpret observed indel allele record: "+genotype);
|
||||
}
|
||||
feature.getFWDAlleles().add(varBases);
|
||||
feature.setLength(obs[i].length()-1); // inconsistent for non-biallelic indels!!
|
||||
}
|
||||
if ( hasRefAllele ) {
|
||||
// we got at least one ref. allele (out of two recorded)
|
||||
if (feature.isReference()) { // both top theories are actually ref allele;
|
||||
feature.setNumNonRef(0); // no observations of non-reference allele at all
|
||||
feature.setRefBases(emptyStr);
|
||||
} else {
|
||||
feature.setNumNonRef(1); // hasRefAllele = true, so one allele was definitely ref, hence there is only one left
|
||||
}
|
||||
} else {
|
||||
// we observe two non-ref alleles; they better be the same variant, otherwise the site is not bi-allelic and at the moment we
|
||||
// fail to set data in a consistent way.
|
||||
if ( feature.getFWDAlleles().get(0).equals(feature.getFWDAlleles().get(1))) feature.setNumNonRef(1);
|
||||
else feature.setNumNonRef(2);
|
||||
}
|
||||
// DONE with indels
|
||||
|
||||
}
|
||||
|
||||
private void parseBasesAndQuals(SAMPileupFeature feature, final String bases, final String quals)
|
||||
{
|
||||
//System.out.printf("%s%n%s%n", bases, quals);
|
||||
|
||||
// needs to convert the base string with it's . and , to the ref base
|
||||
StringBuilder baseBuilder = new StringBuilder();
|
||||
StringBuilder qualBuilder = new StringBuilder();
|
||||
boolean done = false;
|
||||
for ( int i = 0, j = 0; i < bases.length() && ! done; i++ ) {
|
||||
//System.out.printf("%d %d%n", i, j);
|
||||
char c = (char)bases.charAt(i);
|
||||
|
||||
switch ( c ) {
|
||||
case '.': // matches reference
|
||||
case ',': // matches reference
|
||||
baseBuilder.append(feature.getRef());
|
||||
qualBuilder.append(quals.charAt(j++));
|
||||
break;
|
||||
case '$': // end of read
|
||||
break;
|
||||
case '*': // end of indel?
|
||||
j++;
|
||||
break;
|
||||
case '^': // mapping quality
|
||||
i++;
|
||||
break;
|
||||
case '+': // start of indel
|
||||
case '-': // start of indel
|
||||
final Pattern regex = Pattern.compile("([0-9]+).*"); // matches case 1
|
||||
final String rest = bases.substring(i+1);
|
||||
//System.out.printf("sub is %s%n", rest);
|
||||
Matcher match = regex.matcher(rest);
|
||||
if ( ! match.matches() ) {
|
||||
if ( feature.getRef() != '*' )
|
||||
throw new RuntimeException("Bad pileup format: " + bases + " at position " + i);
|
||||
done = true;
|
||||
}
|
||||
else {
|
||||
String g = match.group(1);
|
||||
//System.out.printf("group is %d, match is %s%n", match.groupCount(), g);
|
||||
int l = Integer.parseInt(g);
|
||||
i += l + g.length(); // length of number + that many bases + +/- at the start (included in the next i++)
|
||||
//System.out.printf("remaining is %d => %s%n", l, bases.substring(i+1));
|
||||
}
|
||||
break;
|
||||
default: // non reference base
|
||||
baseBuilder.append(c);
|
||||
qualBuilder.append(quals.charAt(j++));
|
||||
}
|
||||
}
|
||||
|
||||
feature.setPileupBases(baseBuilder.toString());
|
||||
feature.setPileupQuals(qualBuilder.toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,283 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.features.sampileup;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
|
||||
/**
|
||||
* A tribble feature representing a SAM pileup.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class SAMPileupFeature implements Feature {
|
||||
public enum VariantType { NONE, SNP, INSERTION, DELETION, INDEL };
|
||||
|
||||
private String contig; // genomic location of this genotyped site
|
||||
private int start;
|
||||
private int stop;
|
||||
|
||||
private char refBaseChar; // what we have set for the reference base (is set to a '*' for indel!)
|
||||
private String refBases; // the reference base sequence according to NCBI; single base for point mutations, deleted bases for deletions, empty string for insertions
|
||||
|
||||
private String pileupQuals; // the read base qualities
|
||||
private String pileupBases; // the read bases themselves
|
||||
|
||||
private List<String> observedAlleles = null; // The sequences of the observed alleles (e.g. {"A","C"} for point mutation or {"","+CC"} for het. insertion
|
||||
private VariantType varType = VariantType.NONE;
|
||||
private int nNonref = 0; // number of non-reference alleles observed
|
||||
private int eventLength = 0; // number of inserted or deleted bases
|
||||
|
||||
private double consensusScore = 0;
|
||||
private double variantScore = 0;
|
||||
|
||||
/**
|
||||
* create the dbSNP feature, given the following information:
|
||||
*
|
||||
* @param contig the contig rsID
|
||||
* @param start the start position, one based
|
||||
* @param stop the stop position, one based
|
||||
*/
|
||||
SAMPileupFeature(String contig,
|
||||
int start,
|
||||
int stop) {
|
||||
this.contig = contig;
|
||||
this.start = start;
|
||||
this.stop = stop;
|
||||
}
|
||||
|
||||
public String getChr() {
|
||||
return contig;
|
||||
}
|
||||
|
||||
protected void setChr(String chr) {
|
||||
this.contig = chr;
|
||||
}
|
||||
|
||||
public int getStart() {
|
||||
return start;
|
||||
}
|
||||
|
||||
protected void setStart(int start) {
|
||||
this.start = start;
|
||||
}
|
||||
|
||||
public int getEnd() {
|
||||
return stop;
|
||||
}
|
||||
|
||||
protected void setEnd(int end) {
|
||||
this.stop = end;
|
||||
}
|
||||
|
||||
public String getQualsAsString() { return pileupQuals; }
|
||||
|
||||
protected void setPileupQuals(String pileupQuals) {
|
||||
this.pileupQuals = pileupQuals;
|
||||
}
|
||||
|
||||
/** Returns reference base for point genotypes or '*' for indel genotypes, as a char.
|
||||
*
|
||||
*/
|
||||
public char getRef() { return refBaseChar; }
|
||||
|
||||
protected void setRef(char ref) {
|
||||
this.refBaseChar = ref;
|
||||
}
|
||||
|
||||
public int size() { return pileupQuals.length(); }
|
||||
|
||||
/** Returns pile of observed bases over the current genomic location.
|
||||
*
|
||||
*/
|
||||
public String getBasesAsString() { return pileupBases; }
|
||||
|
||||
protected void setPileupBases(String pileupBases) {
|
||||
this.pileupBases = pileupBases;
|
||||
}
|
||||
|
||||
/** Returns formatted pileup string for the current genomic location as
|
||||
* "location: reference_base observed_base_pile observed_qual_pile"
|
||||
*/
|
||||
public String getPileupString()
|
||||
{
|
||||
if(start == stop)
|
||||
return String.format("%s:%d: %s %s %s", getChr(), getStart(), getRef(), getBasesAsString(), getQualsAsString());
|
||||
else
|
||||
return String.format("%s:%d-%d: %s %s %s", getChr(), getStart(), getEnd(), getRef(), getBasesAsString(), getQualsAsString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the bases in byte array form.
|
||||
* @return byte array of the available bases.
|
||||
*/
|
||||
public byte[] getBases() {
|
||||
return StringUtil.stringToBytes(getBasesAsString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the Phred base qualities without ASCII offset.
|
||||
* @return Phred base qualities.
|
||||
*/
|
||||
public byte[] getQuals() {
|
||||
byte[] quals = StringUtil.stringToBytes(getQualsAsString());
|
||||
for(int i = 0; i < quals.length; i++) quals[i] -= 33;
|
||||
return quals;
|
||||
}
|
||||
|
||||
/** Returns bases in the reference allele as a String. For point genotypes, the string consists of a single
|
||||
* character (reference base). For indel genotypes, the string is empty for insertions into
|
||||
* the reference, or consists of deleted bases for deletions.
|
||||
*
|
||||
* @return reference allele, forward strand
|
||||
*/
|
||||
public String getFWDRefBases() {
|
||||
return refBases;
|
||||
}
|
||||
|
||||
protected void setRefBases(String refBases) {
|
||||
this.refBases = refBases;
|
||||
}
|
||||
|
||||
public List<String> getFWDAlleles() {
|
||||
return observedAlleles;
|
||||
}
|
||||
|
||||
protected void setFWDAlleles(List<String> alleles) {
|
||||
this.observedAlleles = alleles;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// What kind of variant are we?
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public boolean isSNP() { return varType == VariantType.SNP; }
|
||||
public boolean isInsertion() { return varType == VariantType.INSERTION; }
|
||||
public boolean isDeletion() { return varType == VariantType.DELETION ; }
|
||||
public boolean isIndel() { return isInsertion() || isDeletion() || varType == VariantType.INDEL; }
|
||||
public boolean isReference() { return varType == VariantType.NONE; }
|
||||
|
||||
protected void setVariantType(VariantType variantType) {
|
||||
this.varType = variantType;
|
||||
}
|
||||
|
||||
public boolean isHom() {
|
||||
// implementation-dependent: here we use the fact that for ref and snps we actually use fixed static strings to remember the genotype
|
||||
if ( ! isIndel() ) return ( observedAlleles.get(0).equals(observedAlleles.get(1)) );
|
||||
return ( isInsertion() || isDeletion() ) && observedAlleles.get(0).equals(observedAlleles.get(1) );
|
||||
}
|
||||
|
||||
public boolean isHet() {
|
||||
// implementation-dependent: here we use the fact that for ref and snps we actually use fixed static strings to remember the genotype
|
||||
if ( ! isIndel() ) return ( !(observedAlleles.get(0).equals(observedAlleles.get(1))) );
|
||||
return isIndel() || ( ! observedAlleles.get(0).equals(observedAlleles.get(1) ) );
|
||||
}
|
||||
|
||||
public double getVariantConfidence() {
|
||||
return variantScore;
|
||||
}
|
||||
|
||||
protected void setVariantConfidence(double variantScore) {
|
||||
this.variantScore = variantScore;
|
||||
}
|
||||
|
||||
public boolean isBiallelic() {
|
||||
return nNonref < 2;
|
||||
}
|
||||
|
||||
protected void setNumNonRef(int nNonref) {
|
||||
this.nNonref = nNonref;
|
||||
}
|
||||
|
||||
public double getConsensusConfidence() {
|
||||
return consensusScore;
|
||||
}
|
||||
|
||||
protected void setConsensusConfidence(double consensusScore) {
|
||||
this.consensusScore = consensusScore;
|
||||
}
|
||||
|
||||
public int length() {
|
||||
return eventLength;
|
||||
}
|
||||
|
||||
protected void setLength(int eventLength) {
|
||||
this.eventLength = eventLength;
|
||||
}
|
||||
|
||||
public boolean isIndelGenotype() {
|
||||
return refBaseChar == '*';
|
||||
}
|
||||
|
||||
|
||||
public boolean isPointGenotype() {
|
||||
return ! isIndelGenotype();
|
||||
}
|
||||
|
||||
/** Implements method required by GenotypeList interface. If this object represents
|
||||
* an indel genotype, then it returns itself through this method. If this object is a
|
||||
* point genotype, this method returns null.
|
||||
* @return
|
||||
*/
|
||||
public SAMPileupFeature getIndelGenotype() {
|
||||
if ( isIndelGenotype() ) return this;
|
||||
else return null;
|
||||
}
|
||||
|
||||
/** Implements method required by GenotypeList interface. If this object represents
|
||||
* a point genotype, then it returns itself through this method. If this object is an
|
||||
* indel genotype, this method returns null.
|
||||
* @return
|
||||
*/
|
||||
public SAMPileupFeature getPointGenotype() {
|
||||
if ( isPointGenotype() ) return this;
|
||||
else return null;
|
||||
}
|
||||
|
||||
/** Returns true if this object \em is an indel genotype (and thus
|
||||
* indel genotype is what it only has).
|
||||
* @return
|
||||
*/
|
||||
public boolean hasIndelGenotype() {
|
||||
return isIndelGenotype();
|
||||
}
|
||||
|
||||
/** Returns true if this object \em is a point genotype (and thus
|
||||
* point genotype is what it only has.
|
||||
* @return
|
||||
*/
|
||||
public boolean hasPointGenotype() {
|
||||
return isPointGenotype();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -1,225 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFileWalker;
|
||||
|
||||
/**
|
||||
* This class wraps Maq/samtools allele calls from pileup format and presents them as a ROD.<br>
|
||||
*
|
||||
* Example format:<br>
|
||||
* for SNP:<br>
|
||||
* [chr] [pos] [ref] [consensus allele(s)] [consensus confidence] [snp confidence] [max mapping qual] [num reads in the pile] <br>
|
||||
* chrX 466 T Y 170 170 88 32 ... (piles of read bases and quals follow) <br>
|
||||
* <br>
|
||||
* for indel: <br>
|
||||
* [chr] [pos] [always *] [consensus alleles] [consensus conf.?] [indel conf.?] [max mapping qual] [num reads in the pile] [indel] [always *?] [reads haveindel] [reads may have indel] [other reads?]<br>
|
||||
* chrX 141444 * +CA/+CA 32 468 255 25 +CA * 5 2 12 <br>
|
||||
* User: asivache
|
||||
* Date: Apr 03, 2009
|
||||
* Time: 2:58:33 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
public class rodSAMPileup extends BasicReferenceOrderedDatum {
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Constructors
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
private SAMPileupRecord indelGenotype = null;
|
||||
private SAMPileupRecord pointGenotype = null;
|
||||
|
||||
public rodSAMPileup(final String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
if ( pointGenotype != null ) return pointGenotype.getLocation();
|
||||
if ( indelGenotype != null ) return indelGenotype.getLocation();
|
||||
return null;
|
||||
}
|
||||
|
||||
/** Required by ReferenceOrderedDatum interface. This implementation provides its own iterator,
|
||||
* so this method does nothing at all (always returns false).
|
||||
*
|
||||
*/
|
||||
@Override
|
||||
public boolean parseLine(Object header, String[] parts) throws IOException {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder b = new StringBuilder();
|
||||
if ( pointGenotype != null ) {
|
||||
b.append(pointGenotype.toString());
|
||||
if ( indelGenotype != null ) b.append('\n');
|
||||
}
|
||||
if ( indelGenotype != null ) b.append(indelGenotype.toString());
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
public SAMPileupRecord getIndelGenotype() {
|
||||
return indelGenotype;
|
||||
}
|
||||
|
||||
public SAMPileupRecord getPointGenotype() {
|
||||
return pointGenotype;
|
||||
}
|
||||
|
||||
public boolean hasIndelGenotype() {
|
||||
return indelGenotype != null;
|
||||
}
|
||||
|
||||
public boolean hasPointGenotype() {
|
||||
return pointGenotype != null;
|
||||
}
|
||||
|
||||
/** Iterates over SAM pileup multiline records: each new invocation of next() will
|
||||
* move to next genomic position in the pileup file, and if the record for that position
|
||||
* consists of multiple lines (as is the case for indels), they all will be read and processed.
|
||||
* @author asivache
|
||||
*
|
||||
*/
|
||||
private static class rodSAMPileupIterator implements Iterator<rodSAMPileup> {
|
||||
//private xReadLines parser = null;
|
||||
private String rodName = null;
|
||||
private PushbackIterator<SAMPileupRecord> parser = null;
|
||||
private long line = 0;
|
||||
|
||||
rodSAMPileupIterator(String name, java.io.File f) {
|
||||
parser = new PushbackIterator<SAMPileupRecord>(SAMPileupRecord.createIterator(name,f));
|
||||
rodName = name;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return parser.hasNext();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the next element in the iteration.
|
||||
* @throws NoSuchElementException - iterator has no more elements.
|
||||
*/
|
||||
public rodSAMPileup next() {
|
||||
if (!this.hasNext()) throw new NoSuchElementException("rodSAMPileup next called on iterator with no more elements");
|
||||
rodSAMPileup result = new rodSAMPileup(rodName);
|
||||
|
||||
SAMPileupRecord r = parser.next();
|
||||
|
||||
// if ( (++line)%10000 == 0 ) System.out.printf("%s: record is %d (%s)%n", rodName, line,r.getLocation().toString());
|
||||
|
||||
if ( r.isPointGenotype() ) result.pointGenotype = r;
|
||||
else result.indelGenotype = r;
|
||||
|
||||
if ( parser.hasNext() ) {
|
||||
|
||||
SAMPileupRecord r2 = parser.next();
|
||||
int cmp = r.getLocation().compareTo(r2.getLocation());
|
||||
switch ( cmp ) {
|
||||
case -1 : // next record is at greater position
|
||||
parser.pushback(r2); // return next record back to the stream
|
||||
break;
|
||||
case 0: // next record is at the same position; in this case r MUST be point and r2 MUST be indel
|
||||
if ( result.indelGenotype != null ) { // oops, we've seen indel already
|
||||
if ( r2.isPointGenotype() ) throw new StingException("SAM pileup file might be corrupted: point genotype follows indel genotype line at "+r.getLocation() );
|
||||
else throw new StingException("SAM pileup file might be corrupted: two indel genotype lines found at same position "+r.getLocation() );
|
||||
} else {
|
||||
// ok, the first genotype we've seen was a point one
|
||||
if ( r2.isPointGenotype() ) throw new StingException("SAM pileup file might be corrupted: two point genotype lines found at same position "+r.getLocation() );
|
||||
// and the second is an indel, wheww...
|
||||
result.indelGenotype = r2;
|
||||
}
|
||||
|
||||
// the following is solely for the purposes of strict validation of pileup file: since we've already read two lines at the
|
||||
// the same genomic location (point and indel variants), there can not be any more:
|
||||
if ( parser.hasNext() ) {
|
||||
r2 = parser.next();
|
||||
cmp = r.getLocation().compareTo(r2.getLocation() ) ;
|
||||
if ( cmp == 0 ) throw new StingException("SAM pileup file is corrupted: more than two lines found at position "+r.getLocation());
|
||||
if ( cmp > 0 ) throw new StingException("SAM pileup file or reference dictionary is corrupted: lesser position "+r2.getLocation()+" is encountered right after position "
|
||||
+ r.getLocation() );
|
||||
parser.pushback(r2);
|
||||
}
|
||||
|
||||
break;
|
||||
case 1: // next record is at lower position?? come'on!
|
||||
throw new StingException("SAM pileup file or reference dictionary is corrupted: lesser position "+r2.getLocation()+" is encountered right after position " + r.getLocation() );
|
||||
default:
|
||||
throw new StingException("INTERNAL ERROR: this point should never be reached");
|
||||
}
|
||||
}
|
||||
return result ;
|
||||
}
|
||||
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("'remove' operation is not supported for file-backed SAM pileups");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static Iterator<rodSAMPileup> createIterator(String name, java.io.File file) {
|
||||
return new rodSAMPileup.rodSAMPileupIterator(name,file);
|
||||
}
|
||||
|
||||
public static void main(String argv[]) {
|
||||
String testFile = "/humgen/gsa-scr1/asivache/TCGA/Ovarian/C2K/0805/normal.pileup";
|
||||
// String testFile = "/humgen/gsa-scr1/asivache/trios/CEU/NA12891.12892.12878/mother.chr1.pileup.indel";
|
||||
|
||||
Iterator<rodSAMPileup> it = createIterator("test-normal", new java.io.File(testFile));
|
||||
|
||||
ReferenceSequenceFileWalker reference = new ReferenceSequenceFileWalker(
|
||||
new java.io.File( "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")
|
||||
// new java.io.File( "/humgen/gsa-scr1/asivache/trios/CEU/NA12891.12892.12878/human_b36_both.fasta")
|
||||
);
|
||||
|
||||
if ( reference.getSequenceDictionary() == null ) {
|
||||
System.out.println("No reference sequence dictionary found. Abort.");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary());
|
||||
|
||||
int counter = 0;
|
||||
|
||||
while ( it.hasNext() && counter < 430 ) {
|
||||
rodSAMPileup p = it.next();
|
||||
System.out.println(p.toString());
|
||||
/*
|
||||
System.out.print(p.getLocation().toString());
|
||||
System.out.print('\t');
|
||||
|
||||
if ( p.isIndel() && p.isSNP() ) { System.out.print("Indel+SNP"); }
|
||||
else {
|
||||
if ( p.isSNP() ) { System.out.print("SNP"); }
|
||||
else {
|
||||
if ( p.isIndel() ) { System.out.print("Indel"); }
|
||||
else { System.out.print("REF"); }
|
||||
}
|
||||
}
|
||||
|
||||
System.out.print('\t');
|
||||
System.out.print(p.getFWDAlleles().get(0)+"/"+p.getFWDAlleles().get(1));
|
||||
System.out.print('\t');
|
||||
System.out.println(p.getConsensusConfidence()+"\t"+p.getVariantConfidence());
|
||||
*/
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -54,7 +54,6 @@ public class RODTrackBuilder implements RMDTrackBuilder {
|
|||
|
||||
static {
|
||||
// All known ROD types
|
||||
Types.put("SAMPileup", rodSAMPileup.class);
|
||||
Types.put("GELI", rodGELI.class);
|
||||
Types.put("RefSeq", rodRefSeq.class);
|
||||
Types.put("Table", TabularROD.class);
|
||||
|
|
|
|||
|
|
@ -28,11 +28,12 @@ package org.broadinstitute.sting.gatk.walkers.qc;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.SAMPileupRecord;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodSAMPileup;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.sampileup.SAMPileupFeature;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
|
|
@ -43,14 +44,14 @@ import java.util.Arrays;
|
|||
* each overlapping read, and quality score) to the reference pileup data generated by samtools. Samtools' pileup data
|
||||
* should be specified using the command-line argument '-B pileup,SAMPileup,<your sam pileup file>'.
|
||||
*/
|
||||
@Requires(value={DataSource.READS,DataSource.REFERENCE},referenceMetaData=@RMD(name="pileup",type=rodSAMPileup.class))
|
||||
@Requires(value={DataSource.READS,DataSource.REFERENCE},referenceMetaData=@RMD(name="pileup",type=SAMPileupFeature.class))
|
||||
public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats> implements TreeReducible<ValidationStats> {
|
||||
@Argument(fullName="continue_after_error",doc="Continue after an error",required=false)
|
||||
public boolean CONTINUE_AFTER_AN_ERROR = false;
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
ReadBackedPileup pileup = context.getBasePileup();
|
||||
SAMPileupRecord truePileup = getTruePileup( tracker );
|
||||
SAMPileupFeature truePileup = getTruePileup( tracker );
|
||||
|
||||
if ( truePileup == null ) {
|
||||
out.printf("No truth pileup data available at %s%n", pileup.getPileupString(ref.getBase()));
|
||||
|
|
@ -82,11 +83,12 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
|
|||
return x;
|
||||
}
|
||||
|
||||
public static String pileupDiff(final ReadBackedPileup a, final SAMPileupRecord b, boolean orderDependent)
|
||||
public static String pileupDiff(final ReadBackedPileup a, final SAMPileupFeature b, boolean orderDependent)
|
||||
{
|
||||
if ( a.size() != b.size() )
|
||||
return "Sizes not equal";
|
||||
if ( a.getLocation().compareTo(b.getLocation()) != 0 )
|
||||
GenomeLoc featureLocation = GenomeLocParser.createGenomeLoc(b.getChr(),b.getStart(),b.getEnd());
|
||||
if ( a.getLocation().compareTo(featureLocation) != 0 )
|
||||
return "Locations not equal";
|
||||
|
||||
String aBases = maybeSorted(new String(a.getBases()), ! orderDependent );
|
||||
|
|
@ -123,8 +125,8 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
|
|||
* @param tracker ROD tracker from which to extract pileup data.
|
||||
* @return True pileup data.
|
||||
*/
|
||||
private SAMPileupRecord getTruePileup( RefMetaDataTracker tracker ) {
|
||||
rodSAMPileup pileup = tracker.lookup("pileup",rodSAMPileup.class);
|
||||
private SAMPileupFeature getTruePileup( RefMetaDataTracker tracker ) {
|
||||
SAMPileupFeature pileup = tracker.lookup("pileup",SAMPileupFeature.class);
|
||||
|
||||
if( pileup == null)
|
||||
return null;
|
||||
|
|
|
|||
Loading…
Reference in New Issue