AnalyzeAnnotations now breaks out its TiTv plots into novel SNPs, dbSNP sites, and combined.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2659 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e00cb688ac
commit
a11503819a
|
|
@ -15,7 +15,13 @@ c <- read.table(input, header=T)
|
||||||
# Plot TiTv ratio as a function of the annotation
|
# Plot TiTv ratio as a function of the annotation
|
||||||
#
|
#
|
||||||
|
|
||||||
gt = c[c$numVariants>minBinCutoff,]
|
gt = c[c$numVariants>minBinCutoff & c$category==0,]
|
||||||
|
novel = c[c$numVariants>minBinCutoff & c$category==1,]
|
||||||
|
dbsnp = c[c$numVariants>minBinCutoff & c$category==2,]
|
||||||
|
|
||||||
|
d = c[c$numVariants>minBinCutoff,]
|
||||||
|
cmin = min(d$titv)
|
||||||
|
cmax = max(d$titv)
|
||||||
|
|
||||||
outfile = paste(outputDir, "binnedTiTv.", annotationName, ".pdf", sep="")
|
outfile = paste(outputDir, "binnedTiTv.", annotationName, ".pdf", sep="")
|
||||||
pdf(outfile, height=7, width=7)
|
pdf(outfile, height=7, width=7)
|
||||||
|
|
@ -31,6 +37,24 @@ abline(v=m75,lty=2)
|
||||||
abline(v=m25,lty=2)
|
abline(v=m25,lty=2)
|
||||||
dev.off()
|
dev.off()
|
||||||
|
|
||||||
|
outfile = paste(outputDir, "binnedTiTv_novel.", annotationName, ".pdf", sep="")
|
||||||
|
pdf(outfile, height=7, width=7)
|
||||||
|
par(cex=1.1)
|
||||||
|
plot(gt$value,gt$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,yim=c(cmin,cmax));
|
||||||
|
m = weighted.mean(gt$value,gt$numVariants/sum(gt$numVariants))
|
||||||
|
ma = gt[gt$value > m,]
|
||||||
|
mb = gt[gt$value < m,]
|
||||||
|
m75 = weighted.mean(ma$value,ma$numVariants/sum(ma$numVariants))
|
||||||
|
m25 = weighted.mean(mb$value,mb$numVariants/sum(mb$numVariants))
|
||||||
|
abline(v=m,lty=2)
|
||||||
|
abline(v=m75,lty=2)
|
||||||
|
abline(v=m25,lty=2)
|
||||||
|
points(novel$value,novel$titv,col="green",pch=20)
|
||||||
|
points(dbsnp$value,dbsnp$titv,col="blue",pch=20)
|
||||||
|
legend("topleft", c("overall","novel","dbsnp"),col=c("black","green","blue"),pch=c(20,20,20))
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
outfile = paste(outputDir, "binnedTiTv_quartiles.", annotationName, ".pdf", sep="")
|
outfile = paste(outputDir, "binnedTiTv_quartiles.", annotationName, ".pdf", sep="")
|
||||||
pdf(outfile, height=7, width=7)
|
pdf(outfile, height=7, width=7)
|
||||||
par(cex=1.1)
|
par(cex=1.1)
|
||||||
|
|
|
||||||
|
|
@ -41,10 +41,16 @@ import java.io.FileNotFoundException;
|
||||||
|
|
||||||
public class AnnotationDataManager {
|
public class AnnotationDataManager {
|
||||||
|
|
||||||
public final HashMap<String, TreeSet<AnnotationDatum>> data;
|
public final HashMap<String, TreeSet<AnnotationDatum>> dataFull;
|
||||||
|
public final HashMap<String, TreeSet<AnnotationDatum>> dataNovel;
|
||||||
|
public final HashMap<String, TreeSet<AnnotationDatum>> dataDBsnp;
|
||||||
|
public final HashMap<String, TreeSet<AnnotationDatum>> dataTruthSet;
|
||||||
|
|
||||||
public AnnotationDataManager() {
|
public AnnotationDataManager() {
|
||||||
data = new HashMap<String, TreeSet<AnnotationDatum>>();
|
dataFull = new HashMap<String, TreeSet<AnnotationDatum>>();
|
||||||
|
dataNovel = new HashMap<String, TreeSet<AnnotationDatum>>();
|
||||||
|
dataDBsnp = new HashMap<String, TreeSet<AnnotationDatum>>();
|
||||||
|
dataTruthSet = new HashMap<String, TreeSet<AnnotationDatum>>();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addAnnotations( RodVCF variant ) {
|
public void addAnnotations( RodVCF variant ) {
|
||||||
|
|
@ -57,10 +63,51 @@ public class AnnotationDataManager {
|
||||||
}
|
}
|
||||||
final float value = Float.parseFloat( infoField.get( annotationKey ) );
|
final float value = Float.parseFloat( infoField.get( annotationKey ) );
|
||||||
|
|
||||||
TreeSet<AnnotationDatum> treeSet = data.get( annotationKey );
|
if( variant.getID().equals(".") ) { // This is a novel variant
|
||||||
|
TreeSet<AnnotationDatum> treeSet = dataNovel.get( annotationKey );
|
||||||
if( treeSet == null ) { // This annotation hasn't been seen before
|
if( treeSet == null ) { // This annotation hasn't been seen before
|
||||||
treeSet = new TreeSet<AnnotationDatum>( new AnnotationDatum() ); // AnnotationDatum is a Comparator that orders variants by the value of the Annotation
|
treeSet = new TreeSet<AnnotationDatum>( new AnnotationDatum() ); // AnnotationDatum is a Comparator that orders variants by the value of the Annotation
|
||||||
data.put( annotationKey, treeSet );
|
dataNovel.put( annotationKey, treeSet );
|
||||||
|
}
|
||||||
|
AnnotationDatum datum = new AnnotationDatum( value );
|
||||||
|
if( treeSet.contains(datum) ) {
|
||||||
|
datum = treeSet.tailSet(datum).first();
|
||||||
|
} else {
|
||||||
|
treeSet.add(datum);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decide if it was a Ti or a Tv
|
||||||
|
if( BaseUtils.isTransition(variant.getReferenceForSNP(), variant.getAlternativeBaseForSNP()) ) {
|
||||||
|
datum.incrementTi();
|
||||||
|
} else {
|
||||||
|
datum.incrementTv();
|
||||||
|
}
|
||||||
|
} else { // This is a DBsnp variant
|
||||||
|
TreeSet<AnnotationDatum> treeSet = dataDBsnp.get( annotationKey );
|
||||||
|
if( treeSet == null ) { // This annotation hasn't been seen before
|
||||||
|
treeSet = new TreeSet<AnnotationDatum>( new AnnotationDatum() ); // AnnotationDatum is a Comparator that orders variants by the value of the Annotation
|
||||||
|
dataDBsnp.put( annotationKey, treeSet );
|
||||||
|
}
|
||||||
|
AnnotationDatum datum = new AnnotationDatum( value );
|
||||||
|
if( treeSet.contains(datum) ) {
|
||||||
|
datum = treeSet.tailSet(datum).first();
|
||||||
|
} else {
|
||||||
|
treeSet.add(datum);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decide if it was a Ti or a Tv
|
||||||
|
if( BaseUtils.isTransition(variant.getReferenceForSNP(), variant.getAlternativeBaseForSNP()) ) {
|
||||||
|
datum.incrementTi();
|
||||||
|
} else {
|
||||||
|
datum.incrementTv();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// the overall set, containing both novel and DBsnp variants
|
||||||
|
TreeSet<AnnotationDatum> treeSet = dataFull.get( annotationKey );
|
||||||
|
if( treeSet == null ) { // This annotation hasn't been seen before
|
||||||
|
treeSet = new TreeSet<AnnotationDatum>( new AnnotationDatum() ); // AnnotationDatum is a Comparator that orders variants by the value of the Annotation
|
||||||
|
dataFull.put( annotationKey, treeSet );
|
||||||
}
|
}
|
||||||
AnnotationDatum datum = new AnnotationDatum( value );
|
AnnotationDatum datum = new AnnotationDatum( value );
|
||||||
if( treeSet.contains(datum) ) {
|
if( treeSet.contains(datum) ) {
|
||||||
|
|
@ -81,7 +128,7 @@ public class AnnotationDataManager {
|
||||||
public void plotCumulativeTables( final String PATH_TO_RSCRIPT, final String PATH_TO_RESOURCES, final String OUTPUT_DIR,
|
public void plotCumulativeTables( final String PATH_TO_RSCRIPT, final String PATH_TO_RESOURCES, final String OUTPUT_DIR,
|
||||||
final int MIN_VARIANTS_PER_BIN, final int MAX_VARIANTS_PER_BIN ) {
|
final int MIN_VARIANTS_PER_BIN, final int MAX_VARIANTS_PER_BIN ) {
|
||||||
|
|
||||||
for( String annotationKey: data.keySet() ) {
|
for( String annotationKey: dataFull.keySet() ) {
|
||||||
|
|
||||||
/*
|
/*
|
||||||
PrintStream output;
|
PrintStream output;
|
||||||
|
|
@ -130,13 +177,13 @@ public class AnnotationDataManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Output a header line
|
// Output a header line
|
||||||
output.println("value\ttitv\tnumVariants\t");
|
output.println("value\ttitv\tnumVariants\tcategory");
|
||||||
|
|
||||||
// Bin SNPs and calculate truth metrics for each bin, right now this is just TiTv
|
// Bin SNPs and calculate truth metrics for each bin, right now this is just TiTv
|
||||||
int numTi = 0;
|
int numTi = 0;
|
||||||
int numTv = 0;
|
int numTv = 0;
|
||||||
AnnotationDatum lastDatum = null;
|
AnnotationDatum lastDatum = null;
|
||||||
for( AnnotationDatum datum : data.get( annotationKey ) ) {
|
for( AnnotationDatum datum : dataFull.get( annotationKey ) ) {
|
||||||
numTi += datum.numTransitions;
|
numTi += datum.numTransitions;
|
||||||
numTv += datum.numTransversions;
|
numTv += datum.numTransversions;
|
||||||
lastDatum = datum;
|
lastDatum = datum;
|
||||||
|
|
@ -144,7 +191,7 @@ public class AnnotationDataManager {
|
||||||
float titv;
|
float titv;
|
||||||
if( numTv == 0) { titv = 0.0f; }
|
if( numTv == 0) { titv = 0.0f; }
|
||||||
else { titv = ((float) numTi) / ((float) numTv); }
|
else { titv = ((float) numTi) / ((float) numTv); }
|
||||||
output.println(datum.value + "\t" + titv + "\t" + (numTi+numTv));
|
output.println(datum.value + "\t" + titv + "\t" + (numTi+numTv) + "\t0");
|
||||||
numTi = 0;
|
numTi = 0;
|
||||||
numTv = 0;
|
numTv = 0;
|
||||||
}
|
}
|
||||||
|
|
@ -154,9 +201,59 @@ public class AnnotationDataManager {
|
||||||
float titv;
|
float titv;
|
||||||
if( numTv == 0) { titv = 0.0f; }
|
if( numTv == 0) { titv = 0.0f; }
|
||||||
else { titv = ((float) numTi) / ((float) numTv); }
|
else { titv = ((float) numTi) / ((float) numTv); }
|
||||||
output.println(lastDatum.value + "\t" + titv + "\t" + (numTi+numTv));
|
output.println(lastDatum.value + "\t" + titv + "\t" + (numTi+numTv)+ "\t0");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
numTi = 0;
|
||||||
|
numTv = 0;
|
||||||
|
lastDatum = null;
|
||||||
|
for( AnnotationDatum datum : dataNovel.get( annotationKey ) ) {
|
||||||
|
numTi += datum.numTransitions;
|
||||||
|
numTv += datum.numTransversions;
|
||||||
|
lastDatum = datum;
|
||||||
|
if( numTi + numTv >= MAX_VARIANTS_PER_BIN/3 ) { // This annotation bin is full
|
||||||
|
float titv;
|
||||||
|
if( numTv == 0) { titv = 0.0f; }
|
||||||
|
else { titv = ((float) numTi) / ((float) numTv); }
|
||||||
|
output.println(datum.value + "\t" + titv + "\t" + (numTi+numTv) + "\t1");
|
||||||
|
numTi = 0;
|
||||||
|
numTv = 0;
|
||||||
|
}
|
||||||
|
// else, continue accumulating variants because this bin isn't full yet
|
||||||
|
}
|
||||||
|
if( numTi != 0 || numTv != 0 ) { // one final bin that may not have been dumped out
|
||||||
|
float titv;
|
||||||
|
if( numTv == 0) { titv = 0.0f; }
|
||||||
|
else { titv = ((float) numTi) / ((float) numTv); }
|
||||||
|
output.println(lastDatum.value + "\t" + titv + "\t" + (numTi+numTv)+ "\t1");
|
||||||
|
}
|
||||||
|
|
||||||
|
numTi = 0;
|
||||||
|
numTv = 0;
|
||||||
|
lastDatum = null;
|
||||||
|
for( AnnotationDatum datum : dataDBsnp.get( annotationKey ) ) {
|
||||||
|
numTi += datum.numTransitions;
|
||||||
|
numTv += datum.numTransversions;
|
||||||
|
lastDatum = datum;
|
||||||
|
if( numTi + numTv >= MAX_VARIANTS_PER_BIN ) { // This annotation bin is full
|
||||||
|
float titv;
|
||||||
|
if( numTv == 0) { titv = 0.0f; }
|
||||||
|
else { titv = ((float) numTi) / ((float) numTv); }
|
||||||
|
output.println(datum.value + "\t" + titv + "\t" + (numTi+numTv) + "\t2");
|
||||||
|
numTi = 0;
|
||||||
|
numTv = 0;
|
||||||
|
}
|
||||||
|
// else, continue accumulating variants because this bin isn't full yet
|
||||||
|
}
|
||||||
|
if( numTi != 0 || numTv != 0 ) { // one final bin that may not have been dumped out
|
||||||
|
float titv;
|
||||||
|
if( numTv == 0) { titv = 0.0f; }
|
||||||
|
else { titv = ((float) numTi) / ((float) numTv); }
|
||||||
|
output.println(lastDatum.value + "\t" + titv + "\t" + (numTi+numTv)+ "\t2");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
System.out.println(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_CumulativeTiTv.R" + " " +
|
System.out.println(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_CumulativeTiTv.R" + " " +
|
||||||
OUTPUT_DIR + annotationKey + ".cumulative.dat" + " " + OUTPUT_DIR + " " + annotationKey);
|
OUTPUT_DIR + annotationKey + ".cumulative.dat" + " " + OUTPUT_DIR + " " + annotationKey);
|
||||||
|
|
@ -169,9 +266,9 @@ public class AnnotationDataManager {
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
//System.out.println(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTiTv.R" + " " +
|
System.out.println(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Annotations_BinnedTiTv.R" + " " +
|
||||||
// OUTPUT_DIR + annotationKey + ".binned.dat" + " " + OUTPUT_DIR + " " + annotationKey +
|
OUTPUT_DIR + annotationKey + ".binned.dat" + " " + OUTPUT_DIR + " " + annotationKey +
|
||||||
// " " + MIN_VARIANTS_PER_BIN);
|
" " + MIN_VARIANTS_PER_BIN);
|
||||||
|
|
||||||
// Execute the RScript command to plot the table of TiTv values
|
// Execute the RScript command to plot the table of TiTv values
|
||||||
try {
|
try {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue