From 266936ed73dd2ec42c9ac8116eea5a5791265b9b Mon Sep 17 00:00:00 2001 From: zzh Date: Wed, 7 Jan 2026 18:21:50 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0ProtectedCLGATK=E7=B1=BB?= =?UTF-8?q?=EF=BC=8C=E6=96=B9=E4=BE=BF=E8=B0=83=E8=AF=95protect=E6=A8=A1?= =?UTF-8?q?=E5=9D=97=E4=B8=AD=E7=9A=84=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + .vscode/launch.json | 75 ++++ .vscode/settings.json | 6 + java.sh | 2 + pom.xml | 4 + .../gatk/engine/ProtectedCLGATK.java | 355 ++++++++++++++++++ .../gatk/utils/classloader/PluginManager.java | 16 +- 7 files changed, 458 insertions(+), 1 deletion(-) create mode 100644 .vscode/launch.json create mode 100644 .vscode/settings.json create mode 100755 java.sh create mode 100644 protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/ProtectedCLGATK.java diff --git a/.gitignore b/.gitignore index 7953ef28a..f55c6e5fa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .idea/ .queue/ **/target/ +output/ /*.bam /*.bai /*.bed diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 000000000..1ce4de374 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,75 @@ +{ + // 使用 IntelliSense 了解相关属性。 + // 悬停以查看现有属性的描述。 + // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "java", + "name": "BaseRecalibrator", + "request": "launch", + // "mainClass": "org.broadinstitute.gatk.engine.CommandLineGATK", + "mainClass": "org.broadinstitute.gatk.engine.ProtectedCLGATK", + "projectName": "gatk-tools-protected", + //"classPaths": [ + // "${workspaceFolder}/target/excutable/GenomeAnalysisTK.jar", + // "${workspaceFolder}/target/excutable/lib/*" + //], + //"classPaths": [ + // "${workspaceFolder}/protected/gatk-tools-protected/target/classes/", + // "${workspaceFolder}/public/gatk-engine/target/classes/", + //], + //"vmArgs": "-verbose:class", + + "args": [ + "-T", "BaseRecalibrator", + "-R", + "~/reference/bwa/human_g1k_v37_decoy.fasta", + "-knownSites", + "~/data/reference/hg19/vcfs/dbsnp_138.b37.vcf", + "-knownSites", + "~/data/reference/hg19/vcfs/1000G_phase1.indels.b37.vcf", + "-I", + //"~/data/fastdup-mini-md.bam", + //"~/data/f1kw.sam", + //"~/data/ft1kw.sam", + //"~/data/difft1kw.sam", + //"~/data/diff.sam", + //"~/data/diff2kw.sam", + //"~/data1/x.bam", + //"~/data1/y.bam", + "~/data/z.bam", + //"~/data/z-3.sam", + //"~/data/z-1.sam", + "-o", + // "./output/out-bqsr.table", + //"./output/x.table", + //"./output/y.table", + //"./output/z.table", + //"./output/z-3.table", + "./output/f-1.table", + // "--enable-baq", + ] + }, + { + "type": "java", + "name": "ApplyBQSR", + "request": "launch", + "mainClass": "org.broadinstitute.gatk.engine.CommandLineGATK", + "projectName": "gatk", + "args": [ + "ApplyBQSR", + "-R", + "~/reference/bwa/human_g1k_v37_decoy.fasta", + "-I", + "~/data1/x.bam", + //"~/data/z.bam", + //"~/data/ft1kw.sam", + "-bqsr", + "./output/f-1.table", + "-O", + "./output/f-1.sam", + ] + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..f5c07ed3d --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "java.compile.nullAnalysis.mode": "automatic", + "java.configuration.updateBuildConfiguration": "automatic", + "java.debug.settings.onBuildFailureProceed": true, + "java.home" : "/usr/lib/jvm/java-8-openjdk-amd64" +} \ No newline at end of file diff --git a/java.sh b/java.sh new file mode 100755 index 000000000..d97997226 --- /dev/null +++ b/java.sh @@ -0,0 +1,2 @@ +export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 +export PATH=$JAVA_HOME/bin:$PATH \ No newline at end of file diff --git a/pom.xml b/pom.xml index 36162ef04..ee7117d93 100644 --- a/pom.xml +++ b/pom.xml @@ -86,6 +86,10 @@ com.sun tools + 1.8 + + system + /usr/lib/jvm/java-8-openjdk-amd64/lib/tools.jar diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/ProtectedCLGATK.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/ProtectedCLGATK.java new file mode 100644 index 000000000..2b8e749d3 --- /dev/null +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/ProtectedCLGATK.java @@ -0,0 +1,355 @@ +package org.broadinstitute.gatk.engine; + +import htsjdk.samtools.SAMException; +import htsjdk.samtools.util.zip.DeflaterFactory; +import htsjdk.tribble.TribbleException; +import org.broadinstitute.gatk.utils.commandline.Argument; +import org.broadinstitute.gatk.utils.commandline.ArgumentCollection; +import org.broadinstitute.gatk.utils.commandline.CommandLineProgram; +import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection; +import org.broadinstitute.gatk.utils.refdata.tracks.FeatureManager; +import org.broadinstitute.gatk.engine.walkers.Attribution; +import org.broadinstitute.gatk.engine.walkers.Walker; +import org.broadinstitute.gatk.utils.exceptions.UserException; +import org.broadinstitute.gatk.utils.help.*; +import org.broadinstitute.gatk.utils.text.TextFormattingUtils; + +import java.net.InetAddress; +import java.util.*; + +// 添加这个类就是为了能调试gatk-tools-protected里的代码 +public class ProtectedCLGATK extends CommandLineExecutable { + /** + * A complete list of tools (sometimes also called walkers because they "walk" + * through the data to perform analyses) + * is available in the online documentation. + */ + @Argument(fullName = "analysis_type", shortName = "T", doc = "Name of the tool to run") + private String analysisName = null; + + // our argument collection, the collection of command line args we accept + @ArgumentCollection + private GATKArgumentCollection argCollection = new GATKArgumentCollection(); + + /** + * Get pleasing info about the GATK. + * + * @return A list of Strings that contain pleasant info about the GATK. + */ + @Override + protected ApplicationDetails getApplicationDetails() { + return new ApplicationDetails(createApplicationHeader(), + getAttribution(), + ApplicationDetails.createDefaultRunningInstructions(getClass()), + getAdditionalHelp()); + } + + @Override + public String getAnalysisName() { + return analysisName; + } + + @Override + protected GATKArgumentCollection getArgumentCollection() { + return argCollection; + } + + /** + * Required main method implementation. + */ + public static void main(String[] argv) { + try { + ProtectedCLGATK instance = new ProtectedCLGATK(); + start(instance, argv); + System.exit(CommandLineProgram.result); // todo -- this is a painful hack + } catch (UserException e) { + exitSystemWithUserError(e); + } catch (TribbleException e) { + // We can generate Tribble Exceptions in weird places when e.g. VCF genotype + // fields are + // lazy loaded, so they aren't caught elsewhere and made into User Exceptions + exitSystemWithUserError(e); + } catch (SAMException e) { + checkForMaskedUserErrors(e); + exitSystemWithSamError(e); + } catch (OutOfMemoryError e) { + exitSystemWithUserError(new UserException.NotEnoughMemory()); + } catch (Throwable t) { + checkForMaskedUserErrors(t); + exitSystemWithError(t); + } + } + + public static final String PICARD_TEXT_SAM_FILE_ERROR_1 = "Cannot use index file with textual SAM file"; + public static final String PICARD_TEXT_SAM_FILE_ERROR_2 = "Cannot retrieve file pointers within SAM text files"; + public static final String NO_SPACE_LEFT_ON_DEVICE_ERROR = "No space left on device"; + public static final String DISK_QUOTA_EXCEEDED_ERROR = "Disk quota exceeded"; + + private static void checkForMaskedUserErrors(final Throwable t) { + // masked out of memory error + if (t instanceof OutOfMemoryError) + exitSystemWithUserError(new UserException.NotEnoughMemory()); + // masked user error + if (t instanceof UserException || t instanceof TribbleException) + exitSystemWithUserError(new UserException(t.getMessage())); + + // no message means no masked error + final String message = t.getMessage(); + if (message == null) + return; + + // too many open files error + if (message.contains("Too many open files")) + exitSystemWithUserError(new UserException.TooManyOpenFiles()); + + // malformed BAM looks like a SAM file + if (message.contains(PICARD_TEXT_SAM_FILE_ERROR_1) || message.contains(PICARD_TEXT_SAM_FILE_ERROR_2)) + exitSystemWithSamError(t); + + // can't close tribble index when writing + if (message.contains("Unable to close index for")) + exitSystemWithUserError(new UserException(t.getCause() == null ? message : t.getCause().getMessage())); + + // disk is full + if (message.contains(NO_SPACE_LEFT_ON_DEVICE_ERROR) || message.contains(DISK_QUOTA_EXCEEDED_ERROR)) + exitSystemWithUserError(new UserException.NoSpaceOnDevice()); + + // masked error wrapped in another one + if (t.getCause() != null) + checkForMaskedUserErrors(t.getCause()); + } + + /** + * Creates the a short blurb about the GATK, copyright info, and where to get + * documentation. + * + * @return The application header. + */ + public static List createApplicationHeader() { + List header = new ArrayList(); + header.add(String.format("The Genome Analysis Toolkit (GATK) v%s, Compiled %s", getVersionNumber(), + getBuildTime())); + header.add("Copyright (c) 2010-2016 The Broad Institute"); + header.add("For support and documentation go to " + HelpConstants.BASE_GATK_URL); + try { + header.add("[" + new Date() + "] Executing on " + + System.getProperty("os.name") + " " + System.getProperty("os.version") + + " " + System.getProperty("os.arch")); + header.add(System.getProperty("java.vm.name") + + " " + System.getProperty("java.runtime.version")); + } catch (Exception e) { + /* Unpossible! */ } + + return header; + } + + /** + * If the user supplied any additional attribution, return it here. + * + * @return Additional attribution if supplied by the user. Empty (non-null) list + * otherwise. + */ + private List getAttribution() { + List attributionLines = new ArrayList(); + + // If no analysis name is present, fill in extra help on the walkers. + WalkerManager walkerManager = engine.getWalkerManager(); + String analysisName = getAnalysisName(); + if (analysisName != null && walkerManager.exists(analysisName)) { + Class walkerType = walkerManager.getWalkerClassByName(analysisName); + if (walkerType.isAnnotationPresent(Attribution.class)) + attributionLines.addAll(Arrays.asList(walkerType.getAnnotation(Attribution.class).value())); + } + return attributionLines; + } + + /** + * Retrieves additional information about GATK walkers. + * the code in HelpFormatter and supply it as a helper to this method. + * + * @return A string summarizing the walkers available in this distribution. + */ + private String getAdditionalHelp() { + String additionalHelp; + + // If no analysis name is present, fill in extra help on the walkers. + WalkerManager walkerManager = engine.getWalkerManager(); + if (analysisName != null && walkerManager.exists(analysisName)) + additionalHelp = getWalkerHelp(walkerManager.getWalkerClassByName(analysisName)); + else + additionalHelp = getAllWalkerHelp(); + + return additionalHelp; + } + + private static final int PACKAGE_INDENT = 1; + private static final int WALKER_INDENT = 3; + private static final String FIELD_SEPARATOR = " "; + + private String getWalkerHelp(Class walkerType) { + // Construct a help string to output details on this walker. + StringBuilder additionalHelp = new StringBuilder(); + Formatter formatter = new Formatter(additionalHelp); + + formatter.format("Available Reference Ordered Data types:%n"); + formatter.format(new FeatureManager().userFriendlyListOfAvailableFeatures()); + formatter.format("%n"); + + formatter.format("For a full description of this walker, see its GATKdocs at:%n"); + formatter.format("%s%n", GATKDocUtils.helpLinksToGATKDocs(walkerType)); + + return additionalHelp.toString(); + } + + /** + * Load in additional help information about all available walkers. + * + * @return A string representation of the additional help. + */ + private String getAllWalkerHelp() { + // Construct a help string to output available walkers. + StringBuilder additionalHelp = new StringBuilder(); + Formatter formatter = new Formatter(additionalHelp); + + // Get the list of walker names from the walker manager. + WalkerManager walkerManager = engine.getWalkerManager(); + + // Build a list sorted by walker display name. As this information is collected, + // keep track of the longest + // package / walker name for later formatting. + SortedSet helpText = new TreeSet(new HelpEntryComparator()); + + int longestPackageName = 0; + int longestWalkerName = 0; + for (Map.Entry>> walkersByPackage : walkerManager + .getWalkerNamesByPackage(true).entrySet()) { + // Get the display name. + String packageName = walkersByPackage.getKey(); + String packageDisplayName = walkerManager.getPackageDisplayName(walkersByPackage.getKey()); + String packageHelpText = walkerManager.getPackageSummaryText(packageName); + + // Compute statistics about which names is longest. + longestPackageName = Math.max(longestPackageName, packageDisplayName.length()); + + SortedSet walkersInPackage = new TreeSet(new HelpEntryComparator()); + for (Class walkerType : walkersByPackage.getValue()) { + String walkerName = walkerType.getName(); + String walkerDisplayName = walkerManager.getName(walkerType); + String walkerHelpText = walkerManager.getWalkerSummaryText(walkerType); + + longestWalkerName = Math.max(longestWalkerName, walkerManager.getName(walkerType).length()); + + walkersInPackage.add(new HelpEntry(walkerName, walkerDisplayName, walkerHelpText)); + } + + // Dump the walkers into the sorted set. + helpText.add(new HelpEntry(packageName, packageDisplayName, packageHelpText, + Collections.unmodifiableSortedSet(walkersInPackage))); + } + + final int headerWidth = Math.max(longestPackageName + PACKAGE_INDENT, longestWalkerName + WALKER_INDENT); + + for (HelpEntry packageHelp : helpText) { + printDescriptorLine(formatter, PACKAGE_INDENT, packageHelp.displayName, headerWidth, FIELD_SEPARATOR, + packageHelp.summary, TextFormattingUtils.DEFAULT_LINE_WIDTH); + + for (HelpEntry walkerHelp : packageHelp.children) + printDescriptorLine(formatter, WALKER_INDENT, walkerHelp.displayName, headerWidth, FIELD_SEPARATOR, + walkerHelp.summary, TextFormattingUtils.DEFAULT_LINE_WIDTH); + + // Print a blank line between sets of walkers. + printDescriptorLine(formatter, 0, "", headerWidth, FIELD_SEPARATOR, "", + TextFormattingUtils.DEFAULT_LINE_WIDTH); + } + + return additionalHelp.toString(); + } + + private void printDescriptorLine(Formatter formatter, + int headerIndentWidth, + String header, + int headerWidth, + String fieldSeparator, + String description, + int lineWidth) { + final int headerPaddingWidth = headerWidth - header.length() - headerIndentWidth; + final int descriptionWidth = lineWidth - fieldSeparator.length() - headerWidth; + List wordWrappedText = TextFormattingUtils.wordWrap(description, descriptionWidth); + + String headerIndentFormatString = headerIndentWidth > 0 ? "%" + headerIndentWidth + "s" : "%s"; + String headerPaddingFormatString = headerPaddingWidth > 0 ? "%" + headerPaddingWidth + "s" : "%s"; + String headerWidthFormatString = headerWidth > 0 ? "%" + headerWidth + "s" : "%s"; + + // Output description line. + formatter.format(headerIndentFormatString + "%s" + headerPaddingFormatString + "%s%s%n", + "", header, "", fieldSeparator, wordWrappedText.size() > 0 ? wordWrappedText.get(0) : ""); + for (int i = 1; i < wordWrappedText.size(); i++) + formatter.format(headerWidthFormatString + "%s%s%n", "", fieldSeparator, wordWrappedText.get(i)); + } + +} + +/** + * Represents a given help entry; contains a display name, a summary and + * optionally some children. + */ +class HelpEntry { + public final String uid; + public final String displayName; + public final String summary; + public final SortedSet children; + + /** + * Create a new help entry with the given display name, summary and children. + * + * @param uid a unique identifier. Usually, the java package. + * @param displayName display name for this help entry. + * @param summary summary for this help entry. + * @param children children for this help entry. + */ + public HelpEntry(String uid, String displayName, String summary, SortedSet children) { + this.uid = uid; + this.displayName = displayName; + this.summary = summary; + this.children = children; + } + + /** + * Create a new help entry with the given display name, summary and children. + * + * @param uid a unique identifier. Usually, the java package. + * @param displayName display name for this help entry. + * @param summary summary for this help entry. + */ + public HelpEntry(String uid, String displayName, String summary) { + this(uid, displayName, summary, null); + } + +} + +/** + * Compare two help entries by display name. + */ +class HelpEntryComparator implements Comparator { + private static TextFormattingUtils.CaseInsensitiveComparator textComparator = new TextFormattingUtils.CaseInsensitiveComparator(); + + /** + * Compares the order of lhs to rhs, not taking case into account. + * + * @param lhs First object to compare. + * @param rhs Second object to compare. + * @return 0 if objects are identical; -1 if lhs is before rhs, 1 if rhs is + * before lhs. Nulls are treated as after everything else. + */ + public int compare(HelpEntry lhs, HelpEntry rhs) { + if (lhs == null && rhs == null) + return 0; + if (lhs == null || lhs.displayName.equals("")) + return 1; + if (rhs == null || rhs.displayName.equals("")) + return -1; + return lhs.displayName.equals(rhs.displayName) ? textComparator.compare(lhs.uid, rhs.uid) + : textComparator.compare(lhs.displayName, rhs.displayName); + } + +} diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/classloader/PluginManager.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/classloader/PluginManager.java index b2b6c731b..b18f8d5dc 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/classloader/PluginManager.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/classloader/PluginManager.java @@ -74,6 +74,12 @@ public class PluginManager { if (!url.equals(cwd)) classPathUrls.add(url); + //try { + // // classPathUrls.add(new URL("file:/home/zzh/work/ngs/gatk3.8/gatk-protected/protected/gatk-tools-protected/target/classes/")); + // classPathUrls.add(new URL("file:/home/zzh/work/ngs/gatk3.8/gatk-protected/protected/gatk-tools-protected/target/gatk-tools-protected-3.8-1.jar")); + //} catch (Exception e) { + //} + defaultReflections = new Reflections( new ConfigurationBuilder() .setUrls(classPathUrls) .setScanners(new SubTypesScanner())); @@ -148,10 +154,18 @@ public class PluginManager { .setScanners(new SubTypesScanner())); } + // if (classpath != null) + // for (URL cp : classpath) + // System.out.println("zzh: " + cp.toString()); + // else + // System.out.println("zzh: classpath is null"); + // Load all classes types filtering them by concrete. @SuppressWarnings("unchecked") Set> allTypes = reflections.getSubTypesOf(pluginType); - for( Class type: allTypes ) { + + for (Class type : allTypes) { + // System.out.println("zzh: " + type.getName()); // The plugin manager does not support anonymous classes; to be a plugin, a class must have a name. if(JVMUtils.isAnonymous(type)) continue;