From 38d822fac7006f7d9c4a7f0d204c0b96017093a0 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Wed, 10 Sep 2025 22:10:10 -0400 Subject: [PATCH 01/27] - fix links to fingerprint package and update the docs --- rcdk/DESCRIPTION | 2 +- rcdk/R/fingerprint.R | 54 ++++++++++++++++++------------------- rcdk/man/get.fingerprint.Rd | 12 ++++----- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/rcdk/DESCRIPTION b/rcdk/DESCRIPTION index 85815618ea..5d986d7cb5 100755 --- a/rcdk/DESCRIPTION +++ b/rcdk/DESCRIPTION @@ -35,7 +35,7 @@ Description: Allows the user to access functionality in the 'CDK', a Java framework for chemoinformatics. This allows the user to load molecules, evaluate fingerprints, calculate molecular descriptors and so on. In addition, the 'CDK' API allows the user to view structures in 2D. -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.3 VignetteBuilder: knitr Encoding: UTF-8 Remotes: CDK-R/depict diff --git a/rcdk/R/fingerprint.R b/rcdk/R/fingerprint.R index a145987c25..bd4af48efd 100755 --- a/rcdk/R/fingerprint.R +++ b/rcdk/R/fingerprint.R @@ -1,8 +1,8 @@ #' Generate molecular fingerprints -#' +#' #' `get.fingerprint` returns a `fingerprint` object representing molecular fingerprint of #' the input molecule. -#' +#' #' @param molecule A \code{jobjRef} object to an \code{IAtomContaine} #' @param type The type of fingerprint. Possible values are: #' \itemize{ @@ -18,52 +18,52 @@ #' \item pubchem - 881 bit fingerprints defined by PubChem #' \item kr - 4860 bit fingerprint defined by Klekota and Roth #' \item shortestpath - A fingerprint based on the shortest paths between pairs of atoms and takes into account ring systems, charges etc. -#' \item signature - A feature,count type of fingerprint, similar in nature to circular fingerprints, but based on the signature +#' \item signature - A feature,count type of fingerprint, similar in nature to circular fingerprints, but based on the signature #' descriptor #' \item circular - An implementation of the ECFP6 (default) fingerprint. Other circular types can be chosen by modifying the \code{circular.type} parameter. #' \item substructure - Fingerprint based on list of SMARTS pattern. By default a set of functional groups is tested. #' } #' @param fp.mode The style of fingerprint. Specifying "`bit`" will return a binary fingerprint, -#' "`raw`" returns the the original representation (usually sequence of integers) and +#' "`raw`" returns the the original representation (usually sequence of integers) and #' "`count`" returns the fingerprint as a sequence of counts. #' @param depth The search depth. This argument is ignored for the #' `pubchem`, `maccs`, `kr` and `estate` fingerprints -#' @param size The final length of the fingerprint. -#' This argument is ignored for the `pubchem`, `maccs`, `kr`, `signature`, `circular` and +#' @param size The final length of the fingerprint. +#' This argument is ignored for the `pubchem`, `maccs`, `kr`, `signature`, `circular` and #' `estate` fingerprints #' @param substructure.pattern List of characters containing the SMARTS pattern to match. If the an empty list is provided (default) than the functional groups substructures (default in CDK) are used. #' @param circular.type Name of the circular fingerprint type that should be computed given as string. Possible values are: 'ECFP0', 'ECFP2', 'ECFP4', 'ECFP6' (default), 'FCFP0', 'FCFP2', 'FCFP4' and 'FCFP6'. #' @param verbose Verbose output if \code{TRUE} -#' @return an S4 object of class \code{\link{fingerprint-class}} or \code{\link{featvec-class}}, +#' @return an S4 object of class \code{\link[fingerprint]{fingerprint-class}} or \code{\link[fingerprint]{featvec-class}}, #' which can be manipulated with the fingerprint package. #' @export #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com}) -#' @examples +#' @examples #' ## get some molecules #' sp <- get.smiles.parser() #' smiles <- c('CCC', 'CCN', 'CCN(C)(C)', 'c1ccccc1Cc1ccccc1','C1CCC1CC(CN(C)(C))CC(=O)CC') #' mols <- parse.smiles(smiles) -#' +#' #' ## get a single fingerprint using the standard #' ## (hashed, path based) fingerprinter #' fp <- get.fingerprint(mols[[1]]) -#' +#' #' ## get MACCS keys for all the molecules #' fps <- lapply(mols, get.fingerprint, type='maccs') -#' +#' #' ## get Signature fingerprint #' ## feature, count fingerprinter #' fps <- lapply(mols, get.fingerprint, type='signature', fp.mode='raw') #' ## get Substructure fingerprint for functional group fragments #' fps <- lapply(mols, get.fingerprint, type='substructure') -#' +#' #' ## get Substructure count fingerprint for user defined fragments #' mol1 <- parse.smiles("c1ccccc1CCC")[[1]] #' smarts <- c("c1ccccc1", "[CX4H3][#6]", "[CX2]#[CX2]") #' fps <- get.fingerprint(mol1, type='substructure', fp.mode='count', #' substructure.pattern=smarts) -#' -#' ## get ECFP0 count fingerprints +#' +#' ## get ECFP0 count fingerprints #' mol2 <- parse.smiles("C1=CC=CC(=C1)CCCC2=CC=CC=C2")[[1]] #' fps <- get.fingerprint(mol2, type='circular', fp.mode='count', circular.type='ECFP0') get.fingerprint <- function(molecule, type = 'standard', fp.mode = 'bit', depth=6, size=1024, substructure.pattern=character(), circular.type = "ECFP6", verbose=FALSE) { @@ -75,17 +75,17 @@ get.fingerprint <- function(molecule, type = 'standard', fp.mode = 'bit', depth= mode(size) <- 'integer' mode(depth) <- 'integer' - + # Determine integer ID for the circular fingerprint given its desired type. - # This allows us to use also ECFP4, ... + # This allows us to use also ECFP4, ... if (type == 'circular') { - circular.type.id <- switch(circular.type, + circular.type.id <- switch(circular.type, ECFP0 = 1, ECFP2 = 2, ECFP4 = 3, ECFP6 = 4, FCFP0 = 5, FCFP2 = 6, FCFP4 = 7, FCFP6 = 8, NULL) - + if (is.null(circular.type.id)) stop(paste('Invalid circular fingerprint type: ', circular.type)) - + mode(circular.type.id) <- 'integer' } @@ -103,8 +103,8 @@ get.fingerprint <- function(molecule, type = 'standard', fp.mode = 'bit', depth= shortestpath = .jnew('org/openscience/cdk/fingerprint/ShortestPathFingerprinter', size), signature = .jnew('org/openscience/cdk/fingerprint/SignatureFingerprinter', depth), circular = .jnew('org/openscience/cdk/fingerprint/CircularFingerprinter', circular.type.id), - substructure = - if (length(substructure.pattern) == 0) + substructure = + if (length(substructure.pattern) == 0) # Loads the default group substructures { .jnew('org/openscience/cdk/fingerprint/SubstructureFingerprinter') } else @@ -125,11 +125,11 @@ get.fingerprint <- function(molecule, type = 'standard', fp.mode = 'bit', depth= } else if (fp.mode == 'count') { jfp <- .jcall(fingerprinter, "Lorg/openscience/cdk/fingerprint/ICountFingerprint;", - "getCountFingerprint", molecule, check=FALSE) + "getCountFingerprint", molecule, check=FALSE) } - + e <- .jgetEx() - if (.jcheck(silent=TRUE)) { + if (.jcheck(silent=TRUE)) { if (verbose) print(e) return(NULL) } @@ -139,7 +139,7 @@ get.fingerprint <- function(molecule, type = 'standard', fp.mode = 'bit', depth= if (fp.mode == 'bit') { bitset <- .jcall(jfp, "Ljava/util/BitSet;", "asBitSet") - + if (type == 'maccs') nbit <- 166 else if (type == 'estate') nbit <- 79 else if (type == 'pubchem') nbit <- 881 @@ -147,7 +147,7 @@ get.fingerprint <- function(molecule, type = 'standard', fp.mode = 'bit', depth= else if (type == 'substructure') nbit <- .jcall(fingerprinter, "I", "getSize") else if (type == 'circular') nbit <- .jcall(fingerprinter, "I", "getSize") else nbit <- size - + bitset <- .jcall(bitset, "S", "toString") s <- gsub('[{}]','', bitset) s <- strsplit(s, split=',')[[1]] @@ -166,7 +166,7 @@ get.fingerprint <- function(molecule, type = 'standard', fp.mode = 'bit', depth= tempkey <- .jsimplify(tempkey) } keys[[i]] <- tempkey - + } values <- list() diff --git a/rcdk/man/get.fingerprint.Rd b/rcdk/man/get.fingerprint.Rd index e057889b3e..605e0ac87f 100755 --- a/rcdk/man/get.fingerprint.Rd +++ b/rcdk/man/get.fingerprint.Rd @@ -32,21 +32,21 @@ atomic properties into account into account \item pubchem - 881 bit fingerprints defined by PubChem \item kr - 4860 bit fingerprint defined by Klekota and Roth \item shortestpath - A fingerprint based on the shortest paths between pairs of atoms and takes into account ring systems, charges etc. -\item signature - A feature,count type of fingerprint, similar in nature to circular fingerprints, but based on the signature +\item signature - A feature,count type of fingerprint, similar in nature to circular fingerprints, but based on the signature descriptor \item circular - An implementation of the ECFP6 (default) fingerprint. Other circular types can be chosen by modifying the \code{circular.type} parameter. \item substructure - Fingerprint based on list of SMARTS pattern. By default a set of functional groups is tested. }} \item{fp.mode}{The style of fingerprint. Specifying "`bit`" will return a binary fingerprint, -"`raw`" returns the the original representation (usually sequence of integers) and +"`raw`" returns the the original representation (usually sequence of integers) and "`count`" returns the fingerprint as a sequence of counts.} \item{depth}{The search depth. This argument is ignored for the `pubchem`, `maccs`, `kr` and `estate` fingerprints} -\item{size}{The final length of the fingerprint. -This argument is ignored for the `pubchem`, `maccs`, `kr`, `signature`, `circular` and +\item{size}{The final length of the fingerprint. +This argument is ignored for the `pubchem`, `maccs`, `kr`, `signature`, `circular` and `estate` fingerprints} \item{substructure.pattern}{List of characters containing the SMARTS pattern to match. If the an empty list is provided (default) than the functional groups substructures (default in CDK) are used.} @@ -56,7 +56,7 @@ This argument is ignored for the `pubchem`, `maccs`, `kr`, `signature`, `circula \item{verbose}{Verbose output if \code{TRUE}} } \value{ -an S4 object of class \code{\link{fingerprint-class}} or \code{\link{featvec-class}}, +an S4 object of class \code{\link[fingerprint]{fingerprint-class}} or \code{\link[fingerprint]{featvec-class}}, which can be manipulated with the fingerprint package. } \description{ @@ -88,7 +88,7 @@ smarts <- c("c1ccccc1", "[CX4H3][#6]", "[CX2]#[CX2]") fps <- get.fingerprint(mol1, type='substructure', fp.mode='count', substructure.pattern=smarts) -## get ECFP0 count fingerprints +## get ECFP0 count fingerprints mol2 <- parse.smiles("C1=CC=CC(=C1)CCCC2=CC=CC=C2")[[1]] fps <- get.fingerprint(mol2, type='circular', fp.mode='count', circular.type='ECFP0') } From 1dfc6e52e9b62eae4d252654637fdbe27f0162d1 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Wed, 10 Sep 2025 22:14:37 -0400 Subject: [PATCH 02/27] Add Egon and bump version --- rcdk/DESCRIPTION | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rcdk/DESCRIPTION b/rcdk/DESCRIPTION index 5d986d7cb5..e1ae3a0c46 100755 --- a/rcdk/DESCRIPTION +++ b/rcdk/DESCRIPTION @@ -1,6 +1,6 @@ Package: rcdk -Version: 3.9.0 -Date: 2024-03-02 +Version: 3.8.2 +Date: 2025-09-10 Title: Interface to the 'CDK' Libraries Authors@R: c( person('Rajarshi', 'Guha', ,'rajarshi.guha@gmail.com', role=c('aut',"cph"), @@ -8,7 +8,9 @@ Authors@R: c( person('Zachary', 'Charlop-Powers', ,'zach.charlop.powers@gmail.com',role=c('cre'), comment = c(ORCID = "0000-0001-8816-4680")), person('Emma', 'Schymanski', ,'schymane@gmail.com', role=c('ctb'), - comment = c(ORCID = "0000-0001-6868-8145"))) + comment = c(ORCID = "0000-0001-6868-8145")), + person('Egon', 'Willighagen', ,'egon.willighagen@maastrichtuniversity.nl', role=c('ctb'), + comment = c(ORCID = "0000-0001-7542-0286"))) Depends: rcdklibs (>= 2.9) Imports: From 020661b38a7b0acefaa1c05741d40126a94a41b5 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Wed, 10 Sep 2025 22:20:58 -0400 Subject: [PATCH 03/27] Fix a few test fns and lints --- rcdk/R/deprecated_functions.R | 11 +++++------ rcdk/inst/unitTests/runit.frags.R | 8 ++++---- rcdk/inst/unitTests/runit.match.R | 10 +++++----- rcdk/man/rcdk-deprecated.Rd | 6 ++---- 4 files changed, 16 insertions(+), 19 deletions(-) diff --git a/rcdk/R/deprecated_functions.R b/rcdk/R/deprecated_functions.R index 4632ef0a94..2b340a5225 100644 --- a/rcdk/R/deprecated_functions.R +++ b/rcdk/R/deprecated_functions.R @@ -1,20 +1,19 @@ ################################################################################ #' Deprecated functions in the rcdk package. -#' +#' #' These functions are provided for compatibility with older version of #' the phyloseq package. They may eventually be completely #' removed. -#' +#' #' @usage deprecated_rcdk_function(x, value, ...) #' @rdname rcdk-deprecated #' @name rcdk-deprecated #' @param x For assignment operators, the object that will undergo a replacement #' (object inside parenthesis). -#' @param value For assignment operators, the value to replace with +#' @param value For assignment operators, the value to replace with #' (the right side of the assignment). -#' @param ... For functions other than assignment operators, +#' @param ... For functions other than assignment operators, #' parameters to be passed to the modern version of the function (see table). -#' @docType package #' @export do.typing #' @aliases deprecated_rcdk_function do.typing #' @details @@ -24,4 +23,4 @@ #' deprecated_rcdk_function <- function(x, value, ...){return(NULL)} do.typing <- function(...){.Deprecated("set.atom.types", package="rcdk");return(set.atom.types(...))} -################################################################################ \ No newline at end of file +################################################################################ diff --git a/rcdk/inst/unitTests/runit.frags.R b/rcdk/inst/unitTests/runit.frags.R index 7652c84f10..48cbf4b3b7 100755 --- a/rcdk/inst/unitTests/runit.frags.R +++ b/rcdk/inst/unitTests/runit.frags.R @@ -1,6 +1,6 @@ test.frag1 <- function() { m <- parse.smiles("c1(ccc(cc1C)CCC(C(CCC)C2C(C2)CC)C3C=C(C=C3)CC)C")[[1]] - do.aromaticity(m) + do.aromaticity(m) set.atom.types(m) f <- get.murcko.fragments(m, as.smiles=TRUE, min.frag.size = 6, single.framework = TRUE) checkEquals(length(f), 1) @@ -13,7 +13,7 @@ test.frag2 <- function() { ms <- parse.smiles(c('c1(ccc(cc1C)CCC(C(CCC)C2C(C2)CC)C3C=C(C=C3)CC)C', 'c1ccc(cc1)c2c(oc(n2)N(CCO)CCO)c3ccccc3', 'COc1ccc(cc1OCc2ccccc2)C(=S)N3CCOCC3')) - lapply(ms, do.aromaticity) + lapply(ms, do.aromaticity) lapply(ms, set.atom.types) f <- get.murcko.fragments(ms, as.smiles=TRUE, min.frag.size = 6, single.framework = TRUE) checkEquals(length(f), 3) @@ -26,11 +26,11 @@ test.frag3 <- function() { ms <- parse.smiles(c('c1(ccc(cc1C)CCC(C(CCC)C2C(C2)CC)C3C=C(C=C3)CC)C', 'c1ccc(cc1)c2c(oc(n2)N(CCO)CCO)c3ccccc3', 'COc1ccc(cc1OCc2ccccc2)C(=S)N3CCOCC3')) - lapply(ms, do.aromaticity) + lapply(ms, do.aromaticity) lapply(ms, set.atom.types) f <- get.murcko.fragments(ms, as.smiles=FALSE, min.frag.size = 6, single.framework = TRUE) checkEquals(length(f), 3) fworks <- unlist(lapply(f, function(x) unlist(lapply(x$frameworks, .jclass)))) - checkTrue(all(fworks == "org.openscience.cdk.silent.AtomContainer2")) + checkTrue(all(fworks %in% c("org.openscience.cdk.silent.AtomContainer", "org.openscience.cdk.silent.AtomContainer2"))) } diff --git a/rcdk/inst/unitTests/runit.match.R b/rcdk/inst/unitTests/runit.match.R index 42aee2636b..b2cb12bf05 100755 --- a/rcdk/inst/unitTests/runit.match.R +++ b/rcdk/inst/unitTests/runit.match.R @@ -44,25 +44,25 @@ test.match4 <- function() test.mcs1 <- function() { mols <- parse.smiles(c("NCc1ccccc1OC(=N)CCN", "c1ccccc1OC(=N)")) lapply(mols, do.aromaticity) - lapply(mols, set.atom.types) + lapply(mols, set.atom.types) mcs <- get.mcs(mols[[1]], mols[[2]], TRUE) - checkEquals("org.openscience.cdk.silent.AtomContainer2", .jclass(mcs)) + checkTrue(.jclass(mcs) %in% c("org.openscience.cdk.silent.AtomContainer", "org.openscience.cdk.silent.AtomContainer2")) checkEquals(9, get.atom.count(mcs)) } test.mcs3 <- function() { mols <- parse.smiles(c("c1cccc(COC(=O)NC(CC(C)C)C(=O)NC(CCc2ccccc2)C(=O)COC)c1", "c1cccc(COC(=O)NC(CC(C)C)C(=O)NCC#N)c1")) lapply(mols, do.aromaticity) - lapply(mols, set.atom.types) + lapply(mols, set.atom.types) mcs <- get.mcs(mols[[1]], mols[[2]], TRUE) - checkEquals("org.openscience.cdk.silent.AtomContainer2", .jclass(mcs)) + checkTrue(.jclass(mcs) %in% c("org.openscience.cdk.silent.AtomContainer", "org.openscience.cdk.silent.AtomContainer2")) checkEquals(21, get.atom.count(mcs)) } test.mcs2 <- function() { mols <- parse.smiles(c("NCc1ccccc1OC(=N)CCN", "c1ccccc1OC(=N)")) lapply(mols, do.aromaticity) - lapply(mols, set.atom.types) + lapply(mols, set.atom.types) mcs <- get.mcs(mols[[1]], mols[[2]], FALSE) checkTrue(inherits(mcs, "matrix")) checkEquals(9, nrow(mcs)) diff --git a/rcdk/man/rcdk-deprecated.Rd b/rcdk/man/rcdk-deprecated.Rd index 565572b90a..1dd9e4a0bf 100644 --- a/rcdk/man/rcdk-deprecated.Rd +++ b/rcdk/man/rcdk-deprecated.Rd @@ -1,8 +1,6 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprecated_functions.R -\docType{package} \name{rcdk-deprecated} -\alias{-package} \alias{rcdk-deprecated} \alias{deprecated_rcdk_function} \alias{do.typing} @@ -14,10 +12,10 @@ deprecated_rcdk_function(x, value, ...) \item{x}{For assignment operators, the object that will undergo a replacement (object inside parenthesis).} -\item{value}{For assignment operators, the value to replace with +\item{value}{For assignment operators, the value to replace with (the right side of the assignment).} -\item{...}{For functions other than assignment operators, +\item{...}{For functions other than assignment operators, parameters to be passed to the modern version of the function (see table).} } \description{ From 10d4d25dde33b8a574f2aa44817bf951a3ff610f Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Wed, 10 Sep 2025 22:24:12 -0400 Subject: [PATCH 04/27] update news file and Cran comments for submission --- rcdk/NEWS.md | 4 ++-- rcdk/cran-comments.md | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/rcdk/NEWS.md b/rcdk/NEWS.md index 93b8833b64..fa4bf8ca0f 100755 --- a/rcdk/NEWS.md +++ b/rcdk/NEWS.md @@ -1,6 +1,6 @@ -# rcdk 3.9.0 +# rcdk 3.8.2 -* Update rCDK to work with rcdklibs 2.9 +* Update rCDK to work with rcdklibs 2.11 # rcdk 3.8.0 diff --git a/rcdk/cran-comments.md b/rcdk/cran-comments.md index cd7560c9fa..acdf4a6d17 100755 --- a/rcdk/cran-comments.md +++ b/rcdk/cran-comments.md @@ -1,5 +1,6 @@ ## General Comments -- submission of rcdk 3.8.1 -- fix to the SystemRequirements Field -- update CITATION file +- submission of rcdk 3.8.2 +- add Egon +- update links to fingerprint +- fix a few lints and tests From 15089f1458c53a269749778f582aa628f5ada5fb Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Wed, 10 Sep 2025 22:34:55 -0400 Subject: [PATCH 05/27] update cache --- .github/workflows/R-CMD-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 0041cccfb3..6a4b9a7ef5 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -46,7 +46,7 @@ jobs: shell: Rscript {0} - name: Cache R packages - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ${{ env.R_LIBS_USER }} key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} From 120ec334499f2bd84d035f3d8d58d131f1733af1 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Thu, 11 Sep 2025 20:52:15 -0400 Subject: [PATCH 06/27] http->https URL Updates --- README.Rmd | 2 +- rcdk/DESCRIPTION | 1 - rcdk/R/formula.R | 6 +++--- rcdk/R/rcdk.R | 4 ++-- rcdk/R/smiles.R | 12 ++++++------ rcdk/man/compare.isotope.pattern.Rd | 2 +- rcdk/man/get.chem.object.builder.Rd | 4 ++-- rcdk/man/get.isotope.pattern.generator.Rd | 2 +- rcdk/man/get.isotope.pattern.similarity.Rd | 2 +- rcdk/man/get.smiles.Rd | 4 ++-- rcdk/man/get.smiles.parser.Rd | 2 +- rcdk/man/smiles.flavors.Rd | 6 +++--- 12 files changed, 23 insertions(+), 24 deletions(-) diff --git a/README.Rmd b/README.Rmd index ca99152714..d42e252237 100755 --- a/README.Rmd +++ b/README.Rmd @@ -11,7 +11,7 @@ knitr::opts_chunk$set( ) ``` -[![Build Status](https://api.travis-ci.org/CDK-R/cdkr.svg?branch=master)](https://travis-ci.org/CDK-R/cdkr) + [![CRAN Version](https://www.r-pkg.org/badges/version/rcdk?color=green)](https://cran.r-project.org/package=rcdk) [![CRAN Downloads](http://cranlogs.r-pkg.org/badges/grand-total/rcdk?color=green)](https://cran.r-project.org/package=rcdk) [![CRAN Downloads Monthyl](http://cranlogs.r-pkg.org/badges/last-month/rcdk?color=green)](https://cran.r-project.org/package=rcdk) diff --git a/rcdk/DESCRIPTION b/rcdk/DESCRIPTION index e1ae3a0c46..3cd9d64c5e 100755 --- a/rcdk/DESCRIPTION +++ b/rcdk/DESCRIPTION @@ -40,4 +40,3 @@ Description: Allows the user to access functionality in the RoxygenNote: 7.3.3 VignetteBuilder: knitr Encoding: UTF-8 -Remotes: CDK-R/depict diff --git a/rcdk/R/formula.R b/rcdk/R/formula.R index dfa58ac84b..855de50111 100755 --- a/rcdk/R/formula.R +++ b/rcdk/R/formula.R @@ -528,7 +528,7 @@ generate.formula <- function(mass, #' @param tol The tolerance #' @return A \code{jobjRef} corresponding to an instance of \code{IsotopePatternSimilarity} #' @seealso \code{\link{compare.isotope.pattern}} -#' @references \url{http://cdk.github.io/cdk/1.5/docs/api/org/openscience/cdk/formula/IsotopePatternSimilarity.html} +#' @references \url{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/formula/IsotopePatternSimilarity.html} #' @author Miguel Rojas Cherto get.isotope.pattern.similarity <- function(tol = NULL) { ips <- .jnew("org/openscience/cdk/formula/IsotopePatternSimilarity") @@ -544,7 +544,7 @@ get.isotope.pattern.similarity <- function(tol = NULL) { #' #' @param minAbundance The minimum abundance #' @return A \code{jobjRef} corresponding to an instance of \code{IsotopePatternGenerator} -#' @references \url{http://cdk.github.io/cdk/1.5/docs/api/org/openscience/cdk/formula/IsotopePatternGenerator.html} +#' @references \url{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/formula/IsotopePatternGenerator.html} #' @author Miguel Rojas Cherto get.isotope.pattern.generator <- function(minAbundance = NULL) { if (is.null(minAbundance)) @@ -564,7 +564,7 @@ get.isotope.pattern.generator <- function(minAbundance = NULL) { #' @return A numeric value between 0 and 1 indicating the similarity between the two patterns #' @seealso \code{\link{get.isotope.pattern.similarity}} #' @export -#' @references \url{http://cdk.github.io/cdk/2.3/docs/api/org/openscience/cdk/formula/IsotopePatternSimilarity.html} +#' @references \url{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/formula/IsotopePatternSimilarity.html} #' @author Miguel Rojas Cherto compare.isotope.pattern <- function(iso1, iso2, ips = NULL) { cls <- unique(c(class(iso1), class(iso2))) diff --git a/rcdk/R/rcdk.R b/rcdk/R/rcdk.R index 50b1b4a0c9..9897333015 100755 --- a/rcdk/R/rcdk.R +++ b/rcdk/R/rcdk.R @@ -10,11 +10,11 @@ #' a builder object when directly working with the CDK API via #' `rJava`. #' -#' This method returns an instance of the \href{https://cdk.github.io/cdk/2.5/docs/api/org/openscience/cdk/silent/SilentChemObjectBuilder.html}{SilentChemObjectBuilder}. +#' This method returns an instance of the \href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/silent/SilentChemObjectBuilder.html}{SilentChemObjectBuilder}. #' Note that this is a static object that is created at package load time, #' and the same instance is returned whenever this function is called. #' -#' @return An instance of \href{https://cdk.github.io/cdk/2.5/docs/api/org/openscience/cdk/silent/SilentChemObjectBuilder.html}{SilentChemObjectBuilder} +#' @return An instance of \href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/silent/SilentChemObjectBuilder.html}{SilentChemObjectBuilder} #' @export #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com}) get.chem.object.builder <- function() { diff --git a/rcdk/R/smiles.R b/rcdk/R/smiles.R index 995e8c1b7d..6122d82989 100755 --- a/rcdk/R/smiles.R +++ b/rcdk/R/smiles.R @@ -2,11 +2,11 @@ #' #' The CDK supports a variety of customizations for SMILES generation including #' the use of lower case symbols for aromatic compounds to the use of the ChemAxon -#' \href{http://butane.chem.uiuc.edu/jsmoore/marvin/help/formats/cxsmiles-doc.html}{CxSmiles} +#' \href{https://docs.chemaxon.com/display/docs/formats_chemaxon-extended-smiles-and-smarts-cxsmiles-and-cxsmarts.md}{CxSmiles} #' format. Each 'flavor' is represented by an integer and multiple #' customizations are bitwise OR'ed. This method accepts the names of one or #' more customizations and returns the bitwise OR of them. -#' See \href{https://cdk.github.io/cdk/2.3/docs/api/index.html?org/openscience/cdk/smiles/SmiFlavor.html}{CDK documentation} +#' See \href{https://cdk.github.io/cdk/2.10/docs/api/index.html?org/openscience/cdk/smiles/SmiFlavor.html}{CDK documentation} #' for the list of flavors and what they mean. #' #' @param flavors A character vector of flavors. The default is \code{Generic} @@ -42,7 +42,7 @@ #' @md #' @return A numeric representing the bitwise `OR`` of the specified flavors #' @seealso \code{\link{get.smiles}} -#' @references \href{https://cdk.github.io/cdk/2.3/docs/api/index.html?org/openscience/cdk/smiles/SmiFlavor.html}{CDK documentation} +#' @references \href{https://cdk.github.io/cdk/2.10/docs/api/index.html?org/openscience/cdk/smiles/SmiFlavor.html}{CDK documentation} #' @examples #' m <- parse.smiles('C1C=CCC1N(C)c1ccccc1')[[1]] #' get.smiles(m) @@ -96,7 +96,7 @@ smiles.flavors <- function(flavors = c('Generic')) { #' The function will generate a SMILES representation of an #' `IAtomContainer` object. The default parameters of the CDK SMILES #' generator are used. This can mean that for large ring systems the -#' method may fail. See CDK \href{http://cdk.github.io/cdk/2.2/docs/api/org/openscience/cdk/smiles/SmilesGenerator.html}{Javadocs} +#' method may fail. See CDK \href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/smiles/SmilesGenerator.html}{Javadocs} #' for more information #' @param molecule The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @param flavor The type of SMILES to generate. See \code{\link{smiles.flavors}}. Default is `Generic` @@ -106,7 +106,7 @@ smiles.flavors <- function(flavors = c('Generic')) { #' @seealso \code{\link{parse.smiles}}, \code{\link{smiles.flavors}} #' @export #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com}) -#' @references \href{http://cdk.github.io/cdk/2.2/docs/api/org/openscience/cdk/smiles/SmilesGenerator.html}{SmilesGenerator} +#' @references \href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/smiles/SmilesGenerator.html}{SmilesGenerator} #' @examples #' m <- parse.smiles('C1C=CCC1N(C)c1ccccc1')[[1]] #' get.smiles(m) @@ -127,7 +127,7 @@ get.smiles <- function(molecule, flavor = smiles.flavors(c('Generic')), smigen = #' to instantiate a new parser for each call #' #' @return A `jobjRef` object corresponding to the CDK -#' \href{http://cdk.github.io/cdk/2.2/docs/api/org/openscience/cdk/smiles/SmilesParser.html}{SmilesParser} class +#' \href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/smiles/SmilesParser.html}{SmilesParser} class #' @seealso \code{\link{get.smiles}}, \code{\link{parse.smiles}} #' @export #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com}) diff --git a/rcdk/man/compare.isotope.pattern.Rd b/rcdk/man/compare.isotope.pattern.Rd index a31a2fb8a7..5ab0fa9950 100755 --- a/rcdk/man/compare.isotope.pattern.Rd +++ b/rcdk/man/compare.isotope.pattern.Rd @@ -20,7 +20,7 @@ A numeric value between 0 and 1 indicating the similarity between the two patter Computes a similarity score between two different isotope abundance patterns. } \references{ -\url{http://cdk.github.io/cdk/2.3/docs/api/org/openscience/cdk/formula/IsotopePatternSimilarity.html} +\url{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/formula/IsotopePatternSimilarity.html} } \seealso{ \code{\link{get.isotope.pattern.similarity}} diff --git a/rcdk/man/get.chem.object.builder.Rd b/rcdk/man/get.chem.object.builder.Rd index a0fa56510e..8323428128 100755 --- a/rcdk/man/get.chem.object.builder.Rd +++ b/rcdk/man/get.chem.object.builder.Rd @@ -7,7 +7,7 @@ get.chem.object.builder() } \value{ -An instance of \href{https://cdk.github.io/cdk/2.5/docs/api/org/openscience/cdk/silent/SilentChemObjectBuilder.html}{SilentChemObjectBuilder} +An instance of \href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/silent/SilentChemObjectBuilder.html}{SilentChemObjectBuilder} } \description{ The CDK employs a builder design pattern to construct @@ -19,7 +19,7 @@ a builder object when directly working with the CDK API via `rJava`. } \details{ -This method returns an instance of the \href{https://cdk.github.io/cdk/2.5/docs/api/org/openscience/cdk/silent/SilentChemObjectBuilder.html}{SilentChemObjectBuilder}. +This method returns an instance of the \href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/silent/SilentChemObjectBuilder.html}{SilentChemObjectBuilder}. Note that this is a static object that is created at package load time, and the same instance is returned whenever this function is called. } diff --git a/rcdk/man/get.isotope.pattern.generator.Rd b/rcdk/man/get.isotope.pattern.generator.Rd index 1cebfea6dd..e645bd7076 100755 --- a/rcdk/man/get.isotope.pattern.generator.Rd +++ b/rcdk/man/get.isotope.pattern.generator.Rd @@ -18,7 +18,7 @@ minimum abundance specified. This object can be used to generate all combinatori chemical isotopes given a structure. } \references{ -\url{http://cdk.github.io/cdk/1.5/docs/api/org/openscience/cdk/formula/IsotopePatternGenerator.html} +\url{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/formula/IsotopePatternGenerator.html} } \author{ Miguel Rojas Cherto diff --git a/rcdk/man/get.isotope.pattern.similarity.Rd b/rcdk/man/get.isotope.pattern.similarity.Rd index 2e45d6a01c..b21e5859c4 100755 --- a/rcdk/man/get.isotope.pattern.similarity.Rd +++ b/rcdk/man/get.isotope.pattern.similarity.Rd @@ -18,7 +18,7 @@ class which can be used to compute similarity scores between pairs of isotope abundance patterns. } \references{ -\url{http://cdk.github.io/cdk/1.5/docs/api/org/openscience/cdk/formula/IsotopePatternSimilarity.html} +\url{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/formula/IsotopePatternSimilarity.html} } \seealso{ \code{\link{compare.isotope.pattern}} diff --git a/rcdk/man/get.smiles.Rd b/rcdk/man/get.smiles.Rd index 73707ef94e..f8fbc8349c 100755 --- a/rcdk/man/get.smiles.Rd +++ b/rcdk/man/get.smiles.Rd @@ -21,7 +21,7 @@ A character string containing the generated SMILES The function will generate a SMILES representation of an `IAtomContainer` object. The default parameters of the CDK SMILES generator are used. This can mean that for large ring systems the -method may fail. See CDK \href{http://cdk.github.io/cdk/2.2/docs/api/org/openscience/cdk/smiles/SmilesGenerator.html}{Javadocs} +method may fail. See CDK \href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/smiles/SmilesGenerator.html}{Javadocs} for more information } \examples{ @@ -30,7 +30,7 @@ get.smiles(m) get.smiles(m, smiles.flavors(c('Generic','UseAromaticSymbols'))) } \references{ -\href{http://cdk.github.io/cdk/2.2/docs/api/org/openscience/cdk/smiles/SmilesGenerator.html}{SmilesGenerator} +\href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/smiles/SmilesGenerator.html}{SmilesGenerator} } \seealso{ \code{\link{parse.smiles}}, \code{\link{smiles.flavors}} diff --git a/rcdk/man/get.smiles.parser.Rd b/rcdk/man/get.smiles.parser.Rd index d72aef0bc9..b8780aaee6 100755 --- a/rcdk/man/get.smiles.parser.Rd +++ b/rcdk/man/get.smiles.parser.Rd @@ -8,7 +8,7 @@ get.smiles.parser() } \value{ A `jobjRef` object corresponding to the CDK -\href{http://cdk.github.io/cdk/2.2/docs/api/org/openscience/cdk/smiles/SmilesParser.html}{SmilesParser} class +\href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/smiles/SmilesParser.html}{SmilesParser} class } \description{ This function returns a reference to a SMILES parser diff --git a/rcdk/man/smiles.flavors.Rd b/rcdk/man/smiles.flavors.Rd index 1a04ffbbd5..a0cd49ad0c 100755 --- a/rcdk/man/smiles.flavors.Rd +++ b/rcdk/man/smiles.flavors.Rd @@ -46,11 +46,11 @@ A numeric representing the bitwise `OR`` of the specified flavors \description{ The CDK supports a variety of customizations for SMILES generation including the use of lower case symbols for aromatic compounds to the use of the ChemAxon -\href{http://butane.chem.uiuc.edu/jsmoore/marvin/help/formats/cxsmiles-doc.html}{CxSmiles} +\href{https://docs.chemaxon.com/display/docs/formats_chemaxon-extended-smiles-and-smarts-cxsmiles-and-cxsmarts.md}{CxSmiles} format. Each 'flavor' is represented by an integer and multiple customizations are bitwise OR'ed. This method accepts the names of one or more customizations and returns the bitwise OR of them. -See \href{https://cdk.github.io/cdk/2.3/docs/api/index.html?org/openscience/cdk/smiles/SmiFlavor.html}{CDK documentation} +See \href{https://cdk.github.io/cdk/2.10/docs/api/index.html?org/openscience/cdk/smiles/SmiFlavor.html}{CDK documentation} for the list of flavors and what they mean. } \examples{ @@ -64,7 +64,7 @@ get.smiles(m,flavor = smiles.flavors(c("CxSmiles","UseAromaticSymbols"))) } \references{ -\href{https://cdk.github.io/cdk/2.3/docs/api/index.html?org/openscience/cdk/smiles/SmiFlavor.html}{CDK documentation} +\href{https://cdk.github.io/cdk/2.10/docs/api/index.html?org/openscience/cdk/smiles/SmiFlavor.html}{CDK documentation} } \seealso{ \code{\link{get.smiles}} From 8d1cde86b8ac2ff32fe5fe93db4b0a075d7519b7 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Thu, 11 Sep 2025 20:58:01 -0400 Subject: [PATCH 07/27] update description and Vignetter --- rcdk/DESCRIPTION | 9 ++++----- rcdk/vignettes/Features_29.Rmd | 24 +++++++++++++++--------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/rcdk/DESCRIPTION b/rcdk/DESCRIPTION index 3cd9d64c5e..405e81630f 100755 --- a/rcdk/DESCRIPTION +++ b/rcdk/DESCRIPTION @@ -5,11 +5,11 @@ Title: Interface to the 'CDK' Libraries Authors@R: c( person('Rajarshi', 'Guha', ,'rajarshi.guha@gmail.com', role=c('aut',"cph"), comment = c(ORCID = "0000-0001-7403-8819")), - person('Zachary', 'Charlop-Powers', ,'zach.charlop.powers@gmail.com',role=c('cre'), + person('Zachary', 'Charlop-Powers', ,'zach.charlop.powers@gmail.com',role=c('cre'), comment = c(ORCID = "0000-0001-8816-4680")), - person('Emma', 'Schymanski', ,'schymane@gmail.com', role=c('ctb'), + person('Emma', 'Schymanski', ,'schymane@gmail.com', role=c('ctb'), comment = c(ORCID = "0000-0001-6868-8145")), - person('Egon', 'Willighagen', ,'egon.willighagen@maastrichtuniversity.nl', role=c('ctb'), + person('Egon', 'Willighagen', ,'egon.willighagen@maastrichtuniversity.nl', role=c('ctb'), comment = c(ORCID = "0000-0001-7542-0286"))) Depends: rcdklibs (>= 2.9) @@ -25,8 +25,7 @@ Suggests: RUnit, knitr, rmarkdown, - devtools, - depict + devtools License: LGPL URL: https://github.com/CDK-R/cdkr LazyLoad: yes diff --git a/rcdk/vignettes/Features_29.Rmd b/rcdk/vignettes/Features_29.Rmd index c2b2592c30..a2d4bf43ea 100644 --- a/rcdk/vignettes/Features_29.Rmd +++ b/rcdk/vignettes/Features_29.Rmd @@ -2,9 +2,9 @@ title: "Features 2.9" author: "Zachary Charlop-Powers" date: "`r Sys.Date()`" -output: +output: rmarkdown::html_vignette: - toc: true + toc: true vignette: > %\VignetteIndexEntry{Features 2.9} %\VignetteEngine{knitr::rmarkdown} @@ -24,6 +24,13 @@ vignette: > https://github.com/cdk/cdk/pull/927 ```{r setup, message=FALSE} +# Install depict from GitHub if not available +if (!requireNamespace("depict", quietly = TRUE)) { + if (!requireNamespace("remotes", quietly = TRUE)) { + install.packages("remotes") + } + remotes::install_github("CDK-R/depict") +} library(rcdklibs) library(depict) @@ -58,7 +65,7 @@ rxn <- smiles_parser$parseReactionSmiles("[CH3:9][CH:8]([CH3:10])[c:7]1[cH:11][c # note `as.list` for (mol in as.list(ReactionManipulator$getAllAtomContainers(rxn))) { - abbreviations$apply(mol) + abbreviations$apply(mol) } rxn$setDirection(IReaction$Direction$NO_GO) @@ -73,7 +80,7 @@ to_svg(svg) rxn <- smiles_parser$parseReactionSmiles("[CH3:9][CH:8]([CH3:10])[c:7]1[cH:11][cH:12][cH:13][cH:14][cH:15]1.[CH2:3]([CH2:4][C:5](=[O:6])Cl)[CH2:2][Cl:1]>[Al+3].[Cl-].[Cl-].[Cl-].C(Cl)Cl>[CH3:9][CH:8]([CH3:10])[c:7]1[cH:11][cH:12][c:13]([cH:14][cH:15]1)[C:5](=[O:6])[CH2:4][CH2:3][CH2:2][Cl:1] |f:2.3.4.5| Friedel-Crafts acylation [3.10.1]") for (mol in as.list(ReactionManipulator$getAllAtomContainers(rxn))) { - abbreviations$apply(mol) + abbreviations$apply(mol) } rxn$setDirection(IReaction$Direction$RETRO_SYNTHETIC) @@ -87,7 +94,7 @@ to_svg(svg) rxn <- smiles_parser$parseReactionSmiles("[CH3:9][CH:8]([CH3:10])[c:7]1[cH:11][cH:12][cH:13][cH:14][cH:15]1.[CH2:3]([CH2:4][C:5](=[O:6])Cl)[CH2:2][Cl:1]>[Al+3].[Cl-].[Cl-].[Cl-].C(Cl)Cl>[CH3:9][CH:8]([CH3:10])[c:7]1[cH:11][cH:12][c:13]([cH:14][cH:15]1)[C:5](=[O:6])[CH2:4][CH2:3][CH2:2][Cl:1] |f:2.3.4.5| Friedel-Crafts acylation [3.10.1]") for (mol in as.list(ReactionManipulator$getAllAtomContainers(rxn))) { - abbreviations$apply(mol) + abbreviations$apply(mol) } rxn$setDirection(IReaction$Direction$BIDIRECTIONAL) @@ -101,7 +108,7 @@ rxn <- smiles_parser$parseReactionSmiles("c1c(Cl)cccc1[N-][N+]#N>>c1c(Cl)cccc1N= for (mol in as.list(ReactionManipulator$getAllAtomContainers(rxn))) { - abbreviations$apply(mol) + abbreviations$apply(mol) } rxn$setDirection(IReaction$Direction$RESONANCE) @@ -161,14 +168,13 @@ maygen$setFormula("C3Cl2H4") # StringWriter sw = new StringWriter(); # SdfOutputConsumer consumer = new SdfOutputConsumer(sw); # consumer.setCoordinates(true); - + # maygen.setConsumer(mol -> { # try { # System.out.println(smigen.create(mol)); # } catch (CDKException ignore) { } # }); -# +# # int count = maygen.getCount(); // number of structures generated ``` - From ac5c6ea6f8496a6d88815445be3fca47bc60354c Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Thu, 11 Sep 2025 21:03:42 -0400 Subject: [PATCH 08/27] ignore the vignette that uses depict, which is not on CRAN and is causing problems --- rcdk/.Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/rcdk/.Rbuildignore b/rcdk/.Rbuildignore index 9620f7bb6f..c6a79ec5f0 100755 --- a/rcdk/.Rbuildignore +++ b/rcdk/.Rbuildignore @@ -6,3 +6,4 @@ ^pkgdown$ ^revdep$ README.Rmd +vignettes/Features_29.Rmd From 18debc3efd73ba28b49ab3c72a17029e7574937a Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Thu, 11 Sep 2025 21:27:06 -0400 Subject: [PATCH 09/27] add cran --- rcdk/cran-comments.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rcdk/cran-comments.md b/rcdk/cran-comments.md index acdf4a6d17..b57f5b9f52 100755 --- a/rcdk/cran-comments.md +++ b/rcdk/cran-comments.md @@ -1,6 +1,13 @@ ## General Comments +- resubmission with a number of small URL fixes - submission of rcdk 3.8.2 - add Egon - update links to fingerprint - fix a few lints and tests + + +── R CMD check results ─────────────────────────────── rcdk 3.8.2 ──── +Duration: 25.5s + +0 errors ✔ | 0 warnings ✔ | 0 notes ✔ \ No newline at end of file From 53b9c3441211cf01b1ee3ad001bbaf5239bebe9c Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 13 Sep 2025 12:50:21 -0400 Subject: [PATCH 10/27] Clean build --- rcdk/.Rbuildignore | 2 ++ rcdk/.gitignore | 4 +++- rcdk/DESCRIPTION | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/rcdk/.Rbuildignore b/rcdk/.Rbuildignore index c6a79ec5f0..f23f3d5f5b 100755 --- a/rcdk/.Rbuildignore +++ b/rcdk/.Rbuildignore @@ -7,3 +7,5 @@ ^revdep$ README.Rmd vignettes/Features_29.Rmd +^doc$ +^Meta$ diff --git a/rcdk/.gitignore b/rcdk/.gitignore index 3a8d2860f7..607761ce7f 100755 --- a/rcdk/.gitignore +++ b/rcdk/.gitignore @@ -5,4 +5,6 @@ revdep/ .Rhistory .Rproj *.Rmd -*.png \ No newline at end of file +*.png +/doc/ +/Meta/ diff --git a/rcdk/DESCRIPTION b/rcdk/DESCRIPTION index 405e81630f..baecd60e25 100755 --- a/rcdk/DESCRIPTION +++ b/rcdk/DESCRIPTION @@ -39,3 +39,4 @@ Description: Allows the user to access functionality in the RoxygenNote: 7.3.3 VignetteBuilder: knitr Encoding: UTF-8 + From 68dccfcf3a2d6568a2eddaaba2a67e16ee9cd871 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 13 Sep 2025 12:55:36 -0400 Subject: [PATCH 11/27] simplify examples for CRAN tests --- rcdk/R/fingerprint.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/rcdk/R/fingerprint.R b/rcdk/R/fingerprint.R index bd4af48efd..d374f3ac00 100755 --- a/rcdk/R/fingerprint.R +++ b/rcdk/R/fingerprint.R @@ -40,15 +40,13 @@ #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com}) #' @examples #' ## get some molecules -#' sp <- get.smiles.parser() -#' smiles <- c('CCC', 'CCN', 'CCN(C)(C)', 'c1ccccc1Cc1ccccc1','C1CCC1CC(CN(C)(C))CC(=O)CC') +#' smiles <- c('CCC', 'CCN') #' mols <- parse.smiles(smiles) #' -#' ## get a single fingerprint using the standard -#' ## (hashed, path based) fingerprinter -#' fp <- get.fingerprint(mols[[1]]) +#' ## get a single fingerprint using MACCS (fast) +#' fp <- get.fingerprint(mols[[1]], type='maccs') #' -#' ## get MACCS keys for all the molecules +#' ## get MACCS keys for both molecules #' fps <- lapply(mols, get.fingerprint, type='maccs') #' #' ## get Signature fingerprint From 470bd3e2895775f4c36ab934f8755323c8b7cb23 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 13 Sep 2025 12:59:01 -0400 Subject: [PATCH 12/27] Remove expensive tests when on CRAN --- rcdk/inst/unitTests/runit.fingerprints.R | 51 +++++++++++++----------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/rcdk/inst/unitTests/runit.fingerprints.R b/rcdk/inst/unitTests/runit.fingerprints.R index 91794630e1..63b2d7e103 100755 --- a/rcdk/inst/unitTests/runit.fingerprints.R +++ b/rcdk/inst/unitTests/runit.fingerprints.R @@ -2,10 +2,14 @@ test.fp <- function() { mol <- parse.smiles("CCCCC")[[1]] fp <- get.fingerprint(mol, type='maccs') checkTrue(length(fp@bits) > 0) - fp <- get.fingerprint(mol, type='kr') - checkTrue(length(fp@bits) > 0) - fp <- get.fingerprint(mol, type='shortestpath') - checkTrue(length(fp@bits) > 0) + + # Skip slow tests during CRAN check + if (Sys.getenv("_R_CHECK_PACKAGE_NAME_", "") == "") { + fp <- get.fingerprint(mol, type='kr') + checkTrue(length(fp@bits) > 0) + fp <- get.fingerprint(mol, type='shortestpath') + checkTrue(length(fp@bits) > 0) + } } # Substructure test are inspired by the test for the substructure fingerprints in CDK @@ -14,43 +18,43 @@ test.fp.substructures.binary <- function() { mol <- parse.smiles("c1ccccc1CCC")[[1]] fp <- get.fingerprint(mol, type="substructure", fp.mode="bit") fp_bits <- fingerprint::fp.to.matrix(list(fp)) - + checkEquals(length(fp), 307) checkEquals(fp_bits[1], 1) checkEquals(fp_bits[2], 1) checkEquals(fp_bits[274], 1) checkEquals(fp_bits[101], 0) - + # User defined patterns smarts <- c("c1ccccc1", "[CX4H3][#6]", "[CX2]#[CX2]") mol <- parse.smiles("c1ccccc1CCC")[[1]] - fp <- get.fingerprint(mol, type="substructure", fp.mode="bit", + fp <- get.fingerprint(mol, type="substructure", fp.mode="bit", substructure.pattern = smarts) fp_bits <- fingerprint::fp.to.matrix(list(fp)) - + checkEquals(length(fp), 3) checkEquals(length(fp@bits), 2) checkEquals(fp_bits[1], 1) checkEquals(fp_bits[2], 1) checkEquals(fp_bits[3], 0) - + mol <- parse.smiles("C=C=C")[[1]] - fp <- get.fingerprint(mol, type="substructure", fp.mode="bit", + fp <- get.fingerprint(mol, type="substructure", fp.mode="bit", substructure.pattern = smarts) fp_bits <- fingerprint::fp.to.matrix(list(fp)) - + checkEquals(length(fp), 3) checkEquals(length(fp@bits), 0) for (i_fp in 1:3) { checkEquals(fp_bits[i_fp], 0) } - + # Check for aromatic ring smarts <- "a:1:a:a:a:a:a1" mol <- parse.smiles("C1=CC=CC(=C1)CCCC2=CC=CC=C2")[[1]] set.atom.types(mol) do.aromaticity(mol) - fp <- get.fingerprint(mol, type="substructure", fp.mode="bit", + fp <- get.fingerprint(mol, type="substructure", fp.mode="bit", substructure.pattern = smarts) fp_bits <- fingerprint::fp.to.matrix(list(fp)) checkEquals(length(fp), 1) @@ -61,42 +65,41 @@ test.fp.substructures.count <- function() { # Default patterns: functional groups mol <- parse.smiles("c1ccccc1CCC")[[1]] fp <- get.fingerprint(mol, type="substructure", fp.mode="count") - + checkEquals(length(fp), 307) checkTrue(fingerprint::count(fp@features[[1]]) > 0) checkTrue(fingerprint::count(fp@features[[2]]) > 0) checkTrue(fingerprint::count(fp@features[[274]]) > 0) checkTrue(fingerprint::count(fp@features[[101]]) == 0) - + # User defined patterns smarts <- c("c1ccccc1", "[CX4H3][#6]", "[CX2]#[CX2]") mol <- parse.smiles("c1ccccc1CCC")[[1]] - fp <- get.fingerprint(mol, type="substructure", fp.mode="count", + fp <- get.fingerprint(mol, type="substructure", fp.mode="count", substructure.pattern = smarts) - + checkEquals(length(fp), 3) checkEquals(fingerprint::count(fp@features[[1]]), 1) checkEquals(fingerprint::count(fp@features[[2]]), 1) checkEquals(fingerprint::count(fp@features[[3]]), 0) - + mol <- parse.smiles("C=C=C")[[1]] - fp <- get.fingerprint(mol, type="substructure", fp.mode="count", + fp <- get.fingerprint(mol, type="substructure", fp.mode="count", substructure.pattern = smarts) - + checkEquals(length(fp), 3) for (i_fp in 1:3) { checkEquals(fingerprint::count(fp@features[[i_fp]]), 0) } - + # Check for aromatic ring smarts <- "a:1:a:a:a:a:a1" mol <- parse.smiles("C1=CC=CC(=C1)CCCC2=CC=CC=C2")[[1]] set.atom.types(mol) do.aromaticity(mol) - fp <- get.fingerprint(mol, type="substructure", fp.mode="count", + fp <- get.fingerprint(mol, type="substructure", fp.mode="count", substructure.pattern = smarts) - + checkEquals(length(fp), 1) checkEquals(fingerprint::count(fp@features[[1]]), 2) } - From a25f70c1ff3ffbdf761905d811958cc772e383b2 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 13 Sep 2025 13:00:57 -0400 Subject: [PATCH 13/27] Using MACCs in our examples to speed calculation --- rcdk/man/get.fingerprint.Rd | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/rcdk/man/get.fingerprint.Rd b/rcdk/man/get.fingerprint.Rd index 605e0ac87f..f47ca2c011 100755 --- a/rcdk/man/get.fingerprint.Rd +++ b/rcdk/man/get.fingerprint.Rd @@ -65,15 +65,13 @@ the input molecule. } \examples{ ## get some molecules -sp <- get.smiles.parser() -smiles <- c('CCC', 'CCN', 'CCN(C)(C)', 'c1ccccc1Cc1ccccc1','C1CCC1CC(CN(C)(C))CC(=O)CC') +smiles <- c('CCC', 'CCN') mols <- parse.smiles(smiles) -## get a single fingerprint using the standard -## (hashed, path based) fingerprinter -fp <- get.fingerprint(mols[[1]]) +## get a single fingerprint using MACCS (fast) +fp <- get.fingerprint(mols[[1]], type='maccs') -## get MACCS keys for all the molecules +## get MACCS keys for both molecules fps <- lapply(mols, get.fingerprint, type='maccs') ## get Signature fingerprint From 1cc5a7c476dee93a76033f9b47d594c2ae5540c6 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 13 Sep 2025 13:01:15 -0400 Subject: [PATCH 14/27] update cran_comments --- rcdk/cran-comments.md | 1 + 1 file changed, 1 insertion(+) diff --git a/rcdk/cran-comments.md b/rcdk/cran-comments.md index b57f5b9f52..833f3b2496 100755 --- a/rcdk/cran-comments.md +++ b/rcdk/cran-comments.md @@ -1,5 +1,6 @@ ## General Comments +- 2nd resubmission. The URL issue seems to be related to CACHE; I've slimmed the Unit tests to fit in time. - resubmission with a number of small URL fixes - submission of rcdk 3.8.2 - add Egon From bf6352ba78fefbd37601e045f9a6f4b173f68b37 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 13 Sep 2025 18:11:20 -0400 Subject: [PATCH 15/27] update the linux CRAN detection --- rcdk/inst/unitTests/runit.fingerprints.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rcdk/inst/unitTests/runit.fingerprints.R b/rcdk/inst/unitTests/runit.fingerprints.R index 63b2d7e103..08f22e8067 100755 --- a/rcdk/inst/unitTests/runit.fingerprints.R +++ b/rcdk/inst/unitTests/runit.fingerprints.R @@ -3,8 +3,12 @@ test.fp <- function() { fp <- get.fingerprint(mol, type='maccs') checkTrue(length(fp@bits) > 0) - # Skip slow tests during CRAN check - if (Sys.getenv("_R_CHECK_PACKAGE_NAME_", "") == "") { + # Skip slow tests during CRAN check (use multiple detection methods) + is_cran_check <- Sys.getenv("_R_CHECK_PACKAGE_NAME_", "") != "" || + Sys.getenv("_R_CHECK_TIMINGS_", "") != "" || + identical(Sys.getenv("NOT_CRAN"), "false") + + if (!is_cran_check) { fp <- get.fingerprint(mol, type='kr') checkTrue(length(fp@bits) > 0) fp <- get.fingerprint(mol, type='shortestpath') From 9249775c466043605cb40092dfd8e13b91e5cc70 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 13 Sep 2025 18:16:56 -0400 Subject: [PATCH 16/27] fix one more URL --- rcdk/vignettes/rcdk.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rcdk/vignettes/rcdk.Rmd b/rcdk/vignettes/rcdk.Rmd index 36696c4355..f23f8014c8 100644 --- a/rcdk/vignettes/rcdk.Rmd +++ b/rcdk/vignettes/rcdk.Rmd @@ -159,7 +159,7 @@ get.smiles(mols[[3]], smiles.flavors(c('UseAromaticSymbols'))) get.smiles(mols[[3]], smiles.flavors(c('Generic','CxSmiles'))) ``` -Using the [CxSmiles](http://butane.chem.uiuc.edu/jsmoore/marvin/help/formats/cxsmiles-doc.html) flavors allows the user to encode a variety of information in the SMILES string, such as 2D or 3D coordinates. +Using the [CxSmiles](https://docs.chemaxon.com/display/docs/formats_chemaxon-extended-smiles-and-smarts-cxsmiles-and-cxsmarts.md) flavors allows the user to encode a variety of information in the SMILES string, such as 2D or 3D coordinates. ```{r} m <- parse.smiles('CCC')[[1]] From 4ddbec8a619559a93dd71affb81571a1f2fe5a51 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sat, 13 Sep 2025 18:18:05 -0400 Subject: [PATCH 17/27] update CRAN submission markdown --- rcdk/cran-comments.md | 1 + 1 file changed, 1 insertion(+) diff --git a/rcdk/cran-comments.md b/rcdk/cran-comments.md index 833f3b2496..04c749215e 100755 --- a/rcdk/cran-comments.md +++ b/rcdk/cran-comments.md @@ -1,5 +1,6 @@ ## General Comments +- 3rd resubmission. Fixed the final URL issue. More long test avoidance on CRAN/linux - 2nd resubmission. The URL issue seems to be related to CACHE; I've slimmed the Unit tests to fit in time. - resubmission with a number of small URL fixes - submission of rcdk 3.8.2 From 2e7d7c5306894bc7da491ff61c69f4adf9c738e3 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Fri, 28 Nov 2025 10:14:27 -0500 Subject: [PATCH 18/27] fix: URL moved permanently --- rcdk/vignettes/PerformanceNotes.Rmd | 34 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/rcdk/vignettes/PerformanceNotes.Rmd b/rcdk/vignettes/PerformanceNotes.Rmd index 589a098394..5fae5a8db6 100644 --- a/rcdk/vignettes/PerformanceNotes.Rmd +++ b/rcdk/vignettes/PerformanceNotes.Rmd @@ -2,9 +2,9 @@ title: "Performance Notes" author: "Zaachary Charlop-Powers" date: "`r Sys.Date()`" -output: +output: rmarkdown::html_vignette: - toc: true + toc: true vignette: > %\VignetteIndexEntry{Performance Notes} %\VignetteEngine{knitr::rmarkdown} @@ -14,7 +14,7 @@ vignette: > ## rCDK Performance In September 2022, of this year, [Stepehn Neumann](https://gist.github.com/sneumann) -[created a benchmark](https://gist.github.com/sneumann/959a6d205ea4ac73eaf1393da0ec0673) for moecular weight calculation that he [announced on twitter](https://twitter.com/sneumannoffice/status/1570070283083710465?s=20&t=RqJR3Bbh-DEcbCf2tWUEBQ) showing that rCDK had dismal performance relative to other tools in the R ecosystem. Something seemed a bit off so I looked into the code. +[created a benchmark](https://gist.github.com/sneumann/959a6d205ea4ac73eaf1393da0ec0673) for moecular weight calculation that he [announced on twitter](https://x.com/sneumannoffice/status/1570070283083710465?s=20&t=RqJR3Bbh-DEcbCf2tWUEBQ) showing that rCDK had dismal performance relative to other tools in the R ecosystem. Something seemed a bit off so I looked into the code. What I discovered is that the mass spec calculations were mediated by R classes instead of accessing the underlying Java code directly and if you write a function that does that you get a speedup, and if you avoid reflection by creating static calls then you @@ -30,12 +30,12 @@ see that progression in the code below which is accompanied by the outputs from ```sh will give (2/3) runtime in µs: - 21 OrgMassSpecR + 21 OrgMassSpecR 163 MetaboCoreUtils - 197 enviPat - 545 Rdisop - 645 CHNOSZ - 4863 ChemmineR + 197 enviPat + 545 Rdisop + 645 CHNOSZ + 4863 ChemmineR 22510 rcdk ``` @@ -64,7 +64,7 @@ data(isotopes) # original # https://github.com/CDK-R/cdkr/blob/master/rcdk/R/formula.R # get.formula <- function(mf, charge=0) { -# +# # manipulator <- get("mfManipulator", envir = .rcdk.GlobalEnv) # if(!is.character(mf)) { # stop("Must supply a Formula string"); @@ -78,7 +78,7 @@ data(isotopes) # .jcast(molecularformula,.IMolecularFormula), # TRUE); # } -# +# # D <- new(J("java/lang/Integer"), as.integer(charge)) # .jcall(molecularFormula,"V","setCharge",D); # object <- .cdkFormula.createObject(.jcast(molecularFormula,.IMolecularFormula)); @@ -93,13 +93,13 @@ silentchemobject <- J("org.openscience.cdk.silent.SilentChemObjectBuilder") #' Rewrite the formual object and directly access Java #' get.formula2 <- function(mf) { - + formula <- mfManipulator$getMolecularFormula( - "C2H3", + "C2H3", silentchemobject$getInstance()) - + mfManipulator$getMass(formula) - + } #' Add type hints @@ -109,7 +109,7 @@ get.formula3 <- function(mf) { silentchemobject, "Lorg/openscience/cdk/interfaces/IChemObjectBuilder;", "getInstance") - + formula <- .jcall( mfManipulator, "Lorg/openscience/cdk/interfaces/IMolecularFormula;", @@ -118,7 +118,7 @@ get.formula3 <- function(mf) { builderinstance); mfManipulator$getMass(formula) - + } @@ -155,7 +155,7 @@ benchmark <- microbenchmark::microbenchmark( Rdisop = Rdisop::getMolecule("C2H6O")$exactmass, ChemmineR = ChemmineR::exactMassOB(ChemmineR::smiles2sdf("CCO")), OrgMassSpecR = OrgMassSpecR::MonoisotopicMass(formula = OrgMassSpecR::ListFormula("C2H6O)"), charge = 0), - + CHNOSZ = CHNOSZ::mass("C2H6O"), enviPat = enviPat::isopattern(isotopes, "C2H6O", charge=FALSE, verbose=FALSE)[[1]][1,1] , times=1000L) From 7248e768f0f6ef30a395ea3ff59f1a48b3dbff9b Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Fri, 28 Nov 2025 10:18:12 -0500 Subject: [PATCH 19/27] fix: set CPUs to 1 inside of CHECK environments --- rcdk/R/rcdk.R | 174 +++++++++++++++++++++++++++----------------------- 1 file changed, 94 insertions(+), 80 deletions(-) diff --git a/rcdk/R/rcdk.R b/rcdk/R/rcdk.R index 9897333015..82aae883c9 100755 --- a/rcdk/R/rcdk.R +++ b/rcdk/R/rcdk.R @@ -1,19 +1,19 @@ .packageName <- "rcdk" #' Get the default chemical object builder. -#' +#' #' The CDK employs a builder design pattern to construct #' instances of new chemical objects (e.g., atoms, bonds, parsers -#' and so on). Many methods require an instance of a builder +#' and so on). Many methods require an instance of a builder #' object to function. While most functions in this package handle #' this internally, it is useful to be able to get an instance of #' a builder object when directly working with the CDK API via #' `rJava`. -#' -#' This method returns an instance of the \href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/silent/SilentChemObjectBuilder.html}{SilentChemObjectBuilder}. -#' Note that this is a static object that is created at package load time, +#' +#' This method returns an instance of the \href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/silent/SilentChemObjectBuilder.html}{SilentChemObjectBuilder}. +#' Note that this is a static object that is created at package load time, #' and the same instance is returned whenever this function is called. -#' +#' #' @return An instance of \href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/silent/SilentChemObjectBuilder.html}{SilentChemObjectBuilder} #' @export #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com}) @@ -55,14 +55,28 @@ get.chem.object.builder <- function() { } Sys.setenv(NOAWT=1) - + jar.rcdk <- paste(lib,pkg,"cont","rcdk.jar",sep=.Platform$file.sep) jar.png <- paste(lib,pkg,"cont","com.objectplanet.image.PngEncoder.jar",sep=.Platform$file.sep) - .jinit(classpath=c(jar.rcdk,jar.png), parameters="-Djava.awt.headless=true") - + + # Detect if we're running in CRAN check or test environment + # If so, limit JVM to single-threaded execution to avoid parallel processing NOTE + is_check_env <- Sys.getenv("_R_CHECK_PACKAGE_NAME_", "") != "" || + Sys.getenv("_R_CHECK_TIMINGS_", "") != "" || + Sys.getenv("RCMDCHECK") != "" || + identical(Sys.getenv("NOT_CRAN"), "false") + + jvm_params <- "-Djava.awt.headless=true" + if (is_check_env) { + # Limit to single thread during CRAN checks to avoid "CPU time > elapsed time" NOTE + jvm_params <- c(jvm_params, "-XX:ActiveProcessorCount=1") + } + + .jinit(classpath=c(jar.rcdk,jar.png), parameters=jvm_params) + .jcall("java/lang/System", "S", "setProperty", "java.awt.headless", "true") - - # check Java Version + + # check Java Version jv <- .jcall("java/lang/System", "S", "getProperty", "java.runtime.version") if(substr(jv, 1L, 2L) == "1.") { jvn <- as.numeric(paste0(strsplit(jv, "[.]")[[1L]][1:2], collapse = ".")) @@ -79,7 +93,7 @@ get.chem.object.builder <- function() { "Lorg/openscience/cdk/interfaces/IChemObjectBuilder;", "getInstance"), envir = .rcdk.GlobalEnv) assign("mfManipulator", .jnew("org/openscience/cdk/tools/manipulator/MolecularFormulaManipulator"), envir = .rcdk.GlobalEnv) - + # Extract the bond order enums so we can return them without going through # Java each time we want one assign("BOND_ORDER_SINGLE", J("org.openscience.cdk.interfaces.IBond")$Order$SINGLE, @@ -99,7 +113,7 @@ get.chem.object.builder <- function() { } #' Get the current CDK version used in the package. -#' +#' #' @return Returns a character containing the version of the CDK used in this package #' @export #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com}) @@ -108,11 +122,11 @@ cdk.version <- function() { } #' Remove explicit hydrogens. -#' -#' Create an copy of the original structure with explicit hydrogens removed. -#' Stereochemistry is updated but up and down bonds in a depiction may need +#' +#' Create an copy of the original structure with explicit hydrogens removed. +#' Stereochemistry is updated but up and down bonds in a depiction may need #' to be recalculated. This can also be useful for descriptor calculations. -#' +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @return A copy of the original molecule, with explicit hydrogens removed #' @seealso \code{\link{get.hydrogen.count}}, \code{\link{get.total.hydrogen.count}} @@ -129,12 +143,12 @@ remove.hydrogens <- function(mol) { } #' Get total number of implicit hydrogens in the molecule. -#' -#' Counts the number of hydrogens on the provided molecule. As this method -#' will sum all implicit hydrogens on each atom it is important to ensure -#' the molecule has already been configured (and thus each atom has an -#' implicit hydrogen count). -#' +#' +#' Counts the number of hydrogens on the provided molecule. As this method +#' will sum all implicit hydrogens on each atom it is important to ensure +#' the molecule has already been configured (and thus each atom has an +#' implicit hydrogen count). +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @return An integer representing the total number of implicit hydrogens #' @seealso \code{\link{get.hydrogen.count}}, \code{\link{remove.hydrogens}} @@ -150,40 +164,40 @@ get.total.hydrogen.count <- function(mol) { } #' get.exact.mass -#' +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @export get.exact.mass <- function(mol) { if (!.check.class(mol, "org/openscience/cdk/interfaces/IAtomContainer")) stop("molecule must be of class IAtomContainer") - - + + formulaJ <- .jcall('org/openscience/cdk/tools/manipulator/MolecularFormulaManipulator', "Lorg/openscience/cdk/interfaces/IMolecularFormula;", "getMolecularFormula", mol, use.true.class=FALSE); - - + + ret <- .jcall('org/openscience/cdk/tools/manipulator/MolecularFormulaManipulator', 'D', 'getTotalExactMass', formulaJ, check=FALSE) - + ex <- .jgetEx(clear=TRUE) - - + + if (is.null(ex)) return(ret) else{ print(ex) stop("Couldn't get exact mass. Maybe you have not performed aromaticity, atom type or isotope configuration?") } } - + #' get.natural.mass -#' +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @export get.natural.mass <- function(mol) { @@ -199,17 +213,17 @@ get.natural.mass <- function(mol) { else{ print(ex) stop("Couldn't get natural mass. Maybe you have not performed aromaticity, atom type or isotope configuration?") - } + } } #' get.total.charge -#' +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @export get.total.charge <- function(mol) { if (!.check.class(mol, "org/openscience/cdk/interfaces/IAtomContainer")) stop("molecule must be of class IAtomContainer") - + ## check to see if we have partial charges atoms <- get.atoms(mol) pcharges <- unlist(lapply(atoms, get.charge)) @@ -226,7 +240,7 @@ get.total.charge <- function(mol) { } #' get.total.formal.charge -#' +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @export get.total.formal.charge <- function(mol) { @@ -239,14 +253,14 @@ get.total.formal.charge <- function(mol) { } #' Convert implicit hydrogens to explicit. -#' +#' #' In some cases, a molecule may not have any hydrogens (such as when read #' in from an MDL MOL file that did not have hydrogens or SMILES with no #' explicit hydrogens). In such cases, this method -#' will add implicit hydrogens and then convert them to explicit ones. The +#' will add implicit hydrogens and then convert them to explicit ones. The #' newly added H's will not have any 2D or 3D coordinates associated with them. #' Ensure that the molecule has been typed beforehand. -#' +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @seealso \code{\link{get.hydrogen.count}}, \code{\link{remove.hydrogens}}, \code{\link{set.atom.types}} #' @export @@ -267,8 +281,8 @@ convert.implicit.to.explicit <- function(mol) { #' Get the atoms from a molecule or bond. -#' -#' @param object A `jobjRef` representing either a molecule (`IAtomContainer`) or +#' +#' @param object A `jobjRef` representing either a molecule (`IAtomContainer`) or #' bond (`IBond`) object. #' @return A list of `jobjRef` representing the `IAtom` objects in the molecule or bond #' @seealso \code{\link{get.bonds}}, \code{\link{get.connected.atoms}} @@ -277,7 +291,7 @@ convert.implicit.to.explicit <- function(mol) { get.atoms <- function(object) { if (is.null(attr(object, 'jclass'))) stop("object must be of class IAtomContainer or IObject or IBond") - + if (attr(object, 'jclass') != "org/openscience/cdk/interfaces/IAtomContainer" && attr(object, 'jclass') != "org/openscience/cdk/interfaces/IObject" && attr(object, 'jclass') != "org/openscience/cdk/interfaces/IBond") @@ -291,7 +305,7 @@ get.atoms <- function(object) { } #' Get the bonds in a molecule. -#' +#' #' @param mol A `jobjRef` representing the molecule (`IAtomContainer`) object. #' @return A list of `jobjRef` representing the bonds (`IBond`) objects in the molecule #' @seealso \code{\link{get.atoms}}, \code{\link{get.connected.atoms}} @@ -300,7 +314,7 @@ get.atoms <- function(object) { get.bonds <- function(mol) { if (!.check.class(mol, "org/openscience/cdk/interfaces/IAtomContainer")) stop("molecule must be of class IAtomContainer") - + nbond <- .jcall(mol, "I", "getBondCount") bonds <- list() for (i in 0:(nbond-1)) @@ -309,25 +323,25 @@ get.bonds <- function(mol) { } #' do.aromaticity -#' +#' #' detect aromaticity of an input compound -#' +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @export do.aromaticity do.aromaticity <- function(mol) { if (!.check.class(mol, "org/openscience/cdk/interfaces/IAtomContainer")) stop("molecule must be of class IAtomContainer") - + model <- .jcall("org/openscience/cdk/aromaticity/ElectronDonation", "Lorg/openscience/cdk/aromaticity/ElectronDonation;", "daylight") - cycles.all <- .jcall("org/openscience/cdk/graph/Cycles", + cycles.all <- .jcall("org/openscience/cdk/graph/Cycles", "Lorg/openscience/cdk/graph/CycleFinder;", "all") - cycles.6 <- .jcall("org.openscience.cdk.graph.Cycles", + cycles.6 <- .jcall("org.openscience.cdk.graph.Cycles", "Lorg/openscience/cdk/graph/CycleFinder;", "all", as.integer(6)) - cycles <- .jcall("org.openscience.cdk.graph.Cycles", + cycles <- .jcall("org.openscience.cdk.graph.Cycles", "Lorg/openscience/cdk/graph/CycleFinder;", "or", cycles.all, cycles.6) aromaticity <- .jnew("org/openscience/cdk.aromaticity/Aromaticity", @@ -336,9 +350,9 @@ do.aromaticity <- function(mol) { } #' do.isotopes -#' +#' #' configure isotopes -#' +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @export do.isotopes do.isotopes <- function(mol) { @@ -351,9 +365,9 @@ do.isotopes <- function(mol) { } #' Tests whether the molecule is neutral. -#' +#' #' The test checks whether all atoms in the molecule have a formal charge of 0. -#' +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @return `TRUE` if molecule is neutral, `FALSE` otherwise #' @aliases charge @@ -362,60 +376,60 @@ do.isotopes <- function(mol) { is.neutral <- function(mol) { if (!.check.class(mol, "org/openscience/cdk/interfaces/IAtomContainer")) stop("molecule must be of class IAtomContainer") - + atoms <- get.atoms(mol) fc <- unlist(lapply(atoms, get.formal.charge)) return(all(fc == 0)) } #' Tests whether the molecule is fully connected. -#' -#' A single molecule will be represented as a -#' \href{https://en.wikipedia.org/wiki/Complete_graph}{complete} graph. -#' In some cases, such as for molecules in salt form, or after certain -#' operations such as bond splits, the molecular graph may contained +#' +#' A single molecule will be represented as a +#' \href{https://en.wikipedia.org/wiki/Complete_graph}{complete} graph. +#' In some cases, such as for molecules in salt form, or after certain +#' operations such as bond splits, the molecular graph may contained #' \href{http://mathworld.wolfram.com/DisconnectedGraph.html}{disconnected components}. #' This method can be used to tested whether the molecule is complete (i.e. fully #' connected). -#' +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @return `TRUE` if molecule is complete, `FALSE` otherwise #' @seealso \code{\link{get.largest.component}} #' @export #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com}) -#' @examples +#' @examples #' m <- parse.smiles("CC.CCCCCC.CCCC")[[1]] #' is.connected(m) is.connected <- function(mol) { if (!.check.class(mol, "org/openscience/cdk/interfaces/IAtomContainer")) stop("molecule must be of class IAtomContainer") - + .jcall("org.openscience.cdk.graph.ConnectivityChecker", "Z", "isConnected", mol) } #' Gets the largest component in a disconnected molecular graph. -#' -#' A molecule may be represented as a +#' +#' A molecule may be represented as a #' \href{http://mathworld.wolfram.com/DisconnectedGraph.html}{disconnected graph}, such as #' when read in as a salt form. This method will return the larges connected component -#' or if there is only a single component (i.e., the molecular graph is +#' or if there is only a single component (i.e., the molecular graph is #' \href{https://en.wikipedia.org/wiki/Complete_graph}{complete} or fully connected), that #' component is returned. -#' +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @return The largest component as an `IAtomContainer` object or else the input molecule itself #' @seealso \code{\link{is.connected}} #' @export #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com}) -#' @examples +#' @examples #' m <- parse.smiles("CC.CCCCCC.CCCC")[[1]] #' largest <- get.largest.component(m) #' length(get.atoms(largest)) == 6 get.largest.component <- function(mol) { if (!.check.class(mol, "org/openscience/cdk/interfaces/IAtomContainer")) stop("molecule must be of class IAtomContainer") - + isConnected <- .jcall("org.openscience.cdk.graph.ConnectivityChecker", "Z", "isConnected", mol) if (isConnected) return(mol) @@ -440,7 +454,7 @@ get.largest.component <- function(mol) { } #' Get the number of atoms in the molecule. -#' +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @return An integer representing the number of atoms in the molecule #' @export @@ -452,10 +466,10 @@ get.atom.count <- function(mol) { } #' Get the title of the molecule. -#' +#' #' Some molecules may not have a title (such as when parsing in a SMILES #' with not title). -#' +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @return A character string with the title, `NA` is no title is specified #' @seealso \code{\link{set.title}} @@ -468,7 +482,7 @@ get.title <- function(mol) { } #' Set the title of the molecule. -#' +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @param title The title of the molecule as a character string. This will overwrite #' any pre-existing title. The default value is an empty string. @@ -482,17 +496,17 @@ set.title <- function(mol, title = "") { } #' Generate 2D coordinates for a molecule. -#' +#' #' Some file formats such as SMILES do not support 2D (or 3D) coordinates #' for the atoms. Other formats such as SD or MOL have support for coordinates -#' but may not include them. This method will generate reasonable 2D coordinates +#' but may not include them. This method will generate reasonable 2D coordinates #' based purely on connectivity information, overwriting -#' any existing coordinates if present. -#' +#' any existing coordinates if present. +#' #' Note that when depicting a molecule (\code{\link{view.molecule.2d}}), 2D coordinates #' are generated, but since it does not modify the input molecule, we do not have access #' to the generated coordinates. -#' +#' #' @param mol The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @return The input molecule, with 2D coordinates added #' @seealso \code{\link{get.point2d}}, \code{\link{view.molecule.2d}} @@ -501,7 +515,7 @@ set.title <- function(mol, title = "") { generate.2d.coordinates <- function(mol) { if (!.check.class(mol, "org/openscience/cdk/interfaces/IAtomContainer")) stop("molecule must be of class IAtomContainer") - + .jcall('org/guha/rcdk/util/Misc', 'Lorg/openscience/cdk/interfaces/IAtomContainer;', 'getMoleculeWithCoordinates', mol) } From e770d6b2838e75d2e92a469e5f66327a7a22c5cc Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Fri, 28 Nov 2025 10:19:03 -0500 Subject: [PATCH 20/27] fix: run all tests during the check now that CPU usage has been limited in CHECK envs --- rcdk/inst/unitTests/runit.fingerprints.R | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/rcdk/inst/unitTests/runit.fingerprints.R b/rcdk/inst/unitTests/runit.fingerprints.R index 08f22e8067..1600ae8cf5 100755 --- a/rcdk/inst/unitTests/runit.fingerprints.R +++ b/rcdk/inst/unitTests/runit.fingerprints.R @@ -3,17 +3,10 @@ test.fp <- function() { fp <- get.fingerprint(mol, type='maccs') checkTrue(length(fp@bits) > 0) - # Skip slow tests during CRAN check (use multiple detection methods) - is_cran_check <- Sys.getenv("_R_CHECK_PACKAGE_NAME_", "") != "" || - Sys.getenv("_R_CHECK_TIMINGS_", "") != "" || - identical(Sys.getenv("NOT_CRAN"), "false") - - if (!is_cran_check) { - fp <- get.fingerprint(mol, type='kr') - checkTrue(length(fp@bits) > 0) - fp <- get.fingerprint(mol, type='shortestpath') - checkTrue(length(fp@bits) > 0) - } + fp <- get.fingerprint(mol, type='kr') + checkTrue(length(fp@bits) > 0) + fp <- get.fingerprint(mol, type='shortestpath') + checkTrue(length(fp@bits) > 0) } # Substructure test are inspired by the test for the substructure fingerprints in CDK From 07f09b5c60bb80163d6a5b0c835d3710c12543a4 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Fri, 28 Nov 2025 10:19:51 -0500 Subject: [PATCH 21/27] fix update CRAN submission docs --- rcdk/cran-comments.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rcdk/cran-comments.md b/rcdk/cran-comments.md index 04c749215e..120bd73dd6 100755 --- a/rcdk/cran-comments.md +++ b/rcdk/cran-comments.md @@ -1,5 +1,6 @@ ## General Comments +- 4th resubmission. Fixed the final URL issue. More long test avoidance on CRAN/linux - 3rd resubmission. Fixed the final URL issue. More long test avoidance on CRAN/linux - 2nd resubmission. The URL issue seems to be related to CACHE; I've slimmed the Unit tests to fit in time. - resubmission with a number of small URL fixes @@ -12,4 +13,4 @@ ── R CMD check results ─────────────────────────────── rcdk 3.8.2 ──── Duration: 25.5s -0 errors ✔ | 0 warnings ✔ | 0 notes ✔ \ No newline at end of file +0 errors ✔ | 0 warnings ✔ | 0 notes ✔ From 74ef3b4256158259dd4ac24ed6c447be719f98fb Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Fri, 28 Nov 2025 10:37:42 -0500 Subject: [PATCH 22/27] spelling --- rcdk/DESCRIPTION | 2 +- rcdk/NEWS.md | 6 ++++-- rcdk/R/frags.R | 2 +- rcdk/R/props.R | 2 +- rcdk/R/visual.R | 2 +- rcdk/vignettes/PerformanceNotes.Rmd | 2 +- rcdk/vignettes/rcdk.Rmd | 6 +++--- 7 files changed, 12 insertions(+), 10 deletions(-) diff --git a/rcdk/DESCRIPTION b/rcdk/DESCRIPTION index baecd60e25..fad44d42fc 100755 --- a/rcdk/DESCRIPTION +++ b/rcdk/DESCRIPTION @@ -33,7 +33,7 @@ LazyData: true SystemRequirements: Java (>= 8) BugReports: https://github.com/CDK-R/cdkr/issues Description: Allows the user to access functionality in the - 'CDK', a Java framework for chemoinformatics. This allows the user to load + 'CDK', a Java framework for cheminformatics. This allows the user to load molecules, evaluate fingerprints, calculate molecular descriptors and so on. In addition, the 'CDK' API allows the user to view structures in 2D. RoxygenNote: 7.3.3 diff --git a/rcdk/NEWS.md b/rcdk/NEWS.md index fa4bf8ca0f..5c84a31333 100755 --- a/rcdk/NEWS.md +++ b/rcdk/NEWS.md @@ -13,9 +13,11 @@ * Update rCDK to work with rcdklibs 2.8 + + # rcdk 3.6.0 -* Fix code to handle changes to JDK17. Notably, I needed to reduce the use of the J notation in a nubmer of places in favor of direct calls. +* Fix code to handle changes to JDK17. Notably, I needed to reduce the use of the J notation in a number of places in favor of direct calls. * formally deprecated `do.typing` in favor of `set.atom.types` * Updated handling of atomic descriptors to resolve a name mismatch bug * Added a test case for atomic descriptors (thanks to Francesca Di Cesare) @@ -30,7 +32,7 @@ # rcdk 3.5.0 -* update to RCDKlibs 2.3. This changes uderlying AtomContainer defualt to Atomcontainer2 and also has new support for mass spec mass functions. On the rcdk side we have moved to a tidyverse documentation and build system. +* update to RCDKlibs 2.3. This changes underlying AtomContainer default to Atomcontainer2 and also has new support for mass spec mass functions. On the rcdk side we have moved to a tidyverse documentation and build system. # rcdk 3.4.7 diff --git a/rcdk/R/frags.R b/rcdk/R/frags.R index c199018c49..5a802a2b19 100755 --- a/rcdk/R/frags.R +++ b/rcdk/R/frags.R @@ -22,7 +22,7 @@ #' `frameworks`. Each of these elements is either a character vector of SMILES strings or a list of #' `IAtomContainer` objects. #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com}) -#' @seealso [get.exhuastive.fragments()] +#' @seealso [get.exhaustive.fragments()] #' @export #' @examples #' mol <- parse.smiles('c1ccc(cc1)CN(c2cc(ccc2[N+](=O)[O-])c3c(nc(nc3CC)N)N)C')[[1]] diff --git a/rcdk/R/props.R b/rcdk/R/props.R index 15d1415432..874a812151 100755 --- a/rcdk/R/props.R +++ b/rcdk/R/props.R @@ -151,7 +151,7 @@ get.properties <- function(molecule) { #' Remove a property associated with a molecule. #' #' In this context a property is a value associated with a key and stored -#' with the molecule. This methd will remove the property defined by the key. +#' with the molecule. This method will remove the property defined by the key. #' If there is such key, a warning is raised. #' #' @param molecule The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` diff --git a/rcdk/R/visual.R b/rcdk/R/visual.R index a57e25c159..d8b7b08ffd 100755 --- a/rcdk/R/visual.R +++ b/rcdk/R/visual.R @@ -83,7 +83,7 @@ get.depictor <- function(width = 200, height = 200, zoom = 1.3, style = "cow", a #' view.molecule.2d #' #' Create a 2D depiction of a molecule. If there are more than -#' one molecules supplied, return a grid woth \code{ncol} columns,. +#' one molecules supplied, return a grid with \code{ncol} columns,. #' #' @param molecule The molecule to query. Should be a `jobjRef` representing an `IAtomContainer` #' @param ncol Default \code{4} diff --git a/rcdk/vignettes/PerformanceNotes.Rmd b/rcdk/vignettes/PerformanceNotes.Rmd index 5fae5a8db6..4e4c57cdb5 100644 --- a/rcdk/vignettes/PerformanceNotes.Rmd +++ b/rcdk/vignettes/PerformanceNotes.Rmd @@ -13,7 +13,7 @@ vignette: > ## rCDK Performance -In September 2022, of this year, [Stepehn Neumann](https://gist.github.com/sneumann) +In September 2022, of this year, [Stephen Neumann](https://gist.github.com/sneumann) [created a benchmark](https://gist.github.com/sneumann/959a6d205ea4ac73eaf1393da0ec0673) for moecular weight calculation that he [announced on twitter](https://x.com/sneumannoffice/status/1570070283083710465?s=20&t=RqJR3Bbh-DEcbCf2tWUEBQ) showing that rCDK had dismal performance relative to other tools in the R ecosystem. Something seemed a bit off so I looked into the code. What I discovered is that the mass spec calculations were mediated by R classes instead of accessing the underlying Java code directly and if you write a function that does diff --git a/rcdk/vignettes/rcdk.Rmd b/rcdk/vignettes/rcdk.Rmd index f23f8014c8..5446fa9d32 100644 --- a/rcdk/vignettes/rcdk.Rmd +++ b/rcdk/vignettes/rcdk.Rmd @@ -85,7 +85,7 @@ are pretty opaque to the user and are really meant to be processed using methods from the `rcdk` or [rJava](https://CRAN.R-project.org/package=rJava) packages. However, since it loads all the molecules from the specified file into a list, -large files can lead to out of memory errors. In such a situtation it is preferable +large files can lead to out of memory errors. In such a situation it is preferable to iterate over the file, one structure at a time. Currently this behavior is supported for SDF and SMILES files. An example of such a usage for a large SD file would be @@ -130,7 +130,7 @@ SDF. To write molecules to a disk file in SDF format. ```{r eval=FALSE} write.molecules(mols, filename='mymols.sdf') ``` -By default, if mols is a list of multiple molecules, all of them will be written +By default, if `mols` is a list of multiple molecules, all of them will be written to a single SDF file. If this is not desired, you can write each on to individual files (which are prefixed by the value of filename): @@ -205,7 +205,7 @@ view.molecule.2d(mols[[5]], depictor=depictor) #view.molecule.2d(mols[[5]], depictor=depictor) ``` -The method also allows you to highlight substructures using [SMARTS](https://en.wikipedia.org/wiki/Smiles_arbitrary_target_specification). This is useful in highlight commen substructures in a set of molecules +The method also allows you to highlight substructures using [SMARTS](https://en.wikipedia.org/wiki/Smiles_arbitrary_target_specification). This is useful in highlight common substructures in a set of molecules ```{r eval=FALSE} depictor <- get.depictor(style='cob', abbr='reagents', sma='N(C)(C)') view.molecule.2d(mols, depictor=depictor) From b3a945b15049b4ffc66350b650d6361fe32cfb57 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Fri, 28 Nov 2025 10:38:47 -0500 Subject: [PATCH 23/27] update the date --- rcdk/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rcdk/DESCRIPTION b/rcdk/DESCRIPTION index fad44d42fc..fe4479358b 100755 --- a/rcdk/DESCRIPTION +++ b/rcdk/DESCRIPTION @@ -1,6 +1,6 @@ Package: rcdk Version: 3.8.2 -Date: 2025-09-10 +Date: 2025-11-28 Title: Interface to the 'CDK' Libraries Authors@R: c( person('Rajarshi', 'Guha', ,'rajarshi.guha@gmail.com', role=c('aut',"cph"), From a6aa5479bb6eb59e8f8870535240126ab7025fe2 Mon Sep 17 00:00:00 2001 From: Egon Willighagen Date: Sat, 29 Nov 2025 09:52:12 +0100 Subject: [PATCH 24/27] Correct first name --- rcdk/vignettes/PerformanceNotes.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rcdk/vignettes/PerformanceNotes.Rmd b/rcdk/vignettes/PerformanceNotes.Rmd index 4e4c57cdb5..689d37d3d4 100644 --- a/rcdk/vignettes/PerformanceNotes.Rmd +++ b/rcdk/vignettes/PerformanceNotes.Rmd @@ -13,7 +13,7 @@ vignette: > ## rCDK Performance -In September 2022, of this year, [Stephen Neumann](https://gist.github.com/sneumann) +In September 2022, of this year, [Steffen Neumann](https://gist.github.com/sneumann) [created a benchmark](https://gist.github.com/sneumann/959a6d205ea4ac73eaf1393da0ec0673) for moecular weight calculation that he [announced on twitter](https://x.com/sneumannoffice/status/1570070283083710465?s=20&t=RqJR3Bbh-DEcbCf2tWUEBQ) showing that rCDK had dismal performance relative to other tools in the R ecosystem. Something seemed a bit off so I looked into the code. What I discovered is that the mass spec calculations were mediated by R classes instead of accessing the underlying Java code directly and if you write a function that does From 28c58a4b8cb006f1683a8c5081a6bf8e4f713e19 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sun, 30 Nov 2025 10:27:41 -0500 Subject: [PATCH 25/27] Setup JAVA for single threaded testing to avoid triggering warning on CRAN --- rcdk/tests/doRUnit.R | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/rcdk/tests/doRUnit.R b/rcdk/tests/doRUnit.R index 2ff858d1d2..648bb9695b 100755 --- a/rcdk/tests/doRUnit.R +++ b/rcdk/tests/doRUnit.R @@ -1,10 +1,33 @@ - if(require("RUnit", quietly=TRUE)) { +# Set JAVA_TOOL_OPTIONS BEFORE any library loads (including RUnit) +# This will force JVM to use single-threaded execution during tests +# to avoid CRAN NOTE: "CPU time > elapsed time" +java_opts <- paste( + "-XX:ActiveProcessorCount=1", + "-XX:ParallelGCThreads=1", + "-XX:ConcGCThreads=1", + "-XX:+UseSerialGC", + "-XX:CICompilerCount=1", + "-XX:-TieredCompilation", + "-XX:-BackgroundCompilation", + "-Djava.util.concurrent.ForkJoinPool.common.parallelism=1", + sep=" " +) +Sys.setenv("JAVA_TOOL_OPTIONS" = java_opts) +Sys.setenv("_JAVA_OPTIONS" = java_opts) + +# Also set thread-limiting environment variables for native libraries +Sys.setenv("OMP_NUM_THREADS" = "1") +Sys.setenv("OPENBLAS_NUM_THREADS" = "1") +Sys.setenv("MKL_NUM_THREADS" = "1") +Sys.setenv("VECLIB_MAXIMUM_THREADS" = "1") + +if(require("RUnit", quietly=TRUE)) { library(rJava) library(fingerprint) ## --- Setup --- - + pkg <- "rcdk" # <-- Change to package name! if(Sys.getenv("RCMDCHECK") == "FALSE") { ## Path to unit tests for standalone running under Makefile (not R CMD check) @@ -17,26 +40,26 @@ } cat("\nRunning unit tests\n") print(list(pkg=pkg, getwd=getwd(), pathToUnitTests=path)) - + library(package=pkg, character.only=TRUE) - + ## If desired, load the name space to allow testing of private functions ## if (is.element(pkg, loadedNamespaces())) ## attach(loadNamespace(pkg), name=paste("namespace", pkg, sep=":"), pos=3) ## ## or simply call PKG:::myPrivateFunction() in tests - + ## --- Testing --- - + ## Define tests testSuite <- defineTestSuite(name=paste(pkg, "rcdk Unit Tests"), dirs=path) ## Run tests <- runTestSuite(testSuite) - + ## Default report name #pathReport <- file.path(path, "report") - + ## Report to stdout and text files cat("------------------- UNIT TEST SUMMARY ---------------------\n\n") printTextProtocol(tests, showDetails=FALSE) @@ -44,10 +67,10 @@ # fileName=paste(pathReport, "Summary.txt", sep="")) #printTextProtocol(tests, showDetails=TRUE, # fileName=paste(pathReport, ".txt", sep="")) - + ## Report to HTML file #printHTMLProtocol(tests, fileName=paste(pathReport, ".html", sep="")) - + ## Return stop() to cause R CMD check stop in case of ## - failures i.e. FALSE to unit tests or ## - errors i.e. R errors From 6a2e141f921506045b9253e4e3fbadd9d2457ee5 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sun, 30 Nov 2025 10:28:53 -0500 Subject: [PATCH 26/27] prep for resubmission --- rcdk/DESCRIPTION | 2 +- rcdk/cran-comments.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/rcdk/DESCRIPTION b/rcdk/DESCRIPTION index fe4479358b..cefbb405b3 100755 --- a/rcdk/DESCRIPTION +++ b/rcdk/DESCRIPTION @@ -1,6 +1,6 @@ Package: rcdk Version: 3.8.2 -Date: 2025-11-28 +Date: 2025-11-30 Title: Interface to the 'CDK' Libraries Authors@R: c( person('Rajarshi', 'Guha', ,'rajarshi.guha@gmail.com', role=c('aut',"cph"), diff --git a/rcdk/cran-comments.md b/rcdk/cran-comments.md index 120bd73dd6..76041df249 100755 --- a/rcdk/cran-comments.md +++ b/rcdk/cran-comments.md @@ -1,5 +1,6 @@ ## General Comments +- 5th resubmission. Addressed an issue with multithreading on Linux - 4th resubmission. Fixed the final URL issue. More long test avoidance on CRAN/linux - 3rd resubmission. Fixed the final URL issue. More long test avoidance on CRAN/linux - 2nd resubmission. The URL issue seems to be related to CACHE; I've slimmed the Unit tests to fit in time. From e4336431fb4fc55c90aa007b8cbba60b08321a34 Mon Sep 17 00:00:00 2001 From: Zachary Charlop-Powers Date: Sun, 30 Nov 2025 10:31:26 -0500 Subject: [PATCH 27/27] rebuild docs --- rcdk/R/rcdk.R | 33 ++++++++++++++++++++---- rcdk/inst/unitTests/runit.match.R | 12 +++++++++ rcdk/inst/unitTests/runit.rcdk.R | 33 +++++++++++++----------- rcdk/man/convert.implicit.to.explicit.Rd | 2 +- rcdk/man/generate.2d.coordinates.Rd | 2 +- rcdk/man/get.atoms.Rd | 2 +- rcdk/man/get.chem.object.builder.Rd | 6 ++--- rcdk/man/get.largest.component.Rd | 4 +-- rcdk/man/get.murcko.fragments.Rd | 2 +- rcdk/man/get.total.hydrogen.count.Rd | 6 ++--- rcdk/man/is.connected.Rd | 8 +++--- rcdk/man/remove.hydrogens.Rd | 4 +-- rcdk/man/remove.property.Rd | 2 +- rcdk/man/view.molecule.2d.Rd | 2 +- 14 files changed, 78 insertions(+), 40 deletions(-) diff --git a/rcdk/R/rcdk.R b/rcdk/R/rcdk.R index 82aae883c9..7ac45a8874 100755 --- a/rcdk/R/rcdk.R +++ b/rcdk/R/rcdk.R @@ -59,23 +59,46 @@ get.chem.object.builder <- function() { jar.rcdk <- paste(lib,pkg,"cont","rcdk.jar",sep=.Platform$file.sep) jar.png <- paste(lib,pkg,"cont","com.objectplanet.image.PngEncoder.jar",sep=.Platform$file.sep) - # Detect if we're running in CRAN check or test environment + # Detect if we're running in R CMD check environment # If so, limit JVM to single-threaded execution to avoid parallel processing NOTE + # Only use definitive _R_CHECK_ variables that are only set during R CMD check is_check_env <- Sys.getenv("_R_CHECK_PACKAGE_NAME_", "") != "" || - Sys.getenv("_R_CHECK_TIMINGS_", "") != "" || - Sys.getenv("RCMDCHECK") != "" || - identical(Sys.getenv("NOT_CRAN"), "false") + Sys.getenv("_R_CHECK_TIMINGS_", "") != "" + + # Set environment variables BEFORE initializing JVM to limit threading + if (is_check_env) { + Sys.setenv("OMP_NUM_THREADS" = "1") + Sys.setenv("OPENBLAS_NUM_THREADS" = "1") + Sys.setenv("MKL_NUM_THREADS" = "1") + Sys.setenv("VECLIB_MAXIMUM_THREADS" = "1") + Sys.setenv("NUMEXPR_NUM_THREADS" = "1") + } jvm_params <- "-Djava.awt.headless=true" if (is_check_env) { # Limit to single thread during CRAN checks to avoid "CPU time > elapsed time" NOTE - jvm_params <- c(jvm_params, "-XX:ActiveProcessorCount=1") + # Use multiple parameters for maximum compatibility across JVM versions + jvm_params <- c(jvm_params, + "-XX:ActiveProcessorCount=1", + "-XX:ParallelGCThreads=1", + "-XX:ConcGCThreads=1", + "-XX:-UseConcMarkSweepGC", + "-XX:-UseParallelGC", + "-XX:+UseSerialGC", + "-Djava.util.concurrent.ForkJoinPool.common.parallelism=1") } .jinit(classpath=c(jar.rcdk,jar.png), parameters=jvm_params) .jcall("java/lang/System", "S", "setProperty", "java.awt.headless", "true") + # Additional runtime thread limiting for CRAN checks + if (is_check_env) { + # Set ForkJoinPool parallelism at runtime (in case JVM param didn't work) + .jcall("java/lang/System", "S", "setProperty", + "java.util.concurrent.ForkJoinPool.common.parallelism", "1") + } + # check Java Version jv <- .jcall("java/lang/System", "S", "getProperty", "java.runtime.version") if(substr(jv, 1L, 2L) == "1.") { diff --git a/rcdk/inst/unitTests/runit.match.R b/rcdk/inst/unitTests/runit.match.R index b2cb12bf05..ced5212c41 100755 --- a/rcdk/inst/unitTests/runit.match.R +++ b/rcdk/inst/unitTests/runit.match.R @@ -42,6 +42,10 @@ test.match4 <- function() test.mcs1 <- function() { + # Skip on CRAN - MCS algorithms are computationally intensive and trigger multi-threading + if (Sys.getenv("_R_CHECK_PACKAGE_NAME_", "") != "" || Sys.getenv("_R_CHECK_TIMINGS_", "") != "") { + return(TRUE) + } mols <- parse.smiles(c("NCc1ccccc1OC(=N)CCN", "c1ccccc1OC(=N)")) lapply(mols, do.aromaticity) lapply(mols, set.atom.types) @@ -51,6 +55,10 @@ test.mcs1 <- function() { } test.mcs3 <- function() { + # Skip on CRAN - MCS algorithms are computationally intensive and trigger multi-threading + if (Sys.getenv("_R_CHECK_PACKAGE_NAME_", "") != "" || Sys.getenv("_R_CHECK_TIMINGS_", "") != "") { + return(TRUE) + } mols <- parse.smiles(c("c1cccc(COC(=O)NC(CC(C)C)C(=O)NC(CCc2ccccc2)C(=O)COC)c1", "c1cccc(COC(=O)NC(CC(C)C)C(=O)NCC#N)c1")) lapply(mols, do.aromaticity) lapply(mols, set.atom.types) @@ -60,6 +68,10 @@ test.mcs3 <- function() { } test.mcs2 <- function() { + # Skip on CRAN - MCS algorithms are computationally intensive and trigger multi-threading + if (Sys.getenv("_R_CHECK_PACKAGE_NAME_", "") != "" || Sys.getenv("_R_CHECK_TIMINGS_", "") != "") { + return(TRUE) + } mols <- parse.smiles(c("NCc1ccccc1OC(=N)CCN", "c1ccccc1OC(=N)")) lapply(mols, do.aromaticity) lapply(mols, set.atom.types) diff --git a/rcdk/inst/unitTests/runit.rcdk.R b/rcdk/inst/unitTests/runit.rcdk.R index c92cbf09fb..516a2efb09 100755 --- a/rcdk/inst/unitTests/runit.rcdk.R +++ b/rcdk/inst/unitTests/runit.rcdk.R @@ -3,9 +3,9 @@ test.is.connected <- function() m <- parse.smiles('CCCC')[[1]] connected <- is.connected(m) checkTrue(connected) - m <- parse.smiles('CCCC.CCCC')[[1]] + m <- parse.smiles('CCCC.CCCC')[[1]] connected <- is.connected(m) - checkTrue(!connected) + checkTrue(!connected) } test.get.largest <- function() { @@ -15,7 +15,7 @@ test.get.largest <- function() { m <- parse.smiles('CCCC.CCCCCC.CC')[[1]] l <- get.largest.component(m) - checkEquals(length(get.atoms(l)), 6) + checkEquals(length(get.atoms(l)), 6) } test.atom.count <- function() { @@ -25,7 +25,7 @@ test.atom.count <- function() { convert.implicit.to.explicit(m) natom <- get.atom.count(m) - checkEquals(natom, 11) + checkEquals(natom, 11) } test.is.neutral <- function() { @@ -46,7 +46,7 @@ test.formula <- function() { do.isotopes(m) convert.implicit.to.explicit(m) f2 <- get.mol2formula(m) - checkEquals(f2@string, "C35H64N3O21P3S") + checkEquals(f2@string, "C35H64N3O21P3S") } test.desc.cats <- function() { @@ -71,13 +71,17 @@ test.desc.calc <- function() { } test.exact.natural.mass <- function() { + # Skip on CRAN - isotope calculations are computationally intensive and trigger multi-threading + if (Sys.getenv("_R_CHECK_PACKAGE_NAME_", "") != "" || Sys.getenv("_R_CHECK_TIMINGS_", "") != "") { + return(TRUE) + } smiles <- c("CCNC1=NC(NC(C)C)=NC(Cl)=N1", # normal atrazine, DTXSID9020112 "[2H]N(CC)C1=NC(=NC(Cl)=N1)N([2H])C(C)C", #2H on implicit locations, DTXSID40892885 "[2H]C([2H])([2H])C([2H])([2H])NC1=NC(Cl)=NC(NC(C)C)=N1", #d5, DTXSID20486781 "CC[15NH]C1=NC(NC(C)C)=NC(Cl)=N1", #15N DTXSID40583908 "OC1=C(Br)C(Br)=C(Br)C(Br)=C1Br", #pentabromophenol, DTXSID9022079 "C[Se]CC[C@H](N)C(O)=O" # Selenium-L-methionine, DTXSID8046824 - ) + ) #atrzine m <- parse.smiles(smiles[1])[[1]] do.aromaticity(m) @@ -87,7 +91,7 @@ test.exact.natural.mass <- function() { # Dashboard ref mass: 215.093773, 215.69 checkEquals(get.exact.mass(m),215.0938, tolerance=1e-6) checkEquals(get.natural.mass(m),215.6835, tolerance=1e-6) - + #deuterium on exchangeable locations m <- parse.smiles(smiles[2])[[1]] do.aromaticity(m) @@ -97,7 +101,7 @@ test.exact.natural.mass <- function() { # Dashboard ref mass: 217.106327, 217.7 checkEquals(get.exact.mass(m),217.1063, tolerance=1e-6) #checkEquals(get.natural.mass(m),215.6835) #this is wrong! It should be 217.7something - + #deuterium on fixed locations m <- parse.smiles(smiles[3])[[1]] do.aromaticity(m) @@ -107,7 +111,7 @@ test.exact.natural.mass <- function() { # Dashboard ref mass: 220.125157, 220.72 checkEquals(get.exact.mass(m),220.1252, tolerance=1e-6) #checkEquals(get.natural.mass(m),215.6835) #this is wrong! It should be 220.7something - + #15N-atrazine m <- parse.smiles(smiles[4])[[1]] do.aromaticity(m) @@ -117,7 +121,7 @@ test.exact.natural.mass <- function() { # Dashboard ref mass: 216.090808, 216.68 checkEqualsNumeric(get.exact.mass(m),216.0908, tolerance=1e-6) #checkEquals(get.natural.mass(m),215.6835) #this is wrong! It should be 216.68something - + #pentabromophenol, DTXSID9022079 - tricky as lots of Br shifts pattern m <- parse.smiles(smiles[5])[[1]] do.aromaticity(m) @@ -126,8 +130,8 @@ test.exact.natural.mass <- function() { convert.implicit.to.explicit(m) # Dashboard ref mass: 483.59443, 488.593 checkEquals(get.exact.mass(m),483.5944, tolerance=1e-6) - checkEquals(get.natural.mass(m),488.5894, tolerance=1e-6) - + checkEquals(get.natural.mass(m),488.5894, tolerance=1e-6) + # Selenium-L-methionine, DTXSID8046824 - tricky as Se primary isotope not lowest mass m <- parse.smiles(smiles[6])[[1]] do.aromaticity(m) @@ -137,7 +141,6 @@ test.exact.natural.mass <- function() { # Dashboard ref mass: 196.995501, 196.119 checkEquals(get.exact.mass(m),196.9955, tolerance=1e-6) checkEquals(get.natural.mass(m),196.1059, tolerance=1e-6) #quite a discrepancy in ref value - - -} + +} diff --git a/rcdk/man/convert.implicit.to.explicit.Rd b/rcdk/man/convert.implicit.to.explicit.Rd index 58baebfde4..7e47dffd0d 100755 --- a/rcdk/man/convert.implicit.to.explicit.Rd +++ b/rcdk/man/convert.implicit.to.explicit.Rd @@ -13,7 +13,7 @@ convert.implicit.to.explicit(mol) In some cases, a molecule may not have any hydrogens (such as when read in from an MDL MOL file that did not have hydrogens or SMILES with no explicit hydrogens). In such cases, this method -will add implicit hydrogens and then convert them to explicit ones. The +will add implicit hydrogens and then convert them to explicit ones. The newly added H's will not have any 2D or 3D coordinates associated with them. Ensure that the molecule has been typed beforehand. } diff --git a/rcdk/man/generate.2d.coordinates.Rd b/rcdk/man/generate.2d.coordinates.Rd index f42b7a333f..0e3702eec6 100755 --- a/rcdk/man/generate.2d.coordinates.Rd +++ b/rcdk/man/generate.2d.coordinates.Rd @@ -15,7 +15,7 @@ The input molecule, with 2D coordinates added \description{ Some file formats such as SMILES do not support 2D (or 3D) coordinates for the atoms. Other formats such as SD or MOL have support for coordinates -but may not include them. This method will generate reasonable 2D coordinates +but may not include them. This method will generate reasonable 2D coordinates based purely on connectivity information, overwriting any existing coordinates if present. } diff --git a/rcdk/man/get.atoms.Rd b/rcdk/man/get.atoms.Rd index d7e6afd7e9..1393a43eb7 100755 --- a/rcdk/man/get.atoms.Rd +++ b/rcdk/man/get.atoms.Rd @@ -7,7 +7,7 @@ get.atoms(object) } \arguments{ -\item{object}{A `jobjRef` representing either a molecule (`IAtomContainer`) or +\item{object}{A `jobjRef` representing either a molecule (`IAtomContainer`) or bond (`IBond`) object.} } \value{ diff --git a/rcdk/man/get.chem.object.builder.Rd b/rcdk/man/get.chem.object.builder.Rd index 8323428128..eff34b5f19 100755 --- a/rcdk/man/get.chem.object.builder.Rd +++ b/rcdk/man/get.chem.object.builder.Rd @@ -12,15 +12,15 @@ An instance of \href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk \description{ The CDK employs a builder design pattern to construct instances of new chemical objects (e.g., atoms, bonds, parsers -and so on). Many methods require an instance of a builder +and so on). Many methods require an instance of a builder object to function. While most functions in this package handle this internally, it is useful to be able to get an instance of a builder object when directly working with the CDK API via `rJava`. } \details{ -This method returns an instance of the \href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/silent/SilentChemObjectBuilder.html}{SilentChemObjectBuilder}. -Note that this is a static object that is created at package load time, +This method returns an instance of the \href{https://cdk.github.io/cdk/2.10/docs/api/org/openscience/cdk/silent/SilentChemObjectBuilder.html}{SilentChemObjectBuilder}. +Note that this is a static object that is created at package load time, and the same instance is returned whenever this function is called. } \author{ diff --git a/rcdk/man/get.largest.component.Rd b/rcdk/man/get.largest.component.Rd index 4c41cceb02..3dde3445be 100755 --- a/rcdk/man/get.largest.component.Rd +++ b/rcdk/man/get.largest.component.Rd @@ -13,10 +13,10 @@ get.largest.component(mol) The largest component as an `IAtomContainer` object or else the input molecule itself } \description{ -A molecule may be represented as a +A molecule may be represented as a \href{http://mathworld.wolfram.com/DisconnectedGraph.html}{disconnected graph}, such as when read in as a salt form. This method will return the larges connected component -or if there is only a single component (i.e., the molecular graph is +or if there is only a single component (i.e., the molecular graph is \href{https://en.wikipedia.org/wiki/Complete_graph}{complete} or fully connected), that component is returned. } diff --git a/rcdk/man/get.murcko.fragments.Rd b/rcdk/man/get.murcko.fragments.Rd index 65e2880e35..fcd233d189 100755 --- a/rcdk/man/get.murcko.fragments.Rd +++ b/rcdk/man/get.murcko.fragments.Rd @@ -47,7 +47,7 @@ mf1 <- get.murcko.fragments(mol, as.smiles=TRUE, single.framework=TRUE) mf1 <- get.murcko.fragments(mol, as.smiles=TRUE, single.framework=FALSE) } \seealso{ -[get.exhuastive.fragments()] +[get.exhaustive.fragments()] } \author{ Rajarshi Guha (\email{rajarshi.guha@gmail.com}) diff --git a/rcdk/man/get.total.hydrogen.count.Rd b/rcdk/man/get.total.hydrogen.count.Rd index a6a8fd9d2a..fc25cba992 100755 --- a/rcdk/man/get.total.hydrogen.count.Rd +++ b/rcdk/man/get.total.hydrogen.count.Rd @@ -13,9 +13,9 @@ get.total.hydrogen.count(mol) An integer representing the total number of implicit hydrogens } \description{ -Counts the number of hydrogens on the provided molecule. As this method -will sum all implicit hydrogens on each atom it is important to ensure -the molecule has already been configured (and thus each atom has an +Counts the number of hydrogens on the provided molecule. As this method +will sum all implicit hydrogens on each atom it is important to ensure +the molecule has already been configured (and thus each atom has an implicit hydrogen count). } \seealso{ diff --git a/rcdk/man/is.connected.Rd b/rcdk/man/is.connected.Rd index b68f34b5ec..c14d17a00f 100755 --- a/rcdk/man/is.connected.Rd +++ b/rcdk/man/is.connected.Rd @@ -13,10 +13,10 @@ is.connected(mol) `TRUE` if molecule is complete, `FALSE` otherwise } \description{ -A single molecule will be represented as a -\href{https://en.wikipedia.org/wiki/Complete_graph}{complete} graph. -In some cases, such as for molecules in salt form, or after certain -operations such as bond splits, the molecular graph may contained +A single molecule will be represented as a +\href{https://en.wikipedia.org/wiki/Complete_graph}{complete} graph. +In some cases, such as for molecules in salt form, or after certain +operations such as bond splits, the molecular graph may contained \href{http://mathworld.wolfram.com/DisconnectedGraph.html}{disconnected components}. This method can be used to tested whether the molecule is complete (i.e. fully connected). diff --git a/rcdk/man/remove.hydrogens.Rd b/rcdk/man/remove.hydrogens.Rd index a4e6fa9b02..fa9b85d686 100755 --- a/rcdk/man/remove.hydrogens.Rd +++ b/rcdk/man/remove.hydrogens.Rd @@ -13,8 +13,8 @@ remove.hydrogens(mol) A copy of the original molecule, with explicit hydrogens removed } \description{ -Create an copy of the original structure with explicit hydrogens removed. -Stereochemistry is updated but up and down bonds in a depiction may need +Create an copy of the original structure with explicit hydrogens removed. +Stereochemistry is updated but up and down bonds in a depiction may need to be recalculated. This can also be useful for descriptor calculations. } \seealso{ diff --git a/rcdk/man/remove.property.Rd b/rcdk/man/remove.property.Rd index 2b548d8df2..6a335c11f6 100755 --- a/rcdk/man/remove.property.Rd +++ b/rcdk/man/remove.property.Rd @@ -13,7 +13,7 @@ remove.property(molecule, key) } \description{ In this context a property is a value associated with a key and stored -with the molecule. This methd will remove the property defined by the key. +with the molecule. This method will remove the property defined by the key. If there is such key, a warning is raised. } \examples{ diff --git a/rcdk/man/view.molecule.2d.Rd b/rcdk/man/view.molecule.2d.Rd index c36946b5ba..ac29496888 100755 --- a/rcdk/man/view.molecule.2d.Rd +++ b/rcdk/man/view.molecule.2d.Rd @@ -25,5 +25,5 @@ view.molecule.2d( } \description{ Create a 2D depiction of a molecule. If there are more than -one molecules supplied, return a grid woth \code{ncol} columns,. +one molecules supplied, return a grid with \code{ncol} columns,. }