| 1 |
#' @title An R6 class for chemical entities with associated data |
|
| 2 |
#' |
|
| 3 |
#' @description The class is initialised with an identifier. Chemical |
|
| 4 |
#' information is retrieved from the internet. Additionally, it can be |
|
| 5 |
#' generated using RDKit if RDKit and its python bindings are installed. |
|
| 6 |
#' |
|
| 7 |
#' @export |
|
| 8 |
#' @format An [R6Class] generator object |
|
| 9 |
#' @importFrom R6 R6Class |
|
| 10 |
#' @importFrom utils URLencode |
|
| 11 |
#' @importFrom webchem get_cid cid_compinfo |
|
| 12 |
#' @importFrom grImport PostScriptTrace readPicture |
|
| 13 |
#' @importFrom yaml yaml.load_file |
|
| 14 |
#' @importFrom rsvg rsvg_ps |
|
| 15 |
#' @param identifier Identifier to be stored in the object |
|
| 16 |
#' @param smiles Optional user provided SMILES code |
|
| 17 |
#' @param inchikey Optional user provided InChI Key |
|
| 18 |
#' @param pubchem Should an attempt be made to retrieve chemical |
|
| 19 |
#' information from PubChem via the webchem package? |
|
| 20 |
#' @param pubchem_from Possibility to select the argument |
|
| 21 |
#' that is used to query pubchem |
|
| 22 |
#' @param rdkit Should an attempt be made to retrieve chemical |
|
| 23 |
#' information from a local rdkit installation via python |
|
| 24 |
#' and the reticulate package? |
|
| 25 |
#' @param template An optional SMILES code to be used as template for RDKit |
|
| 26 |
#' @param chyaml Should we look for a identifier.yaml file in the working |
|
| 27 |
#' directory? |
|
| 28 |
#' @param T Temperature |
|
| 29 |
#' @param pH pH value |
|
| 30 |
#' @param source An acronym specifying the source of the information |
|
| 31 |
#' @param page The page from which the information was taken |
|
| 32 |
#' @param soils Names of the soils |
|
| 33 |
#' @param type The soil type |
|
| 34 |
#' @param pH_orig The pH stated in the study |
|
| 35 |
#' @param pH_medium The medium in which this pH was measured |
|
| 36 |
#' @param pH_H2O The pH extrapolated to pure water |
|
| 37 |
#' @param perc_OC The percentage of organic carbon in the soil |
|
| 38 |
#' @param pages The pages from which the information was taken |
|
| 39 |
#' @param remark A remark |
|
| 40 |
#' @param file The file to write to |
|
| 41 |
#' @param dir The directory to write the file to |
|
| 42 |
#' @examples |
|
| 43 |
#' caffeine <- chent$new("caffeine")
|
|
| 44 |
#' print(caffeine) |
|
| 45 |
#' if (!is.null(caffeine$Picture)) {
|
|
| 46 |
#' plot(caffeine) |
|
| 47 |
#' } |
|
| 48 |
#' oct <- chent$new("1-octanol", smiles = "CCCCCCCCO", pubchem = FALSE)
|
|
| 49 |
#' print(oct) |
|
| 50 |
chent <- R6Class("chent",
|
|
| 51 |
public = list( |
|
| 52 |
#' @field identifier (`character(1)`)\cr |
|
| 53 |
#' The identifier that was used to initiate the object, with attribute 'source' |
|
| 54 |
identifier = NULL, |
|
| 55 | ||
| 56 |
#' @field inchikey (`character(1)`)\cr |
|
| 57 |
#' InChI Key, with attribute 'source' |
|
| 58 |
inchikey = NULL, |
|
| 59 | ||
| 60 |
#' @field smiles (`character()`)\cr |
|
| 61 |
#' SMILES code(s), with attribute 'source' |
|
| 62 |
smiles = NULL, |
|
| 63 | ||
| 64 |
#' @field mw (`numeric(1)`)\cr |
|
| 65 |
#' Molecular weight, with attribute 'source' |
|
| 66 |
mw = NULL, |
|
| 67 | ||
| 68 |
#' @field pubchem (`list()`)\cr |
|
| 69 |
#' List of information retrieved from PubChem |
|
| 70 |
pubchem = NULL, |
|
| 71 | ||
| 72 |
#' @field rdkit |
|
| 73 |
#' List of information obtained with RDKit |
|
| 74 |
rdkit = NULL, |
|
| 75 | ||
| 76 |
#' @field mol <rdkit.Chem.rdchem.Mol> object |
|
| 77 |
mol = NULL, |
|
| 78 | ||
| 79 |
#' @field svg SVG code |
|
| 80 |
svg = NULL, |
|
| 81 | ||
| 82 |
#' @field Picture Graph as a [grImport::Picture-class] object obtained using the grImport package |
|
| 83 |
Picture = NULL, |
|
| 84 | ||
| 85 |
#' @field Pict_font_size Font size as extracted from the intermediate PostScript file |
|
| 86 |
Pict_font_size = NULL, |
|
| 87 | ||
| 88 |
#' @field pdf_height Height of the MediaBox in the pdf after cropping |
|
| 89 |
pdf_height = NULL, |
|
| 90 | ||
| 91 |
#' @field p0 Vapour pressure in Pa |
|
| 92 |
p0 = NULL, |
|
| 93 | ||
| 94 |
#' @field cwsat Water solubility in mg/L |
|
| 95 |
cwsat = NULL, |
|
| 96 | ||
| 97 |
#' @field PUF Plant uptake factor |
|
| 98 |
PUF = NULL, |
|
| 99 | ||
| 100 |
#' @field chyaml List of information obtained from a YAML file |
|
| 101 |
chyaml = NULL, |
|
| 102 | ||
| 103 |
#' @description |
|
| 104 |
#' Creates a new instance of this [R6][R6::R6Class] class. |
|
| 105 |
initialize = function(identifier, smiles = NULL, inchikey = NULL, |
|
| 106 |
pubchem = TRUE, pubchem_from = c('name', 'smiles', 'inchikey'),
|
|
| 107 |
rdkit = TRUE, template = NULL, |
|
| 108 |
chyaml = FALSE) {
|
|
| 109 | ||
| 110 | 2x |
self$identifier <- identifier |
| 111 | 2x |
names(self$identifier) <- make.names(identifier) |
| 112 | 2x |
pubchem_from = match.arg(pubchem_from) |
| 113 | ||
| 114 | 2x |
self$smiles <- c(user = smiles) |
| 115 | ||
| 116 | 2x |
if (pubchem) {
|
| 117 | 1x |
if (pubchem_from == 'name') {
|
| 118 | ! |
query = identifier |
| 119 |
} else {
|
|
| 120 | 1x |
query = get(pubchem_from) |
| 121 |
} |
|
| 122 | 1x |
self$try_pubchem(query, from = pubchem_from) |
| 123 |
} |
|
| 124 | ||
| 125 | 2x |
if (rdkit) {
|
| 126 | 1x |
if (rdkit_available) {
|
| 127 | 1x |
self$get_rdkit(template = template) |
| 128 |
} else {
|
|
| 129 | ! |
message("RDKit is not available")
|
| 130 |
} |
|
| 131 |
} |
|
| 132 | ||
| 133 | 2x |
if (chyaml) {
|
| 134 | ! |
self$get_chyaml() |
| 135 |
} |
|
| 136 | ||
| 137 |
# Define main identifiers as NA if still not available |
|
| 138 | 2x |
if (is.null(self$smiles)) {
|
| 139 | ! |
self$smiles <- NA |
| 140 | ! |
attr(self$smiles, "source") <- "user" |
| 141 |
} |
|
| 142 | 2x |
if (is.null(self$inchikey)) {
|
| 143 | 1x |
self$inchikey <- NA |
| 144 | 1x |
attr(self$inchikey, "source") <- "user" |
| 145 |
} |
|
| 146 | 2x |
if (is.null(self$mw)) {
|
| 147 | 1x |
self$mw <- NA |
| 148 | 1x |
attr(self$mw, "source") <- "user" |
| 149 |
} |
|
| 150 | ||
| 151 | 2x |
invisible(self) |
| 152 |
}, |
|
| 153 | ||
| 154 |
#' @description |
|
| 155 |
#' Try to get chemical information from PubChem |
|
| 156 |
#' @param query Query string to be passed to [get_cid][webchem::get_cid] |
|
| 157 |
#' @param from Passed to [get_cid][webchem::get_cid] |
|
| 158 |
try_pubchem = function(query = self$identifier, from = 'name') {
|
|
| 159 | 2x |
message("Querying PubChem for ", from , " ", query, " ...")
|
| 160 | 1x |
if (missing(query)) query <- self$identifier |
| 161 | 2x |
pubchem_result = webchem::get_cid(query, from = from, match = "first") |
| 162 | ||
| 163 | 2x |
if (is.na(pubchem_result[[1, "cid"]])) {
|
| 164 | ! |
message("Querying for ", query, " as ", from, " did not give results at PubChem")
|
| 165 |
} else {
|
|
| 166 | 2x |
self$get_pubchem(pubchem_result[[1, "cid"]]) |
| 167 |
} |
|
| 168 |
}, |
|
| 169 | ||
| 170 |
#' @description |
|
| 171 |
#' Get chemical information from PubChem for a known PubChem CID |
|
| 172 |
#' @param pubchem_cid CID |
|
| 173 |
get_pubchem = function(pubchem_cid) {
|
|
| 174 | 2x |
self$pubchem = as.list(webchem::pc_prop(pubchem_cid, from = "cid", |
| 175 | 2x |
properties = c("MolecularFormula", "MolecularWeight",
|
| 176 | 2x |
"ConnectivitySMILES", "SMILES", |
| 177 | 2x |
"InChI", "InChIKey", "IUPACName", |
| 178 | 2x |
"XLogP", "TPSA", "Complexity", "Charge", |
| 179 | 2x |
"HBondDonorCount", "HBondAcceptorCount"))) |
| 180 | 2x |
self$pubchem$synonyms = webchem::pc_synonyms(pubchem_cid, from = "cid")[[1]] |
| 181 | ||
| 182 | 2x |
self$smiles["PubChem"] <- self$pubchem$SMILES |
| 183 | ||
| 184 | 2x |
if (self$pubchem$SMILES != self$pubchem$ConnectivitySMILES) {
|
| 185 | ! |
self$smiles["PubChem_Connectivity"] <- self$pubchem$ConnectivitySMILES |
| 186 |
} |
|
| 187 | ||
| 188 | 2x |
self$mw = as.numeric(self$pubchem$MolecularWeight) |
| 189 | 2x |
attr(self$mw, "source") <- "pubchem" |
| 190 | ||
| 191 | 2x |
if (is.null(self$inchikey)) {
|
| 192 | ! |
self$inchikey <- self$pubchem$InChIKey |
| 193 | ! |
attr(self$inchikey, "source") <- "pubchem" |
| 194 |
} else {
|
|
| 195 | 2x |
if (is.na(self$inchikey)) {
|
| 196 | 1x |
warning("Overwriting uninitialized InChIKey")
|
| 197 | 1x |
self$inchikey <- self$pubchem$InChIKey |
| 198 | 1x |
attr(self$inchikey, "source") <- "pubchem" |
| 199 |
} else {
|
|
| 200 | 1x |
if (length(self$inchikey) > 1) {
|
| 201 | ! |
message("InChIKey ", self$inchikey, " retreived from ",
|
| 202 | ! |
attr(self$inchikey, "source"), |
| 203 | ! |
" has length > 1, using PubChem InChIKey") |
| 204 | ! |
self$inchikey <- self$pubchem$InChIKey |
| 205 | ! |
attr(self$inchikey, "source") <- "pubchem" |
| 206 |
} else {
|
|
| 207 | 1x |
if (self$pubchem$InChIKey != self$inchikey) {
|
| 208 | ! |
message("InChiKey ", self$pubchem$InChIKey, " from PubChem record does not match\n",
|
| 209 | ! |
"InChiKey ", self$inchikey, " retreived from ", |
| 210 | ! |
attr(self$inchikey, "source")) |
| 211 |
} else {
|
|
| 212 | 1x |
attr(self$inchikey, "source") <- c(attr(self$inchikey, "source"), "pubchem") |
| 213 |
} |
|
| 214 |
} |
|
| 215 |
} |
|
| 216 |
} |
|
| 217 |
}, |
|
| 218 | ||
| 219 |
#' @description |
|
| 220 |
#' Get chemical information from RDKit if available |
|
| 221 |
get_rdkit = function(template = NULL) {
|
|
| 222 |
|
|
| 223 | ! |
if (!rdkit_available) stop("RDKit is not available")
|
| 224 | ! |
if (is.null(self$smiles)) stop("RDKit would need a SMILES code")
|
| 225 |
|
|
| 226 | 2x |
available_smiles <- names(self$smiles) |
| 227 | 2x |
smiles_preference <- c("user", "PubChem", "PubChem_Connectivity")
|
| 228 | 2x |
smiles_preferred_i <- min(match(available_smiles, smiles_preference)) |
| 229 | 2x |
smiles_preferred <- smiles_preference[smiles_preferred_i] |
| 230 | ||
| 231 | 2x |
message("Get chemical information from RDKit using ",
|
| 232 | 2x |
smiles_preferred, " SMILES\n", |
| 233 | 2x |
self$smiles[smiles_preferred]) |
| 234 | 2x |
self$rdkit <- list() |
| 235 | 2x |
self$mol <- rdkit_module$Chem$MolFromSmiles(self$smiles[1]) |
| 236 | 2x |
self$rdkit$mw <- rdkit_module$Chem$Descriptors$MolWt(self$mol) |
| 237 | 2x |
if (!is.null(self$mw) && !is.na(self$mw)) {
|
| 238 | 2x |
if (round(self$rdkit$mw, 1) != round(self$mw, 1)) {
|
| 239 | ! |
warning("RDKit mw is ", self$rdkit$mw, " while mw is ", self$mw)
|
| 240 |
} |
|
| 241 |
} else {
|
|
| 242 | ! |
self$mw <- self$rdkit$mw |
| 243 | ! |
attr(self$mw, "source") <- "rdkit" |
| 244 |
} |
|
| 245 | ||
| 246 |
# Create an SVG representation |
|
| 247 | 2x |
rdkit_module$Chem$rdDepictor$Compute2DCoords(self$mol) |
| 248 | 2x |
if (!is.null(template)) {
|
| 249 | ! |
rdkit_template <- rdkit_module$Chem$MolFromSmiles(template) |
| 250 | ! |
rdkit_module$Chem$rdDepictor$Compute2DCoords(template) |
| 251 | ! |
rdkit$Chem$AllChem$GenerateDepictionMatching2DStructure(self$mol, template) |
| 252 |
} |
|
| 253 | 2x |
d2d <- rdkit_module$Chem$Draw$rdMolDraw2D$MolDraw2DSVG(400L, 400L) |
| 254 | 2x |
d2d$DrawMolecule(self$mol) |
| 255 | 2x |
d2d$FinishDrawing() |
| 256 | 2x |
self$svg <- d2d$GetDrawingText() |
| 257 | 2x |
svgfile <- tempfile(fileext = ".svg") |
| 258 | 2x |
psfile <- tempfile(fileext = ".ps") |
| 259 | 2x |
writeLines(self$svg, svgfile) |
| 260 | 2x |
rsvg::rsvg_ps(svgfile, psfile) |
| 261 | ||
| 262 |
# Get size properties useful for plotting |
|
| 263 | 2x |
ps_font_line <- grep("Tm$", readLines(psfile), value = TRUE)[1]
|
| 264 | 2x |
ps_font_size <- gsub(" .*$", "", ps_font_line)
|
| 265 | 2x |
self$Pict_font_size = as.numeric(ps_font_size) |
| 266 | ||
| 267 |
# Read in to create Picture |
|
| 268 | 2x |
xmlfile <- tempfile(fileext = ".xml") |
| 269 | 2x |
PostScriptTrace(psfile, outfilename = xmlfile) |
| 270 | 2x |
unlink(paste0("capture", basename(psfile)))
|
| 271 | 2x |
self$Picture <- readPicture(xmlfile) |
| 272 | 2x |
unlink(c(xmlfile, psfile, svgfile)) |
| 273 |
}, |
|
| 274 | ||
| 275 |
#' @description |
|
| 276 |
#' Obtain information from a YAML file |
|
| 277 |
#' @param repo Should the file be looked for in the current working |
|
| 278 |
#' directory, a local git repository under `~/git/chyaml`, or from |
|
| 279 |
#' the web (not implemented). |
|
| 280 |
#' @param chyaml The filename to be looked for |
|
| 281 |
get_chyaml = function(repo = c("wd", "local", "web"),
|
|
| 282 |
chyaml = paste0(URLencode(self$identifier), ".yaml")) |
|
| 283 |
{
|
|
| 284 | ! |
repo = match.arg(repo) |
| 285 | ! |
paths = c( |
| 286 | ! |
wd = ".", |
| 287 | ! |
local = file.path("~", "git/chyaml"))
|
| 288 | ||
| 289 | ! |
chyaml_handlers = list( |
| 290 | ! |
expr = function(x) NULL, # To avoid security risks from reading chyaml files |
| 291 | ! |
dataframe = function(x) |
| 292 | ! |
eval(parse(text = paste0("data.frame(", x, ", stringsAsFactors = FALSE)"))))
|
| 293 | ||
| 294 | ! |
if (repo %in% c("wd", "local")) {
|
| 295 | ! |
path = paths[repo] |
| 296 | ! |
full = file.path(path, chyaml) |
| 297 | ! |
if (!file.exists(full)) {
|
| 298 | ! |
message("Did not find chyaml file ", full)
|
| 299 |
} else {
|
|
| 300 | ! |
if (is(try(self$chyaml <- yaml.load_file(chyaml, handlers = chyaml_handlers)), |
| 301 | ! |
"try-error")) {
|
| 302 | ! |
message("Could not load ", full)
|
| 303 |
} else {
|
|
| 304 | ! |
message("Loaded ", full)
|
| 305 |
} |
|
| 306 |
} |
|
| 307 |
} else {
|
|
| 308 | ! |
message("web repositories not implemented")
|
| 309 |
} |
|
| 310 |
}, |
|
| 311 | ||
| 312 |
#' @description |
|
| 313 |
#' Add a vapour pressure |
|
| 314 |
#' @param p0 The vapour pressure in Pa |
|
| 315 |
add_p0 = function(p0, T = NA, source = NA, page = NA, remark = "") {
|
|
| 316 | ! |
self$p0 <- p0 |
| 317 | ! |
attr(self$p0, "T") <- T |
| 318 | ! |
attr(self$p0, "source") <- source |
| 319 | ! |
attr(self$p0, "page") <- page |
| 320 | ! |
attr(self$p0, "remark") <- remark |
| 321 |
}, |
|
| 322 | ||
| 323 |
#' @description |
|
| 324 |
#' Add a water solubility |
|
| 325 |
#' @param cwsat The water solubility in mg/L |
|
| 326 |
add_cwsat = function(cwsat, T = NA, pH = NA, |
|
| 327 |
source = NA, page = NA, remark = "") |
|
| 328 |
{
|
|
| 329 | ! |
self$cwsat <- cwsat |
| 330 | ! |
attr(self$cwsat, "T") <- T |
| 331 | ! |
attr(self$cwsat, "pH") <- pH |
| 332 | ! |
attr(self$cwsat, "source") <- source |
| 333 | ! |
attr(self$cwsat, "page") <- page |
| 334 | ! |
attr(self$cwsat, "remark") <- remark |
| 335 |
}, |
|
| 336 | ||
| 337 |
#' @description |
|
| 338 |
#' Add a plant uptake factor |
|
| 339 |
#' @param PUF The plant uptake factor, a number between 0 and 1 |
|
| 340 |
add_PUF = function(PUF = 0, |
|
| 341 |
source = "focus_generic_gw_2014", page = 41, |
|
| 342 |
remark = "Conservative default value") |
|
| 343 |
{
|
|
| 344 | ! |
self$PUF <- PUF |
| 345 | ! |
attr(self$PUF, "source") <- source |
| 346 | ! |
attr(self$PUF, "page") <- page |
| 347 | ! |
attr(self$PUF, "remark") <- remark |
| 348 |
}, |
|
| 349 | ||
| 350 |
#' @field TPs List of transformation products as chent objects |
|
| 351 |
TPs = list(), |
|
| 352 | ||
| 353 |
#' @description |
|
| 354 |
#' Add a transformation product to the internal list |
|
| 355 |
#' @param x A [chent] object, or an identifier to generate a [chent] object |
|
| 356 |
#' @param pubchem Should chemical information be obtained from PubChem? |
|
| 357 |
add_TP = function(x, smiles = NULL, pubchem = FALSE) {
|
|
| 358 | ! |
if (inherits(x, "chent")) {
|
| 359 | ! |
id <- names(x$identifier) |
| 360 | ! |
chent <- x |
| 361 |
} else {
|
|
| 362 | ! |
id <- make.names(x) |
| 363 | ! |
chent <- chent$new(x, smiles = smiles, pubchem = pubchem) |
| 364 |
} |
|
| 365 | ! |
self$TPs[[id]] <- chent |
| 366 |
}, |
|
| 367 | ||
| 368 |
#' @field transformations Data frame of observed transformations |
|
| 369 |
transformations = data.frame(study_type = character(0), |
|
| 370 |
TP_identifier = character(0), |
|
| 371 |
max_occurrence = numeric(0), |
|
| 372 |
source = character(0), |
|
| 373 |
page = character(0), |
|
| 374 |
stringsAsFactors = FALSE), |
|
| 375 | ||
| 376 |
#' @description |
|
| 377 |
#' Add a line in the internal dataframe holding observed transformations |
|
| 378 |
#' @param study_type A characterisation of the study type |
|
| 379 |
#' @param TP_identifier An identifier of one of the transformation products |
|
| 380 |
#' in `self$TPs` |
|
| 381 |
#' @param max_occurrence The maximum observed occurrence of the |
|
| 382 |
#' transformation product, expressed as a fraction of the amount that would |
|
| 383 |
#' result from stochiometric transformation |
|
| 384 |
add_transformation = function(study_type, TP_identifier, max_occurrence, |
|
| 385 |
remark = "", source = NA, pages = NA) |
|
| 386 |
{
|
|
| 387 | ! |
TP_name = make.names(TP_identifier) |
| 388 | ! |
if (!inherits(self$TPs[[TP_name]], "chent")) {
|
| 389 | ! |
stop(paste("Please add the TP", TP_identifier, "first using chent$add_TP()"))
|
| 390 |
} |
|
| 391 | ! |
TP_chent <- self$TPs[TP_name] |
| 392 | ! |
if (is.numeric(pages)) pages <- paste(pages, collapse = ", ") |
| 393 | ! |
cn <- colnames(self$transformations) |
| 394 | ! |
self$transformations <- rbind(self$transformations, |
| 395 | ! |
data.frame(study_type = study_type, |
| 396 | ! |
TP_identifier = TP_identifier, |
| 397 | ! |
max_occurrence = max_occurrence, |
| 398 | ! |
remark = remark, |
| 399 | ! |
source = source, |
| 400 | ! |
page = page, |
| 401 | ! |
stringsAsFactors = FALSE)) |
| 402 |
}, |
|
| 403 | ||
| 404 |
#' @field soil_degradation Dataframe of modelling DT50 values |
|
| 405 |
soil_degradation = NULL, |
|
| 406 | ||
| 407 |
#' @description |
|
| 408 |
#' Add a line in the internal dataframe holding modelling DT50 values |
|
| 409 |
#' @param DT50_mod The modelling DT50 in the sense of regulatory pesticide |
|
| 410 |
#' fate modelling |
|
| 411 |
#' @param DT50_mod_ref The normalised modelling DT50 in the sense of |
|
| 412 |
#' regulatory pesticide fate modelling |
|
| 413 |
#' @param country The country (mainly for field studies) |
|
| 414 |
#' @param temperature The temperature during the study in degrees Celsius |
|
| 415 |
#' @param moisture The moisture during the study |
|
| 416 |
#' @param category Is it a laboratory ('lab') or field study ('field')
|
|
| 417 |
#' @param formulation Name of the formulation applied, if it was not |
|
| 418 |
#' the technical active ingredient |
|
| 419 |
#' @param model The degradation model used for deriving `DT50_mod` |
|
| 420 |
#' @param chi2 The relative error as defined in FOCUS kinetics |
|
| 421 |
add_soil_degradation = function(soils, DT50_mod, DT50_mod_ref, |
|
| 422 |
type = NA, country = NA, |
|
| 423 |
pH_orig = NA, pH_medium = NA, pH_H2O = NA, |
|
| 424 |
perc_OC = NA, |
|
| 425 |
temperature = NA, moisture = NA, |
|
| 426 |
category = "lab", formulation = NA, |
|
| 427 |
model = NA, chi2 = NA, |
|
| 428 |
remark = "", source, page = NA) |
|
| 429 |
{
|
|
| 430 | ! |
new_soil_degradation = data.frame( |
| 431 | ! |
soil = soils, |
| 432 | ! |
DT50_mod = DT50_mod, |
| 433 | ! |
DT50_mod_ref = DT50_mod_ref, |
| 434 | ! |
type = type, |
| 435 | ! |
country = country, |
| 436 | ! |
pH_orig = pH_orig, |
| 437 | ! |
pH_medium = pH_medium, |
| 438 | ! |
pH_H2O = pH_H2O, |
| 439 | ! |
perc_OC = perc_OC, |
| 440 | ! |
temperature = temperature, |
| 441 | ! |
moisture = moisture, |
| 442 | ! |
category = category, |
| 443 | ! |
formulation = formulation, |
| 444 | ! |
model = model, |
| 445 | ! |
chi2 = chi2, |
| 446 | ! |
remark = remark, |
| 447 | ! |
source = source, |
| 448 | ! |
page = page, |
| 449 | ! |
stringsAsFactors = FALSE) |
| 450 | ! |
if (is.null(self$soil_degradation)) {
|
| 451 | ! |
self$soil_degradation <- new_soil_degradation |
| 452 |
} else {
|
|
| 453 | ! |
self$soil_degradation <- rbind(self$soil_degradation, new_soil_degradation) |
| 454 |
} |
|
| 455 |
}, |
|
| 456 | ||
| 457 |
#' @field soil_ff Dataframe of formation fractions |
|
| 458 |
soil_ff = NULL, |
|
| 459 | ||
| 460 |
#' @description |
|
| 461 |
#' Add one or more formation fractions for degradation in soil |
|
| 462 |
#' @param target The identifier(s) of the transformation product |
|
| 463 |
#' @param soils The soil name(s) in which the transformation was observed |
|
| 464 |
#' @param ff The formation fraction(s) |
|
| 465 |
add_soil_ff = function(target, soils, ff = 1, |
|
| 466 |
remark = "", source, page = NA) |
|
| 467 |
{
|
|
| 468 | ! |
new_soil_ff = data.frame( |
| 469 | ! |
target = target, |
| 470 | ! |
target = target, |
| 471 | ! |
soil = soils, |
| 472 | ! |
ff = ff, |
| 473 | ! |
remark = remark, |
| 474 | ! |
source = source, |
| 475 | ! |
page = page, |
| 476 | ! |
stringsAsFactors = FALSE) |
| 477 | ! |
if (is.null(self$soil_ff)) {
|
| 478 | ! |
self$soil_ff <- new_soil_ff |
| 479 |
} else {
|
|
| 480 | ! |
self$soil_ff <- rbind(self$soil_ff, new_soil_ff) |
| 481 |
} |
|
| 482 |
}, |
|
| 483 | ||
| 484 |
#' @field soil_sorption Dataframe of soil sorption data |
|
| 485 |
soil_sorption = NULL, |
|
| 486 | ||
| 487 |
#' @description |
|
| 488 |
#' Add soil sorption data |
|
| 489 |
#' @param Kf The sorption constant in L/kg, either linear (then `N` is 1) |
|
| 490 |
#' or according to Freundlich |
|
| 491 |
#' @param Kfoc The constant from above, normalised to soil organic carbon |
|
| 492 |
#' @param N The Freundlich exponent |
|
| 493 |
#' @param perc_clay The percentage of clay in the soil |
|
| 494 |
#' @param CEC The cation exchange capacity |
|
| 495 |
add_soil_sorption = function(soils, |
|
| 496 |
Kf, Kfoc, N, |
|
| 497 |
type = NA, pH_orig = NA, pH_medium = NA, |
|
| 498 |
pH_H2O = NA, |
|
| 499 |
perc_OC = NA, perc_clay = NA, CEC = NA, |
|
| 500 |
remark = "", source, page = NA) |
|
| 501 |
{
|
|
| 502 | ! |
new_soil_sorption = data.frame( |
| 503 | ! |
soils = soils, |
| 504 | ! |
Kf = Kf, Kfoc = Kfoc, N = N, |
| 505 | ! |
type = type, |
| 506 | ! |
pH_orig = pH_orig, |
| 507 | ! |
pH_medium = pH_medium, |
| 508 | ! |
pH_H2O = pH_H2O, |
| 509 | ! |
perc_OC = perc_OC, perc_clay = perc_clay, CEC = CEC, |
| 510 | ! |
remark = remark, |
| 511 | ! |
source = source, |
| 512 | ! |
page = page, |
| 513 | ! |
stringsAsFactors = FALSE) |
| 514 | ! |
if (is.null(self$soil_sorption)) {
|
| 515 | ! |
self$soil_sorption <- new_soil_sorption |
| 516 |
} else {
|
|
| 517 | ! |
self$soil_sorption <- rbind(self$soil_sorption, new_soil_sorption) |
| 518 |
} |
|
| 519 |
}, |
|
| 520 | ||
| 521 |
#' @description |
|
| 522 |
#' Write a PDF image of the structure |
|
| 523 |
pdf = function(file = paste0(self$identifier, ".pdf"), |
|
| 524 |
dir = "structures/pdf", template = NULL) {
|
|
| 525 | ! |
if (!dir.exists(dir)) {
|
| 526 | ! |
message("Directory '", dir, "' does not exist")
|
| 527 | ! |
message("Trying to create directory '", dir, "'")
|
| 528 | ! |
dir.create(dir, recursive = TRUE) |
| 529 |
} |
|
| 530 | ! |
path = file.path(dir, file) |
| 531 | ! |
message("Creating file '", path, "'")
|
| 532 | ! |
pdf(path) |
| 533 | ! |
plot(self) |
| 534 | ! |
dev.off() |
| 535 | ! |
message("Cropping file '", path, "' using pdfcrop")
|
| 536 | ! |
bash_path <- shQuote(path) |
| 537 | ! |
system(paste("pdfcrop --margin 10", bash_path, bash_path, "> /dev/null"))
|
| 538 | ||
| 539 |
# Get the height of the MediaBox |
|
| 540 | ! |
head <- readLines(path, n = 20, skipNul = TRUE) |
| 541 | ! |
m_line <- suppressWarnings(grep("MediaBox", head, value = TRUE))
|
| 542 | ! |
self$pdf_height <- as.numeric(gsub("/MediaBox \\[.* (.*)\\]", "\\1", m_line))
|
| 543 |
}, |
|
| 544 | ||
| 545 |
#' @description |
|
| 546 |
#' Write a PNG image of the structure |
|
| 547 |
#' @param antialias Passed to [png][grDevices::png] |
|
| 548 |
png = function(file = paste0(self$identifier, ".png"), |
|
| 549 |
dir = "structures/png", antialias = 'gray') |
|
| 550 |
{
|
|
| 551 | ! |
if (!dir.exists(dir)) {
|
| 552 | ! |
message("Directory '", dir, "' does not exist")
|
| 553 | ! |
message("Trying to create directory '", dir, "'")
|
| 554 | ! |
dir.create(dir, recursive = TRUE) |
| 555 |
} |
|
| 556 | ! |
path = file.path(dir, file) |
| 557 | ! |
message("Creating file '", path, "'")
|
| 558 | ! |
png(path, antialias = antialias) |
| 559 | ! |
plot(self) |
| 560 | ! |
dev.off() |
| 561 |
}, |
|
| 562 | ||
| 563 |
#' @description |
|
| 564 |
#' Write an EMF image of the structure using [emf][devEMF::emf] |
|
| 565 |
emf = function(file = paste0(self$identifier, ".emf"), |
|
| 566 |
dir = "structures/emf") |
|
| 567 |
{
|
|
| 568 | ! |
if (!requireNamespace("devEMF")) {
|
| 569 | ! |
stop("You need to have the devEMF package installed for this function")
|
| 570 |
} |
|
| 571 | ! |
if (!dir.exists(dir)) {
|
| 572 | ! |
message("Directory '", dir, "' does not exist")
|
| 573 | ! |
message("Trying to create directory '", dir, "'")
|
| 574 | ! |
dir.create(dir, recursive = TRUE) |
| 575 |
} |
|
| 576 | ! |
path = file.path(dir, file) |
| 577 | ! |
message("Creating file '", path, "'")
|
| 578 | ! |
devEMF::emf(path) |
| 579 | ! |
plot(self) |
| 580 | ! |
dev.off() |
| 581 |
} |
|
| 582 |
) |
|
| 583 |
) |
|
| 584 | ||
| 585 |
#' Printing method for chent objects |
|
| 586 |
#' |
|
| 587 |
#' @param x The chent object to be printed |
|
| 588 |
#' @param ... Further arguments for compatibility with the S3 method |
|
| 589 |
#' @importFrom utils head |
|
| 590 |
#' @export |
|
| 591 |
print.chent = function(x, ...) {
|
|
| 592 | 1x |
cat("<chent>\n")
|
| 593 | 1x |
cat("Identifier $identifier", x$identifier, "\n")
|
| 594 | 1x |
cat ("InChI Key $inchikey", x$inchikey, "\n")
|
| 595 | 1x |
cat ("SMILES string $smiles:\n")
|
| 596 | 1x |
print(x$smiles) |
| 597 | 1x |
if (!is.null(x$mw)) cat ("Molecular weight $mw:", round(x$mw, 1), "\n")
|
| 598 | 1x |
if (!is.null(x$pubchem$synonyms)) {
|
| 599 | 1x |
cat ("PubChem synonyms (up to 10):\n")
|
| 600 | 1x |
print(head(x$pubchem$synonyms, n = 10L)) |
| 601 |
} |
|
| 602 |
} |
|
| 603 | ||
| 604 |
#' Draw SVG graph from a chent object using RDKit |
|
| 605 |
#' |
|
| 606 |
#' @param x The chent object to be plotted |
|
| 607 |
#' @param width The desired width in pixels |
|
| 608 |
#' @param height The desired height in pixels |
|
| 609 |
#' @param filename The filename |
|
| 610 |
#' @param subdir The path to which the file should be written |
|
| 611 |
#' @export |
|
| 612 |
draw_svg.chent = function(x, width = 300, height = 150, |
|
| 613 |
filename = paste0(names(x$identifier), ".svg"), |
|
| 614 |
subdir = "svg") {
|
|
| 615 | ! |
if (!rdkit_available) {
|
| 616 | ! |
stop("RDkit is not available via reticulate")
|
| 617 |
} else {
|
|
| 618 | ! |
if (!dir.exists(subdir)) dir.create(subdir) |
| 619 | ! |
mol <- rdkit_module$Chem$MolFromSmiles(x$smiles) |
| 620 | ||
| 621 | ! |
rdkit_module$Chem$Draw$MolToFile(mol, file.path(subdir, filename), |
| 622 | ! |
size = c(as.integer(width), as.integer(height))) |
| 623 |
} |
|
| 624 |
} |
|
| 625 | ||
| 626 |
#' Plot method for chent objects |
|
| 627 |
#' |
|
| 628 |
#' @importFrom grImport grid.picture |
|
| 629 |
#' @param x The chent object to be plotted |
|
| 630 |
#' @param ... Further arguments passed to [grImport::grid.picture] |
|
| 631 |
#' @export |
|
| 632 |
#' @examples |
|
| 633 |
#' caffeine <- chent$new("caffeine")
|
|
| 634 |
#' print(caffeine) |
|
| 635 |
#' if (!is.null(caffeine$Picture)) {
|
|
| 636 |
#' plot(caffeine) |
|
| 637 |
#' } |
|
| 638 |
plot.chent = function(x, ...) {
|
|
| 639 | ! |
if (is.null(x$Picture)) stop("No Picture object in chent, was RDKit available during creation?")
|
| 640 | ! |
grid.picture(x$Picture) |
| 641 |
} |
|
| 642 | ||
| 643 |
#' @title An R6 class for pesticidal active ingredients and associated data |
|
| 644 |
#' |
|
| 645 |
#' @description This class is derived from [chent]. It makes it easy |
|
| 646 |
#' to create a [chent] from the ISO common name of a pesticide active |
|
| 647 |
#' ingredient, and additionally stores the ISO name as well as |
|
| 648 |
#' the complete result of querying the BCPC compendium using |
|
| 649 |
#' [bcpc_query][webchem::bcpc_query]. |
|
| 650 |
#' |
|
| 651 |
#' @export |
|
| 652 |
#' @format An [R6::R6Class] generator object |
|
| 653 |
#' @examples |
|
| 654 |
#' # On Travis, we get a certificate validation error, |
|
| 655 |
#' # likely because the system (xenial) is so old, |
|
| 656 |
#' # therefore don't run this example on Travis |
|
| 657 |
#' if (Sys.getenv("TRAVIS") == "") {
|
|
| 658 |
#' |
|
| 659 |
#' atr <- pai$new("atrazine")
|
|
| 660 |
#' print(atr) |
|
| 661 |
#' if (!is.null(atr$Picture)) {
|
|
| 662 |
#' plot(atr) |
|
| 663 |
#' } |
|
| 664 |
#' |
|
| 665 |
#' } |
|
| 666 |
pai <- R6Class("pai",
|
|
| 667 |
inherit = chent, |
|
| 668 |
public = list( |
|
| 669 | ||
| 670 |
#' @field iso ISO common name of the active ingredient according to ISO 1750 |
|
| 671 |
iso = NULL, |
|
| 672 | ||
| 673 |
#' @field bcpc Information retrieved from the BCPC compendium available online |
|
| 674 |
#' at <pesticidecompendium.bcpc.org> |
|
| 675 |
bcpc = NULL, |
|
| 676 | ||
| 677 |
#' @description |
|
| 678 |
#' Create a new pai object |
|
| 679 |
#' @param iso The ISO common name to be used in the query of the |
|
| 680 |
#' BCPC compendium |
|
| 681 |
#' @param identifier Alternative identifier used for querying pubchem |
|
| 682 |
#' @param smiles Optional user provided SMILES code |
|
| 683 |
#' @param inchikey Optional user provided InChI Key |
|
| 684 |
#' @param bcpc Should the BCPC compendium be queried? |
|
| 685 |
#' @param pubchem Should an attempt be made to retrieve chemical |
|
| 686 |
#' information from PubChem via the webchem package? |
|
| 687 |
#' @param pubchem_from Possibility to select the argument |
|
| 688 |
#' that is used to query pubchem |
|
| 689 |
#' @param rdkit Should an attempt be made to retrieve chemical |
|
| 690 |
#' information from a local rdkit installation via python |
|
| 691 |
#' and the reticulate package? |
|
| 692 |
#' @param template An optional SMILES code to be used as template for RDKit |
|
| 693 |
#' @param chyaml Should we look for a identifier.yaml file in the working |
|
| 694 |
initialize = function(iso, identifier = iso, |
|
| 695 |
smiles = NULL, inchikey = NULL, bcpc = TRUE, |
|
| 696 |
pubchem = TRUE, pubchem_from = 'auto', |
|
| 697 |
rdkit = TRUE, template = NULL, |
|
| 698 |
chyaml = FALSE) |
|
| 699 |
{
|
|
| 700 | ||
| 701 | 1x |
if (!is.null(inchikey)) {
|
| 702 | ! |
self$inchikey = inchikey |
| 703 | ! |
attr(self$inchikey, "source") <- "user" |
| 704 |
} |
|
| 705 | ||
| 706 | 1x |
if (!missing(iso) & bcpc) {
|
| 707 | 1x |
message("Querying BCPC for ", identifier, " ...")
|
| 708 | 1x |
bcpc_result = webchem::bcpc_query(identifier, from = "name") |
| 709 | ||
| 710 |
# Use first element of list, as we passed a query of length one |
|
| 711 | 1x |
if (is.na(bcpc_result[[1]][1])) {
|
| 712 | ! |
message("Common name ", identifier, " is not known at the BCPC compendium, trying PubChem")
|
| 713 |
} else {
|
|
| 714 | 1x |
self$bcpc = bcpc_result[[1]] |
| 715 | 1x |
self$iso = self$bcpc$cname |
| 716 | 1x |
attr(self$iso, "source") <- "bcpc" |
| 717 | 1x |
attr(self$iso, "status") <- self$bcpc$status |
| 718 | 1x |
bcpc_ik = self$bcpc$inchikey |
| 719 | 1x |
if (length(bcpc_ik) == 1 && !is.na(bcpc_ik)) {
|
| 720 | 1x |
if (is.null(self$inchikey)) {
|
| 721 | 1x |
self$inchikey = substr(self$bcpc$inchikey, 1, 27) |
| 722 | 1x |
attr(self$inchikey, "source") <- "bcpc" |
| 723 |
} else {
|
|
| 724 | ! |
if (bcpc_ik == self$inchikey) {
|
| 725 | ! |
attr(self$inchikey, "source") = c(attr(self$inchikey, "source"), "bcpc") |
| 726 |
} else {
|
|
| 727 | ! |
warning("InChIKey ", self$inchikey, " differs from ", bcpc_ik, " obtained from bcpc.org")
|
| 728 |
} |
|
| 729 |
} |
|
| 730 |
} |
|
| 731 |
} |
|
| 732 |
} |
|
| 733 | ||
| 734 |
# Set pubchem_from if not specified |
|
| 735 | 1x |
if (pubchem_from == 'auto') {
|
| 736 | 1x |
pubchem_from = 'name' |
| 737 | 1x |
if (!is.null(self$inchikey)) {
|
| 738 | 1x |
pubchem_from = 'inchikey' |
| 739 |
} |
|
| 740 |
} |
|
| 741 | ||
| 742 | 1x |
super$initialize(identifier = identifier, |
| 743 | 1x |
smiles = smiles, inchikey = self$inchikey, |
| 744 | 1x |
pubchem = pubchem, pubchem_from = pubchem_from, |
| 745 | 1x |
rdkit = rdkit, template = template, chyaml = chyaml) |
| 746 | ||
| 747 | 1x |
invisible(self) |
| 748 |
} |
|
| 749 |
) |
|
| 750 |
) |
|
| 751 | ||
| 752 |
#' Printing method for pai objects (pesticidal active ingredients) |
|
| 753 |
#' |
|
| 754 |
#' @param x The chent object to be printed |
|
| 755 |
#' @param ... Further arguments for compatibility with the S3 method |
|
| 756 |
#' @export |
|
| 757 |
print.pai = function(x, ...) {
|
|
| 758 | ! |
cat("<pai> with ISO common name $iso", x$iso, "\n")
|
| 759 | ! |
print.chent(x) |
| 760 | ! |
if (length(x$TPs) > 0) {
|
| 761 | ! |
cat("\nTransformation products:\n")
|
| 762 | ! |
print(x$TPs) |
| 763 |
} |
|
| 764 | ! |
if (nrow(x$transformations) > 0) {
|
| 765 | ! |
cat("\nTransformations:\n")
|
| 766 | ! |
print(x$transformations) |
| 767 |
} |
|
| 768 |
} |
|
| 769 | ||
| 770 |
#' @title R6 class for a plant protection product with at least one active ingredient |
|
| 771 |
#' |
|
| 772 |
#' @description Contains basic information about the active ingredients in the |
|
| 773 |
#' product |
|
| 774 |
#' |
|
| 775 |
#' @export |
|
| 776 |
#' @format An [R6::R6Class] generator object. |
|
| 777 |
ppp <- R6Class("ppp",
|
|
| 778 |
public = list( |
|
| 779 | ||
| 780 |
#' @field name The name of the product |
|
| 781 |
name = NULL, |
|
| 782 | ||
| 783 |
#' @field ais A list of active ingredients |
|
| 784 |
ais = list(), |
|
| 785 | ||
| 786 |
#' @field concentrations The concentration of the ais |
|
| 787 |
concentrations = NULL, |
|
| 788 | ||
| 789 |
#' @field concentration_units Defaults to g/L |
|
| 790 |
concentration_units = NULL, |
|
| 791 | ||
| 792 |
#' @field density The density of the product |
|
| 793 |
density = NULL, |
|
| 794 | ||
| 795 |
#' @field density_units Defaults to g/L |
|
| 796 |
density_units = "g/L", |
|
| 797 | ||
| 798 |
#' @description |
|
| 799 |
#' Creates a new instance of this [R6][R6::R6Class] class. |
|
| 800 |
#' @param name The name of the product |
|
| 801 |
#' @param ... Identifiers of the active ingredients |
|
| 802 |
#' @param concentrations Concentrations of the active ingredients |
|
| 803 |
#' @param concentration_units Defaults to g/L |
|
| 804 |
#' @param density The density |
|
| 805 |
#' @param density_units Defaults to g/L |
|
| 806 |
initialize = function(name, ..., concentrations, concentration_units = "g/L", |
|
| 807 |
density = 1000, density_units = "g/L") |
|
| 808 |
{
|
|
| 809 | ! |
self$name <- name |
| 810 | ! |
self$ais <- list(...) |
| 811 | ! |
self$concentrations <- concentrations |
| 812 | ! |
self$density <- density |
| 813 | ! |
self$density_units <- density_units |
| 814 | ! |
names(self$concentrations) <- names(self$ais) |
| 815 | ! |
self$concentration_units <- concentration_units |
| 816 |
} |
|
| 817 |
) |
|
| 818 |
) |
|
| 819 | ||
| 820 |
#' Printing method for ppp objects (plant protection products) |
|
| 821 |
#' |
|
| 822 |
#' @param x The chent object to be printed |
|
| 823 |
#' @param ... Further arguments for compatibility with the S3 method |
|
| 824 |
#' @export |
|
| 825 |
print.ppp = function(x, ...) {
|
|
| 826 | ! |
cat("<pp> named", x$name, "\n")
|
| 827 |
} |
|
| 828 |
# vim: set ts=2 sw=2 expandtab: |
| 1 |
.onLoad = function(libname, pkgname) {
|
|
| 2 | ! |
conf <- reticulate::py_discover_config("rdkit")
|
| 3 | ! |
rdkit_available <- reticulate::py_module_available("rdkit")
|
| 4 | ! |
rdkit_module <- try( |
| 5 | ! |
reticulate::import("rdkit"),
|
| 6 | ! |
silent = TRUE) |
| 7 | ! |
assign('rdkit_available', rdkit_available, envir = topenv())
|
| 8 | ! |
assign('rdkit_module', rdkit_module, envir = topenv())
|
| 9 |
} |