diff --git a/DESCRIPTION b/DESCRIPTION index 8032fda..13190fc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: BIGapp Title: Breeding Insight Genomics Shiny Application -Version: 1.5.2 +Version: 1.6.0 Authors@R: c( person(c("Alexander", "M."), "Sandercock", diff --git a/R/GS_functions.R b/R/GS_functions.R index ddd2340..6576756 100644 --- a/R/GS_functions.R +++ b/R/GS_functions.R @@ -241,6 +241,11 @@ get_relationship_mat <- function(geno_input, ped_file, type = c("Gmatrix", "Amat ploidy.correction = TRUE) G.mat <- round(G.mat,3) #to be easy to invert + # Ensure G.mat and Ped.mat have the same sample order + common_samples <- intersect(rownames(G.mat), rownames(Ped.mat)) + G.mat <- G.mat[common_samples, common_samples] + Ped.mat <- Ped.mat[common_samples, common_samples] + #Computing H matrix (Martini) - Using the name Geno.mat for consistency Geno.mat <- Hmatrix(A=Ped.mat, G=G.mat, method="Martini", @@ -314,19 +319,31 @@ GBLUP_genomic_prediction <- function(pheno_dat, Geno.mat, cycles, folds, traits, #Mask phenotypes in testing group Pheno_test <- pheno_dat Pheno_test[test, traits[trait_idx]] <- NA - #Kin.blup - traitpred <- kin.blup(data = Pheno_test, - geno = colnames(pheno_dat)[1], - pheno = traits[trait_idx], - fixed = fixed_cat, - covariate = fixed_cov, - K=Geno.mat, - n.core = cores) + + # Add error handling for kin.blup + traitpred <- tryCatch({ + kin.blup(data = Pheno_test, + geno = colnames(pheno_dat)[1], # This assumes first column is sample ID + pheno = traits[trait_idx], + fixed = fixed_cat, + covariate = fixed_cov, + K = Geno.mat, + n.core = cores) + }, error = function(e) { + cat("Error in kin.blup for trait", traits[trait_idx], ":", e$message, "\n") + return(NULL) + }) + + # Check if kin.blup failed + if (is.null(traitpred)) { + cat("kin.blup failed for trait", traits[trait_idx], "- skipping this trait\n") + next + } #Cor between test values and predicted breeding values - results[(((r-1)*5)+fold), trait_idx] <- cor(pheno_dat[test, traits[trait_idx]], traitpred$g[test], use = "complete.obs") - results[(((r-1)*5)+fold), (length(traits)+1)] <- r - results[(((r-1)*5)+fold), (length(traits)+2)] <- fold + results[(((r-1)*folds)+fold), trait_idx] <- cor(pheno_dat[test, traits[trait_idx]], traitpred$g[test], use = "complete.obs") + results[(((r-1)*folds)+fold), (length(traits)+1)] <- r + results[(((r-1)*folds)+fold), (length(traits)+2)] <- fold # Extract GEBVs GEBVs_fold[, trait_idx] <- traitpred$g[test] @@ -334,12 +351,12 @@ GBLUP_genomic_prediction <- function(pheno_dat, Geno.mat, cycles, folds, traits, # Calculate heritability (*confirm this calculation* - either way will not report to user) Vu <- traitpred$Vg Ve <- traitpred$Ve - heritability_scores[(((r-1)*5)+fold), trait_idx] <- Vu / (Vu + Ve) + heritability_scores[(((r-1)*folds)+fold), trait_idx] <- Vu / (Vu + Ve) } #Add iter and fold information for each trait/result - heritability_scores[(((r-1)*5)+fold), (length(traits)+1)] <- r - heritability_scores[(((r-1)*5)+fold), (length(traits)+2)] <- fold + heritability_scores[(((r-1)*folds)+fold), (length(traits)+1)] <- r + heritability_scores[(((r-1)*folds)+fold), (length(traits)+2)] <- fold #Add sample, iteration, and fold information to GEBVs_fold GEBVs_fold[,"Iter"] = r @@ -348,6 +365,12 @@ GBLUP_genomic_prediction <- function(pheno_dat, Geno.mat, cycles, folds, traits, # Store GEBVs for this fold GEBVs_cycle[[fold]] <- GEBVs_fold + + # Update progress bar within the fold loop + if(!is.null(session)) { + updateProgressBar(session = session, id = "pb_prediction", value = as.numeric(pb_value), + title = paste0("Iteration ", r, " of ", cycles, " - Fold ", fold, " of ", folds)) + } } # Store GEBVs for this cycle diff --git a/R/mod_GSAcc.R b/R/mod_GSAcc.R index 8f5a788..df9ab30 100644 --- a/R/mod_GSAcc.R +++ b/R/mod_GSAcc.R @@ -428,30 +428,32 @@ mod_GSAcc_server <- function(input, output, session, parent_session){ pred_inputs <- eventReactive(input$prediction_start,{ toggleClass(id = "pred_ploidy", class = "borderred", condition = (is.na(input$pred_ploidy) | is.null(input$pred_ploidy))) - - #### VCF sanity check - checks <- vcf_sanity_check(input$pred_file$datapath) - - error_if_false <- c( - "VCF_header", "VCF_columns", "unique_FORMAT", "GT", - "samples", "VCF_compressed" - ) - - error_if_true <- c( - "multiallelics", "phased_GT", "mixed_ploidies", - "duplicated_samples", "duplicated_markers" - ) - - warning_if_false <- c("chrom_info", "pos_info", "ref_alt","max_markers") - - checks_result <- vcf_sanity_messages(checks, - error_if_false, - error_if_true, - warning_if_false = warning_if_false, - warning_if_true = NULL, - input_ploidy = as.numeric(input$pred_ploidy)) - - if(checks_result) return() # Stop the analysis if checks fail + + if (advanced_options$pred_matrix != "Amatrix"){ + #### VCF sanity check + checks <- vcf_sanity_check(input$pred_file$datapath) + + error_if_false <- c( + "VCF_header", "VCF_columns", "unique_FORMAT", "GT", + "samples", "VCF_compressed" + ) + + error_if_true <- c( + "multiallelics", "phased_GT", "mixed_ploidies", + "duplicated_samples", "duplicated_markers" + ) + + warning_if_false <- c("chrom_info", "pos_info", "ref_alt","max_markers") + + checks_result <- vcf_sanity_messages(checks, + error_if_false, + error_if_true, + warning_if_false = warning_if_false, + warning_if_true = NULL, + input_ploidy = as.numeric(input$pred_ploidy)) + + if(checks_result) return() # Stop the analysis if checks fail + } ######### diff --git a/tests/testthat/test-DosageCall.R b/tests/testthat/test-DosageCall.R index ef8a912..de6f7fd 100644 --- a/tests/testthat/test-DosageCall.R +++ b/tests/testthat/test-DosageCall.R @@ -18,10 +18,10 @@ test_that("Dosage Calling from MADC file",{ # Call the get_counts function with the specified MADC file path and output file path #Status - result_df <- get_counts(madc_file, output_name) + result_df <- BIGapp:::get_counts(madc_file, output_name) #Call the get_matrices function - matrices <- get_matrices(result_df) + matrices <- BIGapp:::get_matrices(result_df) mout <- updog::multidog(refmat = matrices$ref_matrix, sizemat = matrices$size_matrix, @@ -29,11 +29,11 @@ test_that("Dosage Calling from MADC file",{ model = model_select, nc = cores) - expect_equal(sum(mout$snpdf$bias), 402.7979, tolerance = 0.01) - expect_equal(sum(mout$inddf$postmean), 95229.13, tolerance = 0.01) + expect_equal(sum(mout$snpdf$bias), 412.6139, tolerance = 0.01) + expect_equal(sum(mout$inddf$postmean), 95233.1, tolerance = 0.01) # Convert updog to VCF - updog2vcf( + BIGr:::updog2vcf( multidog.object = mout, output.file = output_name, updog_version = packageVersion("updog"), @@ -69,7 +69,7 @@ test_that("Dosage Calling from VCF file",{ #Get items in FORMAT column info <- vcf@gt[1,"FORMAT"] #Getting the first row FORMAT - info_ids <- extract_info_ids(info[1]) + info_ids <- BIGapp:::extract_info_ids(info[1]) chrom <- vcf@fix[,1] pos <- vcf@fix[,2] @@ -96,8 +96,8 @@ test_that("Dosage Calling from VCF file",{ model = model_select, nc = cores) - expect_equal(sum(mout$snpdf$bias), 402.79, tolerance = 0.01) - expect_equal(sum(mout$inddf$postmean), 95229.13, tolerance = 0.01) + expect_equal(sum(mout$snpdf$bias), 412.6139, tolerance = 0.01) + expect_equal(sum(mout$inddf$postmean), 95233.1, tolerance = 0.01) # Convert updog to VCF BIGr::updog2vcf( @@ -164,7 +164,7 @@ test_that("Dosage Calling from VCF file f1 and s1 model",{ #Get items in FORMAT column info <- vcf@gt[1,"FORMAT"] #Getting the first row FORMAT - info_ids <- extract_info_ids(info[1]) + info_ids <- BIGapp:::extract_info_ids(info[1]) chrom <- vcf@fix[,1] pos <- vcf@fix[,2] @@ -221,11 +221,11 @@ test_that("Dosage Calling from VCF file f1 and s1 model",{ model = input$updog_model, nc = cores) - expect_equal(sum(mout$snpdf$bias), 408.5401, tolerance = 0.01) - expect_equal(sum(mout$inddf$postmean), 94249.05, tolerance = 0.01) + expect_equal(sum(mout$snpdf$bias), 440.109, tolerance = 0.01) + expect_equal(sum(mout$inddf$postmean), 94611.59, tolerance = 0.01) # Convert updog to VCF - updog2vcf( + BIGr::updog2vcf( multidog.object = mout, output.file = output_name, updog_version = packageVersion("updog"), diff --git a/tests/testthat/test-GSAcc.R b/tests/testthat/test-GSAcc.R index 3ed7dcb..a2009fa 100644 --- a/tests/testthat/test-GSAcc.R +++ b/tests/testthat/test-GSAcc.R @@ -138,14 +138,14 @@ test_that("test Predictive Ability iris",{ # Calculate the average value for each column in the traits list for each SampleID, ignoring Iter and Fold average_gebvs_df <- results$GEBVs %>% group_by(Sample) %>% - summarize(across(all_of(input$pred_trait_info), mean, na.rm = TRUE)) - + summarize(across(all_of(input$pred_trait_info), \(x) mean(x, na.rm = TRUE))) + pred_outputs_rrBLUP$avg_GEBVs <- average_gebvs_df columns <- setdiff(colnames(results$PredictionAccuracy), c("Iter","Fold")) average_accuracy_df <- results$PredictionAccuracy %>% group_by(Iter) %>% - summarize(across(all_of(columns), mean, na.rm = TRUE)) + summarize(across(all_of(columns), \(x) mean(x, na.rm = TRUE))) pred_outputs_rrBLUP$comb_output <- average_accuracy_df