Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: BIGapp
Title: Breeding Insight Genomics Shiny Application
Version: 1.5.2
Version: 1.6.0
Authors@R:
c(
person(c("Alexander", "M."), "Sandercock",
Expand Down
51 changes: 37 additions & 14 deletions R/GS_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,11 @@ get_relationship_mat <- function(geno_input, ped_file, type = c("Gmatrix", "Amat
ploidy.correction = TRUE)
G.mat <- round(G.mat,3) #to be easy to invert

# Ensure G.mat and Ped.mat have the same sample order
common_samples <- intersect(rownames(G.mat), rownames(Ped.mat))
G.mat <- G.mat[common_samples, common_samples]
Ped.mat <- Ped.mat[common_samples, common_samples]
Comment on lines +244 to +247
Copy link

Copilot AI Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sample alignment logic is redundant. Both G.mat (line 237) and Ped.mat (line 233) are already filtered to the same valid_ids from the phenotype file just before this code. The intersect here will always return the same samples unless there's a bug in the earlier filtering. Consider adding validation that the samples already match instead of re-filtering, or document why this additional alignment is necessary.

Suggested change
# Ensure G.mat and Ped.mat have the same sample order
common_samples <- intersect(rownames(G.mat), rownames(Ped.mat))
G.mat <- G.mat[common_samples, common_samples]
Ped.mat <- Ped.mat[common_samples, common_samples]
# Validate that G.mat and Ped.mat have the same sample order
if (!identical(rownames(G.mat), rownames(Ped.mat))) {
stop("Sample alignment error: G.mat and Ped.mat rownames do not match after filtering to valid_ids.")
}

Copilot uses AI. Check for mistakes.

#Computing H matrix (Martini) - Using the name Geno.mat for consistency
Geno.mat <- Hmatrix(A=Ped.mat, G=G.mat,
method="Martini",
Expand Down Expand Up @@ -314,32 +319,44 @@ GBLUP_genomic_prediction <- function(pheno_dat, Geno.mat, cycles, folds, traits,
#Mask phenotypes in testing group
Pheno_test <- pheno_dat
Pheno_test[test, traits[trait_idx]] <- NA
#Kin.blup
traitpred <- kin.blup(data = Pheno_test,
geno = colnames(pheno_dat)[1],
pheno = traits[trait_idx],
fixed = fixed_cat,
covariate = fixed_cov,
K=Geno.mat,
n.core = cores)

# Add error handling for kin.blup
traitpred <- tryCatch({
kin.blup(data = Pheno_test,
geno = colnames(pheno_dat)[1], # This assumes first column is sample ID
Copy link

Copilot AI Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment states 'This assumes first column is sample ID' but this assumption is not validated. If the assumption is violated, the error handling will catch it but the error message won't indicate the root cause. Consider validating this assumption earlier in the function or making it an explicit parameter.

Copilot uses AI. Check for mistakes.
pheno = traits[trait_idx],
fixed = fixed_cat,
covariate = fixed_cov,
K = Geno.mat,
n.core = cores)
}, error = function(e) {
cat("Error in kin.blup for trait", traits[trait_idx], ":", e$message, "\n")
return(NULL)
})

# Check if kin.blup failed
if (is.null(traitpred)) {
cat("kin.blup failed for trait", traits[trait_idx], "- skipping this trait\n")
next
Comment on lines +338 to +340
Copy link

Copilot AI Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using next to skip a failed trait will leave NA values in the results matrix for that trait in this fold, but the code continues to populate subsequent folds. This creates incomplete data structures where some rows have results and others don't, potentially causing issues in downstream analysis. Consider either: (1) stopping the entire analysis with an error, (2) filling with explicit NA or sentinel values, or (3) documenting this behavior clearly.

Copilot uses AI. Check for mistakes.
}

#Cor between test values and predicted breeding values
results[(((r-1)*5)+fold), trait_idx] <- cor(pheno_dat[test, traits[trait_idx]], traitpred$g[test], use = "complete.obs")
results[(((r-1)*5)+fold), (length(traits)+1)] <- r
results[(((r-1)*5)+fold), (length(traits)+2)] <- fold
results[(((r-1)*folds)+fold), trait_idx] <- cor(pheno_dat[test, traits[trait_idx]], traitpred$g[test], use = "complete.obs")
results[(((r-1)*folds)+fold), (length(traits)+1)] <- r
results[(((r-1)*folds)+fold), (length(traits)+2)] <- fold

# Extract GEBVs
GEBVs_fold[, trait_idx] <- traitpred$g[test]

# Calculate heritability (*confirm this calculation* - either way will not report to user)
Vu <- traitpred$Vg
Ve <- traitpred$Ve
heritability_scores[(((r-1)*5)+fold), trait_idx] <- Vu / (Vu + Ve)
heritability_scores[(((r-1)*folds)+fold), trait_idx] <- Vu / (Vu + Ve)
}

#Add iter and fold information for each trait/result
heritability_scores[(((r-1)*5)+fold), (length(traits)+1)] <- r
heritability_scores[(((r-1)*5)+fold), (length(traits)+2)] <- fold
heritability_scores[(((r-1)*folds)+fold), (length(traits)+1)] <- r
heritability_scores[(((r-1)*folds)+fold), (length(traits)+2)] <- fold

#Add sample, iteration, and fold information to GEBVs_fold
GEBVs_fold[,"Iter"] = r
Expand All @@ -348,6 +365,12 @@ GBLUP_genomic_prediction <- function(pheno_dat, Geno.mat, cycles, folds, traits,

# Store GEBVs for this fold
GEBVs_cycle[[fold]] <- GEBVs_fold

# Update progress bar within the fold loop
if(!is.null(session)) {
updateProgressBar(session = session, id = "pb_prediction", value = as.numeric(pb_value),
title = paste0("Iteration ", r, " of ", cycles, " - Fold ", fold, " of ", folds))
}
}

# Store GEBVs for this cycle
Expand Down
50 changes: 26 additions & 24 deletions R/mod_GSAcc.R
Original file line number Diff line number Diff line change
Expand Up @@ -428,30 +428,32 @@ mod_GSAcc_server <- function(input, output, session, parent_session){
pred_inputs <- eventReactive(input$prediction_start,{

toggleClass(id = "pred_ploidy", class = "borderred", condition = (is.na(input$pred_ploidy) | is.null(input$pred_ploidy)))

#### VCF sanity check
checks <- vcf_sanity_check(input$pred_file$datapath)

error_if_false <- c(
"VCF_header", "VCF_columns", "unique_FORMAT", "GT",
"samples", "VCF_compressed"
)

error_if_true <- c(
"multiallelics", "phased_GT", "mixed_ploidies",
"duplicated_samples", "duplicated_markers"
)

warning_if_false <- c("chrom_info", "pos_info", "ref_alt","max_markers")

checks_result <- vcf_sanity_messages(checks,
error_if_false,
error_if_true,
warning_if_false = warning_if_false,
warning_if_true = NULL,
input_ploidy = as.numeric(input$pred_ploidy))

if(checks_result) return() # Stop the analysis if checks fail

if (advanced_options$pred_matrix != "Amatrix"){
#### VCF sanity check
checks <- vcf_sanity_check(input$pred_file$datapath)

error_if_false <- c(
"VCF_header", "VCF_columns", "unique_FORMAT", "GT",
"samples", "VCF_compressed"
)

error_if_true <- c(
"multiallelics", "phased_GT", "mixed_ploidies",
"duplicated_samples", "duplicated_markers"
)

warning_if_false <- c("chrom_info", "pos_info", "ref_alt","max_markers")

checks_result <- vcf_sanity_messages(checks,
error_if_false,
error_if_true,
warning_if_false = warning_if_false,
warning_if_true = NULL,
input_ploidy = as.numeric(input$pred_ploidy))

if(checks_result) return() # Stop the analysis if checks fail
}
#########


Expand Down
24 changes: 12 additions & 12 deletions tests/testthat/test-DosageCall.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,22 @@ test_that("Dosage Calling from MADC file",{

# Call the get_counts function with the specified MADC file path and output file path
#Status
result_df <- get_counts(madc_file, output_name)
result_df <- BIGapp:::get_counts(madc_file, output_name)

#Call the get_matrices function
matrices <- get_matrices(result_df)
matrices <- BIGapp:::get_matrices(result_df)

mout <- updog::multidog(refmat = matrices$ref_matrix,
sizemat = matrices$size_matrix,
ploidy = as.numeric(ploidy),
model = model_select,
nc = cores)

expect_equal(sum(mout$snpdf$bias), 402.7979, tolerance = 0.01)
expect_equal(sum(mout$inddf$postmean), 95229.13, tolerance = 0.01)
expect_equal(sum(mout$snpdf$bias), 412.6139, tolerance = 0.01)
expect_equal(sum(mout$inddf$postmean), 95233.1, tolerance = 0.01)

# Convert updog to VCF
updog2vcf(
BIGr:::updog2vcf(
multidog.object = mout,
output.file = output_name,
updog_version = packageVersion("updog"),
Expand Down Expand Up @@ -69,7 +69,7 @@ test_that("Dosage Calling from VCF file",{
#Get items in FORMAT column
info <- vcf@gt[1,"FORMAT"] #Getting the first row FORMAT

info_ids <- extract_info_ids(info[1])
info_ids <- BIGapp:::extract_info_ids(info[1])
chrom <- vcf@fix[,1]
pos <- vcf@fix[,2]

Expand All @@ -96,8 +96,8 @@ test_that("Dosage Calling from VCF file",{
model = model_select,
nc = cores)

expect_equal(sum(mout$snpdf$bias), 402.79, tolerance = 0.01)
expect_equal(sum(mout$inddf$postmean), 95229.13, tolerance = 0.01)
expect_equal(sum(mout$snpdf$bias), 412.6139, tolerance = 0.01)
expect_equal(sum(mout$inddf$postmean), 95233.1, tolerance = 0.01)

# Convert updog to VCF
BIGr::updog2vcf(
Expand Down Expand Up @@ -164,7 +164,7 @@ test_that("Dosage Calling from VCF file f1 and s1 model",{
#Get items in FORMAT column
info <- vcf@gt[1,"FORMAT"] #Getting the first row FORMAT

info_ids <- extract_info_ids(info[1])
info_ids <- BIGapp:::extract_info_ids(info[1])
chrom <- vcf@fix[,1]
pos <- vcf@fix[,2]

Expand Down Expand Up @@ -221,11 +221,11 @@ test_that("Dosage Calling from VCF file f1 and s1 model",{
model = input$updog_model,
nc = cores)

expect_equal(sum(mout$snpdf$bias), 408.5401, tolerance = 0.01)
expect_equal(sum(mout$inddf$postmean), 94249.05, tolerance = 0.01)
expect_equal(sum(mout$snpdf$bias), 440.109, tolerance = 0.01)
expect_equal(sum(mout$inddf$postmean), 94611.59, tolerance = 0.01)

# Convert updog to VCF
updog2vcf(
BIGr::updog2vcf(
multidog.object = mout,
output.file = output_name,
updog_version = packageVersion("updog"),
Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-GSAcc.R
Original file line number Diff line number Diff line change
Expand Up @@ -138,14 +138,14 @@ test_that("test Predictive Ability iris",{
# Calculate the average value for each column in the traits list for each SampleID, ignoring Iter and Fold
average_gebvs_df <- results$GEBVs %>%
group_by(Sample) %>%
summarize(across(all_of(input$pred_trait_info), mean, na.rm = TRUE))

summarize(across(all_of(input$pred_trait_info), \(x) mean(x, na.rm = TRUE)))
pred_outputs_rrBLUP$avg_GEBVs <- average_gebvs_df

columns <- setdiff(colnames(results$PredictionAccuracy), c("Iter","Fold"))
average_accuracy_df <- results$PredictionAccuracy %>%
group_by(Iter) %>%
summarize(across(all_of(columns), mean, na.rm = TRUE))
summarize(across(all_of(columns), \(x) mean(x, na.rm = TRUE)))

pred_outputs_rrBLUP$comb_output <- average_accuracy_df

Expand Down