From 1b761b92586c5eca1521b52cb7e745edb7e173dc Mon Sep 17 00:00:00 2001
From: josuechinchilla <jc3635@cornell.edu>
Date: Tue, 4 Nov 2025 16:06:12 -0500
Subject: [PATCH 1/2] updated check_ped to save corrected dataframe and report

---
 .DS_Store     | Bin 0 -> 8196 bytes
 BIGr.Rproj    |   1 +
 DESCRIPTION   |   2 +-
 R/check_ped.R | 267 +++++++++++++++++++++++++++++---------------------
 4 files changed, 157 insertions(+), 113 deletions(-)
 create mode 100644 .DS_Store
diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..3b9d8426aa5a73e270c1816e6de3b764b9deb853
GIT binary patch
literal 8196
zcmeHMU2GLa6rOKeV3!TB)B*)rduxSaxrVktrJ``TZ6iM|f48MS{M5a77rJ4)Tkh_?
zr8d?2qyaVYMdSY)Q6EsFDEjJuVtgP-l$eMy@rf6W;Dd?r%+6k*Eq&Aw*hyy2oHOUl
z^ql?9%$+G?4C#W|$XFF)Os2}Irc!g0!pC`?R-{N$P84L%{Nb$QWr;hR9iFBg3*rdG
z5r`uYM<9+s9D!RQ0(55cBF}K{i_y4^BM?X6j*I}`9}-kKO+|D<&~WRZCcFX=<yU~P
zP+#L6qKSy6B03=`jZlFwB`Qo2T`?fcNuG%GQW2dHRG2eFmk&fMBf6m=T%G1mgz5|_
zLE|=#KpcVT5#Z@l#)g=|-0aAW_jkwkgNB9+kjlzu&7LFAkyZI%X3!hS1V&Ks(?)(j
z=ej)4$(H&?cAssH3}}_Do*NjpYvn@&+q6jKK#%R3-bmUjxTYW4X2gK3$jX3LH8$4N
z(wt1J-OxCmOpLWOH6#=3);5oiEApb+H5>O19Ce0V?}Tsw(I&v=O>yYtb#ENM*eYHl
z$}-tu;myU8H_BX9ou~Ko_Vw>SFraA#-`Z<AMqn4MHY2e70d0X#&TY$i?(V!*B&T|E
zo>LfdX=Gu}aB>AwwzfD<#(rFA=klK8?Dl*+usyf0H?WQee2h;tX{fi~_wqq&I9abv
z^9!vCzQ(R$ZtU-~@;*^TIX!<#^?j=zXnOGBRQtBhJ?9r_3oF!0b#YJ8_U)`=b&uF)
zFxY4KIm<O|_eej*(+%vywv|^FBB^w_#*lTsqH5`~yO-<wRPE9%y@jlOtY8PDTCFUL
zHEQ%)MLQ!OFv-bI&*U?j;fP+N*U9pJE^p0S)C#d$uO}7Fkh^?xSxm7;ZB#U0={iau
zU6`&@n`O;Zx*a2HIYQK;Zj`khN>5REEQFQnW?AXYc|$|wj4xzqy-n7>mJjiTI}AVA
zHEg-1KPjzmmF0ea(8yc6MhJ2xiw;>1KO@PxNxNwWUOtS*&Kh0s<5O)hP0J*wmsKsT
z&@+0steqnp&3216ft<d9z`9(U>YNCHR-399fU6aRu`tH9sRqGUmc$X|hnmhBSek8T
zhuAP1WhdE->`nF|JHtL=U$F1k1@;sBjs4F4U{}~x_BYBg7nN9!6{y21+>a)##d<WO
z6&>irPV^%O6NhmG4hksZI7aaVp2jmcfs=Rvui#a@jyG@$@8UhYk5BO#KF62%3K#G*
zF5wqk#uZ$}Uy>qKNXsQ%x<{&$R!Q~JYH5?SS=uV?k@}?pX;63|l@jgXWY45g;-hVY
z5-oc6lTP0}(W)Qax^4T8j$1a-ew@!!>#oX0i<cx;)UR2$;gK!Z04VTUg6NIV579m;
zM?$oRh?t60k%MY=H>HDhe5O>?=J2$Y&vWgXd(~=f7A1^eSJn|bo03JatCQ8$nnXD&
z*oMYxRV$+mR6^VJNn#N(Ef1-xR!+Gn#y081vWQ8y(lk7?<o^oe->`G+2X={FCXD}s
zGRRnh1XdzRnBIgG;kpg&gzMediwwGO5CbrfC6qgGF^pr#V+`YX5)*ieF#ar_!}E9v
zFB8sRBcz|gTX-Aq-~*h&$2f~mLd1WI?{N{oPQh^JWDMia>liL(@~-2#M@Uja{AA_Z
zk}DHlRq^|O`^@kEx25l~Q*i|12#5%vye-p~rv6NOAHQoSs6IxO7p^xUXlO#sq=x|F
kul>W2`Uz6yHmQhC2uczv|NBG0^dH*s`5&MEcij2^FAa5Kvj6}9

literal 0
HcmV?d00001

diff --git a/BIGr.Rproj b/BIGr.Rproj
index 69fafd4..5638e2e 100644
--- a/BIGr.Rproj
+++ b/BIGr.Rproj
@@ -1,4 +1,5 @@
 Version: 1.0
+ProjectId: 0eeaab63-2615-4da7-b10a-927160fc78a3
 
 RestoreWorkspace: No
 SaveWorkspace: No
diff --git a/DESCRIPTION b/DESCRIPTION
index 31ad1e1..cba831e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -44,7 +44,7 @@ URL: https://github.com/Breeding-Insight/BIGr
 BugReports: https://github.com/Breeding-Insight/BIGr/issues
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
 Depends: R (>= 4.4.0)
 biocViews:
 Imports: 
diff --git a/R/check_ped.R b/R/check_ped.R
index 3f4831b..35b0ba9 100644
--- a/R/check_ped.R
+++ b/R/check_ped.R
@@ -1,99 +1,142 @@
-#' Evaluate Pedigree File for Accuracy
+#' Check a pedigree file for accuracy and report/correct common errors
 #'
-#' Check a pedigree file for accuracy and output suspected errors
+#' `check_ped` reads a 3-column pedigree file (tab-separated, columns labeled `id`, `sire`, `dam` in any order)
+#' and performs quality checks, optionally correcting or flagging errors.
 #'
-#'check_ped takes a 3-column pedigree tab separated file with columns labeled as id sire dam in any order and checks for:
-#'* Ids that appear more than once in the id column
-#'* Ids that appear in both sire and dam columns
-#'* Direct (e.g. parent is a offspring of his own daughter) and indirect (e.g. a great grandparent is son of its grandchild) dependencies within the pedigree.
-#'* Individuals included in the pedigree as sire or dam but not on the id column and reports them back with unknown parents (0).
+#' The function checks for:
+#' * Exact duplicate rows and removes them (keeping one copy)
+#' * IDs that appear more than once with conflicting sire/dam assignments (sets sire/dam to "0")
+#' * IDs that appear in both sire and dam columns
+#' * Missing parents (IDs referenced as sire/dam but not in `id` column), adds them with sire/dam = "0"
+#' * Direct and indirect pedigree dependencies (cycles), such as a parent being its own descendant
 #'
-#'When using check_ped, do a first run to check for repeated ids and parents that appear as sire and dam.
-#'Once these errors are cleaned run the function again to check for dependencies as this will provide the most accurate report.
+#' After an initial run to clean exact duplicates and repeated IDs, you can run the function again to detect cycles more accurately.
 #'
-#'Note: This function does not change the input file but prints any errors found in the console.
+#' The function does **not** overwrite the input file. Instead, it prints findings to the console and optionally saves:
+#' * Corrected pedigree as a dataframe in the global environment
+#' * A report listing all detected issues
+#'
+#' @param ped.file Path to the pedigree text file.
+#' @param seed Optional seed for reproducibility.
+#' @param verbose Logical. If TRUE (default), prints errors and prompts for interactive saving.
+#'
+#' @return A list of data.frames containing detected issues:
+#' * `exact_duplicates`: rows that were exact duplicates
+#' * `repeated_ids_diff`: IDs appearing more than once with conflicting sire/dam
+#' * `messy_parents`: IDs appearing as both sire and dam
+#' * `missing_parents`: parents added to the pedigree with 0 as sire/dam
+#' * `dependencies`: detected cycles in the pedigree
 #'
-#' @param ped.file path to pedigree text file. The pedigree file is a
-#' 3-column pedigree tab separated file with columns labeled as id sire dam in any order
-#' @param seed Optional seed for reproducibility
-#' @param verbose Logical. If TRUE, print the errors to the console.
-#' @return A list of data.frames of error types, and the output printed to the console
 #' @examples
-#' ##Get list with a dataframe for each error type
-#' ped_file <- system.file("check_ped_test.txt", package="BIGr")
-#' ped_errors <- check_ped(ped.file = ped_file,
-#'                         seed = 101919)
+#' ped_file <- system.file("check_ped_test.txt", package = "BIGr")
+#' ped_errors <- check_ped(ped.file = ped_file, seed = 101919)
 #'
-#' ##Access the "messy parents" dataframe result
+#' # Access messy parents
 #' ped_errors$messy_parents
 #'
-#' ##Get list of sample IDs with messy parents error
-#' messy_parent_ids <- ped_errors$messy_parents$id
-#' print(messy_parent_ids)
+#' # IDs with messy parents
+#' messy_ids <- ped_errors$messy_parents$id
+#' print(messy_ids)
+#'
 #' @import dplyr
 #' @import janitor
 #' @importFrom stats setNames
 #' @importFrom utils read.table
 #' @export
 check_ped <- function(ped.file, seed = NULL, verbose = TRUE) {
-  #### Function to check for hierarchical errors missing parents and repeated ids ####
-  if(!is.null(seed)){
-    set.seed(seed)
-  }
-  #### read in data ####
-  data = utils::read.table(ped.file, header = T)
-  data <- data %>%
+
+  #### setup ####
+  if (!is.null(seed)) set.seed(seed)
+
+  # Read and clean data
+  data <- utils::read.table(ped.file, header = TRUE) %>%
     janitor::clean_names() %>%
     mutate(
       id = as.character(id),
       sire = as.character(sire),
       dam = as.character(dam)
-      )
-  #Missing parents dataframe initialize
-  missing_parents <- data.frame(id = character(), sire = character(), dam = character(), stringsAsFactors = FALSE)
+    )
+
+  original_data <- data
   errors <- list()
-  # repeated id checks
-  n_occur <- data.frame(table(data$id))
-  repeated_ids = n_occur[n_occur$Freq > 1,] %>%
-    rename(id = Var1)
-  # Check for ids that appear as both sire and dam ###This is possible for plants so maybe do not control for this or do not delete these rows just print them
-  messy_parents <- as.data.frame(intersect(data$sire, data$dam)) %>%
-    rename(id = 1) %>%
-    filter(id != 0)
-  # Missing parents check
+  missing_parents <- data.frame(id = character(), sire = character(), dam = character(), stringsAsFactors = FALSE)
+
+  #### check 1: exact duplicates ####
+  exact_duplicates <- data[duplicated(data), ]
+  if (nrow(exact_duplicates) > 0) {
+    data <- distinct(data)  # remove exact duplicates
+  }
+
+  #### check 2: repeated IDs with conflicting sire/dam ####
+  repeated_ids <- data %>%
+    group_by(id) %>%
+    filter(n() > 1) %>%
+    ungroup()
+
+  # Only IDs with actual conflicting sire/dam
+  conflicting_ids <- repeated_ids %>%
+    group_by(id) %>%
+    filter(n_distinct(sire) > 1 | n_distinct(dam) > 1) %>%
+    ungroup()
+
+  if (nrow(conflicting_ids) > 0) {
+    # Keep one row per ID, set sire/dam to "0"
+    data <- data %>%
+      group_by(id) %>%
+      summarize(
+        sire = if(n_distinct(sire) > 1) "0" else first(sire),
+        dam = if(n_distinct(dam) > 1) "0" else first(dam),
+        .groups = "drop"
+      )
+  }
+
+  repeated_ids_report <- conflicting_ids
+
+  #### check 3: missing parents ####
   for (i in 1:nrow(data)) {
     id <- data$id[i]
     sire <- data$sire[i]
     dam <- data$dam[i]
+
     if (sire != "0" && sire != id && !sire %in% data$id) {
       missing_parents <- rbind(missing_parents, data.frame(id = sire, sire = "0", dam = "0", stringsAsFactors = FALSE))
     }
     if (dam != "0" && dam != id && !dam %in% data$id) {
       missing_parents <- rbind(missing_parents, data.frame(id = dam, sire = "0", dam = "0", stringsAsFactors = FALSE))
     }
+
     if (sire == id || dam == id) {
       errors <- append(errors, paste("Dependency: Individual", id, "cannot be its own parent"))
     }
   }
-  # Remove duplicates
+
   missing_parents <- distinct(missing_parents)
-  # Combine original data with missing parents
-  corrected_data <- bind_rows(data, missing_parents)
-  # Function to detect cycles in the pedigree graph and identify the nodes involved
+  if (nrow(missing_parents) > 0) {
+    data <- bind_rows(data, missing_parents)
+  }
+
+  #### check 4: messy parents ####
+  sire_ids <- unique(data$sire[data$sire != "0"])
+  dam_ids <- unique(data$dam[data$dam != "0"])
+  messy_ids <- intersect(sire_ids, dam_ids)
+  messy_parents <- data %>% filter(id %in% messy_ids)
+
+  #### check 5: dependencies (cycles) ####
   detect_all_cycles <- function(data) {
-    # Create an adjacency list
-    adj_list <- list()
-    for (i in 1:nrow(data)) {
-      adj_list[[data$id[i]]] <- c(data$sire[i], data$dam[i])
-    }
-    # Helper function to perform DFS and detect cycles
+    adj_list <- lapply(data$id, function(x) {
+      row <- data[data$id == x, ]
+      c(row$sire, row$dam)
+    })
+    names(adj_list) <- data$id
+
     dfs <- function(node, visited, rec_stack, path) {
       visited[node] <- TRUE
       rec_stack[node] <- TRUE
       path <- append(path, node)
       cycles <- list()
+
       for (neighbor in adj_list[[node]]) {
-        if (neighbor != "0") {
+        if (neighbor %in% names(adj_list)) {
           if (!visited[neighbor]) {
             cycles <- append(cycles, dfs(neighbor, visited, rec_stack, path))
           } else if (rec_stack[neighbor]) {
@@ -102,14 +145,15 @@ check_ped <- function(ped.file, seed = NULL, verbose = TRUE) {
           }
         }
       }
+
       rec_stack[node] <- FALSE
       return(cycles)
     }
-    # Initialize visited and recursion stack
+
     visited <- stats::setNames(rep(FALSE, length(adj_list)), names(adj_list))
     rec_stack <- stats::setNames(rep(FALSE, length(adj_list)), names(adj_list))
     all_cycles <- list()
-    # Check for cycles in the graph and return the nodes involved
+
     for (node in names(adj_list)) {
       if (!visited[node]) {
         node_cycles <- dfs(node, visited, rec_stack, character())
@@ -120,75 +164,74 @@ check_ped <- function(ped.file, seed = NULL, verbose = TRUE) {
     }
     return(all_cycles)
   }
-  # Check for cycles in the corrected pedigree data
-  cycles <- detect_all_cycles(corrected_data)
+
+  cycles <- detect_all_cycles(data)
   if (length(cycles) > 0) {
-    cycle_number <- 1
     for (cycle_group in cycles) {
       cycle_ids <- unique(unlist(cycle_group))
       errors <- append(errors, paste("Cycle detected involving nodes:", paste(cycle_ids, collapse = " -> ")))
     }
   }
-  results <- list(missing_parents = missing_parents, dependencies = data.frame(Dependency = unlist(errors)), repeated_ids = repeated_ids, messy_parents = messy_parents)
-  repeated_ids <- results$repeated_ids
-  missing_parents <- results$missing_parents
-  messy_parents <- results$messy_parents
-  errors <- results$dependencies
-  # Adding the dataframes as an output list
-  output.results <- list()
-  #### Print errors and cycles ####
-  # Print repeated ids if any
-  if (nrow(repeated_ids) > 0) {
-    if (verbose) {
-      cat("Repeated ids found:\n")
-      message(repeated_ids)
-    }
-    output.results$repeated_ids <- repeated_ids
 
-  } else {
-    if (verbose) {
-      cat("No repeated ids found.\n")
-    }
-  }
-  #Print parents that appear as male and female
-  if (nrow(messy_parents) > 0) {
-    if (verbose) {
-      cat("Ids found as male and female parent:\n")
-      message(messy_parents)
-    }
-    output.results$messy_parents <- messy_parents
+  #### compile findings ####
+  input_ped_report <- list(
+    exact_duplicates = exact_duplicates,
+    repeated_ids_diff = repeated_ids_report,
+    messy_parents = messy_parents,
+    missing_parents = missing_parents,
+    dependencies = data.frame(Dependency = unique(unlist(errors)))
+  )
 
-  } else {
-    if (verbose) {
-      cat("No ids found as male and female parent.\n")
-    }
-  }
-  # Print missing parents if any
-  if (nrow(missing_parents) > 0) {
-    if (verbose) {
-      cat("Missing parents found:\n")
-      message(missing_parents)
-    }
-    output.results$missing_parents <- missing_parents
+  #### file names ####
+  file_base <- tools::file_path_sans_ext(basename(ped.file))
+  corrected_name <- paste0(file_base, "_corrected")
+  report_name <- paste0(file_base, "_report")
 
-  } else {
-    if (verbose) {
-      cat("No missing parents found.\n")
-    }
-  }
-  # Print errors if any
-  if (nrow(errors) > 0) {
-    if (verbose) {
-      cat("Dependencies found:\n")
-      message(unique(errors$Dependency))
+  #### output ####
+  if (verbose) {
+    cat("\n=== Pedigree Quality Check Report ===\n")
+
+    if (nrow(exact_duplicates) > 0) {
+      cat("\nExact duplicate rows detected and removed (only one copy kept):\n")
+      print(exact_duplicates)
+    } else cat("\nNo exact duplicate rows found.\n")
+
+    if (nrow(repeated_ids_report) > 0) {
+      cat("\nRepeated IDs with conflicting sire/dam (sire/dam set to 0 in corrected pedigree):\n")
+      print(repeated_ids_report)
+    } else cat("\nNo conflicting repeated IDs found.\n")
+
+    if (nrow(messy_parents) > 0) {
+      cat("\nIDs found as both sire and dam:\n")
+      print(messy_parents)
+    } else cat("\nNo IDs found as both sire and dam.\n")
+
+    if (nrow(missing_parents) > 0) {
+      cat("\nMissing parents were added to the pedigree with 0 as sire/dam:\n")
+      print(missing_parents)
+    } else cat("\nNo missing parents found.\n")
+
+    if (nrow(input_ped_report$dependencies) > 0) {
+      cat("\nDependencies detected:\n")
+      print(input_ped_report$dependencies)
+    } else cat("\nNo dependencies detected.\n")
+
+    #### interactive save ####
+    cat(paste0("\nDo you want to save the corrected pedigree as dataframe `", corrected_name, "`? (y/n): "))
+    ans <- tolower(trimws(readline()))
+    if (ans == "y") {
+      assign(corrected_name, data, envir = .GlobalEnv)
+      assign("input_ped_report", input_ped_report, envir = .GlobalEnv)
+      cat(paste0("Saved corrected pedigree as `", corrected_name, "` and report as `input_ped_report`.\n"))
+    } else {
+      cat("No corrected pedigree was saved.\n")
     }
-    output.results$dependencies <- data.frame(Dependency = unlist(errors))
 
   } else {
-    if (verbose) {
-      cat("No dependencies found.\n")
-    }
+    # Silent automatic mode
+    assign(corrected_name, data, envir = .GlobalEnv)
+    assign(report_name, input_ped_report, envir = .GlobalEnv)
   }
 
-  return(results)
+  invisible(input_ped_report)
 }

From 743043a0753f35def5d52c441d077f07800adb1d Mon Sep 17 00:00:00 2001
From: josuechinchilla <jc3635@cornell.edu>
Date: Tue, 4 Nov 2025 16:23:14 -0500
Subject: [PATCH 2/2] reorganized report and fixed language

---
 R/check_ped.R    | 21 +++++++++---------
 man/check_ped.Rd | 56 ++++++++++++++++++++++++++++--------------------
 2 files changed, 44 insertions(+), 33 deletions(-)

diff --git a/R/check_ped.R b/R/check_ped.R
index 35b0ba9..a2277bc 100644
--- a/R/check_ped.R
+++ b/R/check_ped.R
@@ -169,7 +169,7 @@ check_ped <- function(ped.file, seed = NULL, verbose = TRUE) {
   if (length(cycles) > 0) {
     for (cycle_group in cycles) {
       cycle_ids <- unique(unlist(cycle_group))
-      errors <- append(errors, paste("Cycle detected involving nodes:", paste(cycle_ids, collapse = " -> ")))
+      errors <- append(errors, paste("Cycle detected involving IDs:", paste(cycle_ids, collapse = " -> ")))
     }
   }
 
@@ -192,24 +192,25 @@ check_ped <- function(ped.file, seed = NULL, verbose = TRUE) {
     cat("\n=== Pedigree Quality Check Report ===\n")
 
     if (nrow(exact_duplicates) > 0) {
-      cat("\nExact duplicate rows detected and removed (only one copy kept):\n")
+      cat("\n Exact duplicate trios detected (only one copy will be kept in corrected pedigree):\n")
       print(exact_duplicates)
-    } else cat("\nNo exact duplicate rows found.\n")
+    } else cat("\nNo exact duplicate trios found.\n")
 
     if (nrow(repeated_ids_report) > 0) {
-      cat("\nRepeated IDs with conflicting sire/dam (sire/dam set to 0 in corrected pedigree):\n")
+      cat("\nConflicting trios detected (sire/dam set to 0 in corrected pedigree):\n")
       print(repeated_ids_report)
-    } else cat("\nNo conflicting repeated IDs found.\n")
+    } else cat("\nNo conflicting repeated trios found.\n")
+
+    if (nrow(missing_parents) > 0) {
+      cat("\n Parents missing as IDs found in the pedigree (will be added as founders in corrected pedigree):\n")
+      print(missing_parents)
+    } else cat("\nNo missing parents found.\n")
 
     if (nrow(messy_parents) > 0) {
-      cat("\nIDs found as both sire and dam:\n")
+      cat("\n IDs found as both sire and dam (is selfing or hermaphrodytism possible?):\n")
       print(messy_parents)
     } else cat("\nNo IDs found as both sire and dam.\n")
 
-    if (nrow(missing_parents) > 0) {
-      cat("\nMissing parents were added to the pedigree with 0 as sire/dam:\n")
-      print(missing_parents)
-    } else cat("\nNo missing parents found.\n")
 
     if (nrow(input_ped_report$dependencies) > 0) {
       cat("\nDependencies detected:\n")
diff --git a/man/check_ped.Rd b/man/check_ped.Rd
index 693bfe0..ea63de7 100644
--- a/man/check_ped.Rd
+++ b/man/check_ped.Rd
@@ -2,48 +2,58 @@
 % Please edit documentation in R/check_ped.R
 \name{check_ped}
 \alias{check_ped}
-\title{Evaluate Pedigree File for Accuracy}
+\title{Check a pedigree file for accuracy and report/correct common errors}
 \usage{
 check_ped(ped.file, seed = NULL, verbose = TRUE)
 }
 \arguments{
-\item{ped.file}{path to pedigree text file. The pedigree file is a
-3-column pedigree tab separated file with columns labeled as id sire dam in any order}
+\item{ped.file}{Path to the pedigree text file.}
 
-\item{seed}{Optional seed for reproducibility}
+\item{seed}{Optional seed for reproducibility.}
 
-\item{verbose}{Logical. If TRUE, print the errors to the console.}
+\item{verbose}{Logical. If TRUE (default), prints errors and prompts for interactive saving.}
 }
 \value{
-A list of data.frames of error types, and the output printed to the console
+A list of data.frames containing detected issues:
+\itemize{
+\item \code{exact_duplicates}: rows that were exact duplicates
+\item \code{repeated_ids_diff}: IDs appearing more than once with conflicting sire/dam
+\item \code{messy_parents}: IDs appearing as both sire and dam
+\item \code{missing_parents}: parents added to the pedigree with 0 as sire/dam
+\item \code{dependencies}: detected cycles in the pedigree
+}
 }
 \description{
-Check a pedigree file for accuracy and output suspected errors
+\code{check_ped} reads a 3-column pedigree file (tab-separated, columns labeled \code{id}, \code{sire}, \code{dam} in any order)
+and performs quality checks, optionally correcting or flagging errors.
 }
 \details{
-check_ped takes a 3-column pedigree tab separated file with columns labeled as id sire dam in any order and checks for:
+The function checks for:
 \itemize{
-\item Ids that appear more than once in the id column
-\item Ids that appear in both sire and dam columns
-\item Direct (e.g. parent is a offspring of his own daughter) and indirect (e.g. a great grandparent is son of its grandchild) dependencies within the pedigree.
-\item Individuals included in the pedigree as sire or dam but not on the id column and reports them back with unknown parents (0).
+\item Exact duplicate rows and removes them (keeping one copy)
+\item IDs that appear more than once with conflicting sire/dam assignments (sets sire/dam to "0")
+\item IDs that appear in both sire and dam columns
+\item Missing parents (IDs referenced as sire/dam but not in \code{id} column), adds them with sire/dam = "0"
+\item Direct and indirect pedigree dependencies (cycles), such as a parent being its own descendant
 }
 
-When using check_ped, do a first run to check for repeated ids and parents that appear as sire and dam.
-Once these errors are cleaned run the function again to check for dependencies as this will provide the most accurate report.
+After an initial run to clean exact duplicates and repeated IDs, you can run the function again to detect cycles more accurately.
 
-Note: This function does not change the input file but prints any errors found in the console.
+The function does \strong{not} overwrite the input file. Instead, it prints findings to the console and optionally saves:
+\itemize{
+\item Corrected pedigree as a dataframe in the global environment
+\item A report listing all detected issues
+}
 }
 \examples{
-##Get list with a dataframe for each error type
-ped_file <- system.file("check_ped_test.txt", package="BIGr")
-ped_errors <- check_ped(ped.file = ped_file,
-                        seed = 101919)
+ped_file <- system.file("check_ped_test.txt", package = "BIGr")
+ped_errors <- check_ped(ped.file = ped_file, seed = 101919)
 
-##Access the "messy parents" dataframe result
+# Access messy parents
 ped_errors$messy_parents
 
-##Get list of sample IDs with messy parents error
-messy_parent_ids <- ped_errors$messy_parents$id
-print(messy_parent_ids)
+# IDs with messy parents
+messy_ids <- ped_errors$messy_parents$id
+print(messy_ids)
+
 }