#' Print Method for Multiple Imputation Results
#'
#' @param x A list containing multiple imputation results with Rubin's statistics
#' @param digits Number of digits to display for numeric values (default: 4)
#' @param show_individual Logical, whether to show individual imputation results (default: FALSE)
#' @param ... Additional arguments (not currently used)
#' @return The input object `x`, returned invisibly; called for its printing side effect
#' @examples
#' # This example is not executed since it needs additional software (Apache Spark)
#' \dontrun{
#' # Example for mice.spark function
#' library(sparklyr)
#' library(dplyr)
#'
#' # Connect to Spark
#' # Assumes that you have already installed Spark with sparklyr::spark_install()
#' sc <- spark_connect(master = "local")
#'
#' # Create sample data with missing values
#' sample_data <- data.frame(
#'   outcome = c(1, 0, NA, 1, NA, 0),
#'   age = c(25, NA, 35, 28, 45, NA),
#'   income = c(50000, 60000, NA, 55000, 80000, 52000),
#'   education = c("High", "Medium", "High", NA, "Medium", "Medium")
#' )
#'
#' # Copy to Spark DataFrame
#' sdf <- copy_to(sc, sample_data, "sample_data")
#'
#' # Define variable types
#' variable_types <- c(
#'   outcome = "Binary",
#'   age = "Continuous_int",
#'   income = "Continuous_int",
#'   education = "Nominal"
#' )
#'
#' # Define analysis formula
#' analysis_formula <- outcome ~ age + income + education
#'
#' # Run MICE imputation
#' mice_result <- mice.spark(
#'   data = sdf,
#'   sc = sc,
#'   variable_types = variable_types,
#'   analysis_formula = analysis_formula,
#'   m = 3,  # Number of imputations
#'   maxit = 2,  # Number of iterations
#'   printFlag = TRUE,
#'   seed = 123,
#'   checkpointing = FALSE  # Set to TRUE if HDFS is available
#' )
#'
#' # See results
#'  print(mice_result)
#' # Clean up
#'  spark_disconnect(sc)
#'  }
#' @export

print.mi_results <- function(x, digits = 4, show_individual = FALSE, ...) {
  # Writes a plain-text report to the console: imputation count and timings,
  # the pooled estimates table, per-parameter diagnostics and (optionally) the
  # per-imputation table. The object itself is returned invisibly.

  # Validate input
  if (!is.list(x) || !all(c("rubin_stats", "per_imputation") %in% names(x))) {
    stop("Input must be a list with 'rubin_stats' and 'per_imputation' components")
  }

  # Header
  cat("Multiple Imputation Results\n")
  cat("==========================\n\n")

  # Summary information: the number of imputations is taken as the row count
  # of the per-imputation table.
  m <- nrow(x$per_imputation)
  cat("Number of imputations:", m, "\n")

  if ("imputation_time" %in% names(x$per_imputation)) {
    # `[[` avoids the partial name matching that `$` performs on data frames.
    imputation_times <- x$per_imputation[["imputation_time"]]
    total_time <- sum(imputation_times, na.rm = TRUE)
    avg_time <- mean(imputation_times, na.rm = TRUE)
    cat("Total imputation time:", round(total_time, 2), "seconds\n")
    cat("Average time per imputation:", round(avg_time, 2), "seconds\n")
  }

  cat("\n")

  # Pooled Results using Rubin's Rules
  cat("Pooled Parameter Estimates (Rubin's Rules)\n")
  cat("==========================================\n")

  if (length(x$rubin_stats) > 0) {
    # Create summary table: one row per pooled parameter
    param_names <- names(x$rubin_stats)
    n_params <- length(param_names)

    # Initialize result matrix
    result_table <- matrix(NA, nrow = n_params, ncol = 6)
    colnames(result_table) <- c("Estimate", "Within_Var", "Between_Var",
                                "Total_Var", "SE", "t_stat")
    rownames(result_table) <- param_names

    for (i in seq_along(param_names)) {
      stats <- x$rubin_stats[[param_names[i]]]
      std_error <- sqrt(stats$total_var)

      result_table[i, "Estimate"] <- stats$pooled_param
      result_table[i, "Within_Var"] <- stats$within_var
      result_table[i, "Between_Var"] <- stats$between_var
      result_table[i, "Total_Var"] <- stats$total_var
      result_table[i, "SE"] <- std_error

      # t-statistic (assuming null hypothesis: parameter = 0).
      # isTRUE() keeps a missing (NA) total variance from aborting the whole
      # report; the t-statistic is left as NA in that case.
      if (isTRUE(stats$total_var > 0)) {
        result_table[i, "t_stat"] <- stats$pooled_param / std_error
      }
    }

    # Print the table with specified digits
    print(round(result_table, digits))

    # Additional diagnostics, only for parameters that carry more than one
    # per-imputation value in `values`
    cat("\nDiagnostic Information:\n")
    cat("-----------------------\n")
    for (param in param_names) {
      stats <- x$rubin_stats[[param]]
      if (!is.null(stats$values) && length(stats$values) > 1) {
        # Between-imputation variance inflated by the finite number of
        # imputations: B + B / m
        inflated_between <- stats$between_var + stats$between_var / m
        # Relative increase in variance due to nonresponse
        r <- inflated_between / stats$within_var
        # Fraction of missing information
        lambda <- inflated_between / stats$total_var
        # Degrees of freedom
        df <- (m - 1) * (1 + 1 / r)^2

        cat(sprintf("%-15s: r=%.3f, lambda=%.3f, df=%.1f\n",
                    param, r, lambda, df))
      }
    }
  } else {
    cat("No pooled statistics available.\n")
  }

  cat("\n")

  # Individual imputation results (optional)
  if (show_individual && m > 0) {
    cat("Individual Imputation Results\n")
    cat("=============================\n")

    # Keep only numeric columns for a cleaner display
    numeric_cols <- vapply(x$per_imputation, is.numeric, logical(1))

    if (any(numeric_cols)) {
      per_imp_display <- x$per_imputation[, numeric_cols, drop = FALSE]
      # Every remaining column is numeric, so round them all in place.
      # Re-indexing with `numeric_cols` here would be wrong: that mask is
      # sized for the unfiltered table, not for this subset.
      per_imp_display[] <- lapply(per_imp_display,
                                  function(column) round(column, digits))
      print(per_imp_display)
    } else {
      print(x$per_imputation)
    }
    cat("\n")
  }

  # Footer with usage notes
  cat("Notes:\n")
  cat("------\n")
  cat("- SE: Standard Error (sqrt of Total_Var)\n")
  cat("- t_stat: t-statistic for testing parameter = 0\n")
  cat("- r: Relative increase in variance due to nonresponse\n")
  cat("- lambda: Fraction of missing information\n")
  cat("- df: Degrees of freedom for t-distribution\n")
  if (!show_individual) {
    cat("- Use show_individual=TRUE to see results from each imputation\n")
  }

  invisible(x)
}

#' Summary Method for Multiple Imputation Results
#'
#' @param object A list containing multiple imputation results
#' @param ... Additional arguments passed to print method
#' @return The value returned by the print method (the input `object`, invisibly); called for side effects
#' @examples
#' # This example is not executed since it needs additional software (Apache Spark)
#' \dontrun{
#' # Example for mice.spark function
#' library(sparklyr)
#' library(dplyr)
#'
#' # Connect to Spark
#' # Assumes that you have already installed Spark with sparklyr::spark_install()
#' sc <- spark_connect(master = "local")
#'
#' # Create sample data with missing values
#' sample_data <- data.frame(
#'   outcome = c(1, 0, NA, 1, NA, 0),
#'   age = c(25, NA, 35, 28, 45, NA),
#'   income = c(50000, 60000, NA, 55000, 80000, 52000),
#'   education = c("High", "Medium", "High", NA, "Medium", "Medium")
#' )
#'
#' # Copy to Spark DataFrame
#' sdf <- copy_to(sc, sample_data, "sample_data")
#'
#' # Define variable types
#' variable_types <- c(
#'   outcome = "Binary",
#'   age = "Continuous_int",
#'   income = "Continuous_int",
#'   education = "Nominal"
#' )
#'
#' # Define analysis formula
#' analysis_formula <- outcome ~ age + income + education
#'
#' # Run MICE imputation
#' mice_result <- mice.spark(
#'   data = sdf,
#'   sc = sc,
#'   variable_types = variable_types,
#'   analysis_formula = analysis_formula,
#'   m = 3,  # Number of imputations
#'   maxit = 2,  # Number of iterations
#'   printFlag = TRUE,
#'   seed = 123,
#'   checkpointing = FALSE  # Set to TRUE if HDFS is available
#' )
#'
#' # See results
#' summary(mice_result)
#' # Clean up
#' spark_disconnect(sc)
#'  }
#' @export

summary.mi_results <- function(object, ...) {
  # The summary of an imputation result is its printed report: hand the object
  # and any extra arguments to the `print` generic, then pass back whatever
  # that call returned without echoing it at the console.
  printed <- print(object, ...)
  invisible(printed)
}


# Tag a results list as an `mi_results` object.
#
# Takes `results_list` and returns it with its class attribute replaced by
# c("mi_results", "list"). No validation of the contents is performed here.
create_mi_results <- function(results_list) {
  mi_classes <- c("mi_results", "list")
  `class<-`(results_list, mi_classes)
}
