# R code to summarize U.S. Department of Education Civil Rights Data
# Collection (CRDC) data by school district (LEA).
#
# Notes:

library(dplyr)

# Folder that is scanned for input CSV files (passed to load_csv_files()).
crdc <- "C:\\data\\CRDC\\CRDC\\School"
#' Load every CSV file in a folder, keeping only rows for the given state(s).
#'
#' @param folder_path Directory scanned (non-recursively) for files whose
#'   names end in ".csv".
#' @param state Character vector of `LEA_STATE` values to keep. Defaults to
#'   "PA", the value this script has always filtered on.
#' @return A list of data frames, one per CSV file, named by the file's base
#'   name (e.g. "foo.csv").
load_csv_files <- function(folder_path, state = "PA") {
  csv_files <- list.files(folder_path, pattern = "\\.csv$", full.names = TRUE)

  # A mistyped folder would otherwise produce an empty result with no hint.
  if (length(csv_files) == 0) {
    warning("No .csv files found in ", folder_path, call. = FALSE)
  }

  read_state_rows <- function(csv_file) {
    dataframe <- read.csv(csv_file)

    if (!"LEA_STATE" %in% names(dataframe)) {
      stop("No LEA_STATE column in ", csv_file, call. = FALSE)
    }

    # %in% never yields NA, so rows with a missing LEA_STATE are dropped.
    keep <- dataframe[["LEA_STATE"]] %in% state
    state_rows <- dataframe[keep, , drop = FALSE]
    rownames(state_rows) <- NULL
    state_rows
  }

  loaded_dataframes <- lapply(csv_files, read_state_rows)
  names(loaded_dataframes) <- basename(csv_files)
  loaded_dataframes
}

# Read every CSV in `crdc` into a named list of data frames (PA rows only).
all_data <- load_csv_files(crdc)

#' Collapse a data frame to one row per LEA.
#'
#' Rows are grouped by LEA_STATE, LEA_STATE_NAME, LEAID and LEA_NAME. Within
#' each group numeric columns are summed (ignoring NA), logical columns are
#' reduced with any(), and every other column keeps its first value.
#'
#' @param data_frame A data frame containing the four LEA grouping columns.
#'   The columns SCHID, SCH_NAME, COMBOKEY and JJ are dropped when present.
#' @return An ungrouped tibble with one row per LEA.
group_and_summarize <- function(data_frame) {
  dropped_columns <- c("SCHID", "SCH_NAME", "COMBOKEY", "JJ")

  # Negative numbers are treated as missing rather than summed; presumably
  # these are reserve / not-applicable codes -- confirm against the data
  # documentation. which() keeps existing NAs out of the replacement index.
  negatives_to_na <- function(x) {
    replace(x, which(x < 0), NA)
  }

  combine_column <- function(x) {
    if (is.numeric(x)) {
      # NOTE(review): a group whose values are all NA sums to 0, not NA.
      sum(x, na.rm = TRUE)
    } else if (is.logical(x)) {
      any(x)
    } else {
      first(x)
    }
  }

  data_frame %>%
    # any_of() tolerates input files that lack some of these columns.
    select(-any_of(dropped_columns)) %>%
    # Only numeric columns are masked: comparing character columns with 0
    # is a locale-dependent string comparison and could blank real text.
    mutate(across(where(is.numeric), negatives_to_na)) %>%
    group_by(LEA_STATE, LEA_STATE_NAME, LEAID, LEA_NAME) %>%
    # .groups = "drop" returns a plain tibble instead of leaving it grouped.
    summarise(across(everything(), combine_column), .groups = "drop")
}

#' Apply group_and_summarize() to every named element of a list.
#'
#' @param all_data A named list of data frames.
#' @return A list with the same names, each element replaced by its
#'   summarized counterpart.
summarize_all_dfs <- function(all_data) {
  frame_names <- names(all_data)

  results <- lapply(frame_names, function(frame_name) {
    group_and_summarize(all_data[[frame_name]])
  })
  names(results) <- frame_names

  results
}

# Summarize each loaded data frame down to one row per LEA.
all_summarized <- summarize_all_dfs(all_data)

#' Write each data frame in a named list to its own CSV file.
#'
#' @param dataframes_list Named list of data frames; each name is used as
#'   the output file name inside `output_folder`.
#' @param output_folder Directory to write into. It is created, including
#'   any missing parent directories, when it does not exist yet.
#' @return `NULL`, invisibly. Called for its side effect of writing files.
write_dataframes_to_csv <- function(dataframes_list, output_folder) {
  # dir.exists() rather than file.exists(): a regular file with the same
  # name must not be mistaken for a usable output directory.
  if (!dir.exists(output_folder)) {
    created <- dir.create(output_folder, recursive = TRUE)
    if (!created) {
      stop("Could not create output folder: ", output_folder, call. = FALSE)
    }
  }

  for (csv_name in names(dataframes_list)) {
    output_path <- file.path(output_folder, csv_name)
    write.csv(dataframes_list[[csv_name]], file = output_path, row.names = FALSE)
  }

  invisible(NULL)
}

# Write one CSV per summarized data frame into the "summarized" subfolder.
write_dataframes_to_csv (all_summarized, "C:\\data\\CRDC\\CRDC\\School\\summarized")