TECH-Levers: R-code master2

 ### Script generates tables and charts for each institution  
 ### ... adds components to list object for each institution  
 #  
 ### A. Set the working directory, load required libraries,   
 ### and load .rda file containing saved objects  
 rm(list = ls()) ### clear the environment  
 home <- "/blah-blah...blah-blah/PROJECT-AFFIRMATIVE-ACTION-LEGAL-CHALLENGE/"  
 setwd(home)  
 load(file="master1.rda")  
 ### Developed using R = version 3.5.1 and R Studio = version 1.456  
 ### install.packages("tidyverse")  
 library(tidyverse)  
 #  
 ### B. Source the local functions  
 #################################  
 #################################  
   
 makeTableGradRace <- function(dt) {  
 ### Function creates tableGradRace that contains the distribution  
 ###  of graduates in the 39 IPEDS degree areas for each racial group  
 #  
   dt2 <- dt[["allGradData16"]]  
   areaNames <- colnames(dt2[-c(1)])  
   raceNames <- dt2$race  
   #  
   # Trnspose the tibble  
   dt2 <- t(dt2)   
   dt2 <- dt2[-1,]  
   rowNames <- rownames(dt2)  
   dimMat <- dim(dt2)  
   #  
   # Convert character to integer matrix   
   vec <- as.integer(dt2)  
   mat <- matrix(vec, nrow = dimMat[1], ncol = dimMat[2])  
     
   dt2 <- as.data.frame(mat)  
   colnames(dt2) <- raceNames  
   #  
   areaNames <- as.data.frame(areaNames)   
   colnames(areaNames) <- "DegreeAreas"  
   areaNames$DegreeAreas <- as.character(areaNames$DegreeAreas)  
   dt2 <- data.frame(areaNames, dt2)  
   #  
   dt2 <- dt2 %>%  
     # Capitalize races for display in tables  
     mutate(Others = total - (white + black + hispanic + asian  
            + nonResAlien + unknown + twoOrMoreRaces)) %>%  
     rename(All = total, White = white,  
         Black = black, Hispanic = hispanic,  
         Asian = asian, Unknown = unknown,  
         NonResAlien = nonResAlien,  
         TwoOrMore = twoOrMoreRaces) %>%  
     # Reorder  
     select(DegreeAreas, All, White, Asian, Hispanic, Black,   
         TwoOrMore, Unknown, NonResAlien, Others) %>%  
     arrange(desc(Asian))   
   #  
   dt2 <- as.tibble(dt2)  
   #  
   # Remove internal prefix/suffix  
   dt2$DegreeAreas <- str_replace(dt2$DegreeAreas, "Grads", "")  
   dt2$DegreeAreas <- str_replace(dt2$DegreeAreas, "16", "")  
   #  
   ################### IMPORTANT CRITERION  
   minA2W <- round(dt2[1, "Asian"]/dt2[1, "White"],2)   
   ##################  
   ###################  
   #  
   dt[["minA2W"]] <- as.numeric(unclass(minA2W))  
   dt[["tableGradRace"]] <- dt2  
   return(dt)  
 }  
 #  
 modifyTableGradRace <- function(dt) {  
 ### Function adds statistical rows to tableGradRace  
 ###   plus an A2W column  
 ###   and adds "AFA" prefix to degrees in Asian Focus  
 #  
   minA2W <- dt[["minA2W"]]  
   dt1 <- dt[["tableGradRace"]]  
   dt1 <- dt1 %>%  
     mutate(A2W = round(Asian/White, 2))  
   dt1[1,1] <- "...SumClassOf2016..." ### rename total area  
   #  
   allAreasLine <- dt1[1,]  
   PercentClassOf2016 <- round((100 * allAreasLine[1,-1] / unclass(allAreasLine[1,2])), 0)  
   allAreas2Lines <- bind_rows(allAreasLine, PercentClassOf2016)  
   allAreas2Lines[2,1] <- "...PercentClassOf2016..."  
   allAreas2Lines[2, "A2W"] <- allAreas2Lines[1, "A2W"]  
   dt1 <- dt1[-1,] # drop original total line  
   dt1 <- bind_rows(dt1, allAreas2Lines) # add totals and % to bottom  
   #  
   # Create temporary table to store the areas in the Asian focus  
   # ... and compute stats on them   
   tableAsianFocus <- dt1  
   # Drop the totals and percent rows  
   nRows <- dim(dt1)[1]  
   tableAsianFocus <- tableAsianFocus[-c(nRows, nRows-1),] %>%   
     filter(A2W > minA2W)   
   asianFocusAreaData <- tableAsianFocus[, -1] ## not 1st col = names of areas  
   totAsianFocusAreaData <- colSums(asianFocusAreaData, na.rm = TRUE)  
   dt1 <- bind_rows(dt1, totAsianFocusAreaData)  
   nRows <- dim(dt1)[1]  
   dt1[nRows, 1] <- "...SumAsianFocusAreas..."  
   # Recompute the A2W value ... not the sum of A2W  
   dt1[nRows, "A2W"] <- round(dt1[nRows, "Asian"]/dt1[nRows, "White"], 2)  
   #  
   perAllInAsianFocusAreas <- round(100 * totAsianFocusAreaData/allAreasLine[,-1], 0)  
   perAllInAsianFocusAreas$DegreeAreas <- "perInAsianFocus"  
   perAllInAsianFocusAreas["A2W"] <- round(perAllInAsianFocusAreas["Asian"]/perAllInAsianFocusAreas["White"], 2)  
   #  
   dt1 <- bind_rows(dt1, perAllInAsianFocusAreas)  
   dt1[nRows + 1, 1] <- "...PercentAsianFocus..."  
   nRows <- dim(dt1)[1]  
   dtStatsRows <- dt1[c(nRows-3, nRows-2, nRows-1, nRows),]  
   #  
   # Save names of categories in Asian focus area  
   ###rows <- dim(tableAsianFocus)[1]  
   asianFocusAreas <- tableAsianFocus[, 1]  
   asianFocusAreas <- unlist(asianFocusAreas)  
   names(asianFocusAreas) <- NULL  
   dt[["asianFocusAreas"]] <- asianFocusAreas   
   #  
   patterns <- asianFocusAreas  
   prefix <- "AFA-"  
   dt1 <- addPrefixToAFArows(dt1, "DegreeAreas", patterns, prefix)  
   # stat rows prefixed by "..." will be at bottom; "AFA-" rows will be above stats  
   #   
   ### Some small institutions may not give degrees in  
   ### every area every year   
   ### Also SocialSciencesWithoutEconPoli may be zero if  
   ### SocSci degrees were only Econ and/or PoliSci  
   dt1 <- dt1 %>%  
     filter(All > 0) %>%  
     # This sort places the stats rows at bottom  
     # because of "..." prefixes  
     arrange(desc(DegreeAreas))  
   #  
   nRows <- dim(dt1)[1]  
   # dt2 rows contain degree area data  
   dt2 <- dt1[-c(nRows, nRows-1, nRows-2, nRows-3),]  
   # Sort the degree area rows by the A2W ratio  
   dt2 <- dt2 %>%  
     arrange(A2W)  
   dt4 <- bind_rows(dt2, dtStatsRows)  
   dt[["tableGradRace"]] <- dt4  
   #  
   return(dt)  
 }  
 #  
 splitTableGradRace <- function(dt){  
 ### Function splits tableGradRace into two parts, one for known  
 ### races, the other for DegreeAreas, All, Others, TwoOrMore,   
 ### Unknown, NonResAlien  
   #  
   dt1 <- dt[["tableGradRace"]] %>%  
     select(-c(Others, TwoOrMore, Unknown, NonResAlien))  
   #  
   dt2 <- dt[["tableGradRace"]] %>%  
     select(DegreeAreas, All, Others, TwoOrMore, Unknown, NonResAlien)  
   #  
   dt[["tableGradRaceRaces"]] <- dt1  
   dt[["tableGradRaceOthers"]] <- dt2  
   #  
   return(dt)  
 }  
 #  
 makeChartGradRaceA2W <- function(dt) {  
 ###   
 ### Function creats charts showing Asian to White ratios in  
 ###   all IPEDS degree areas, marking the A2W overall ratio  
   #   
   title = paste0(dt[["institution"]],"'s Asian Focus Areas")  
   dt1 <- dt[["tableGradRace"]]  
   #  
   # drop the stats rows = totals, subtotals, percentages  
   nRows <- dim(dt1)[1]  
   dt2 <- dt1[-c(nRows: (nRows-3)),] ### ddrop stat rows   
   dt2 <- dt2 %>%  
     arrange(desc(A2W))  
   dt2$DegreeAreas <- ordered(dt2$DegreeAreas, levels=dt2$DegreeAreas)  
   #  
   xLabel = "IPEDS Degree Areas"  
   minA2W <- dt[["minA2W"]]  
   yLabel = paste("Asian/White Ratios -- min A2W for AFA = ", minA2W)  
   #  
   gg <- ggplot(dt2, aes(x=DegreeAreas, y = A2W)) +  
     geom_bar(fill="turquoise2", stat="identity", position = "dodge") +  
     geom_hline(aes(yintercept = minA2W), colour="red", linetype="dashed") +  
     ggtitle(title) + xlab(xLabel) + ylab(yLabel) +   
     theme(plot.title = element_text(size=14, face="bold")) +  
     theme(plot.title = element_text(size=12), legend.position="bottom") +  
     coord_flip(xlim = NULL, ylim = NULL, expand = TRUE, clip = "on")  
   #  
   dt[["chartGradRaceA2W"]] <- gg  
   return(dt)  
 }  
 #  
 makeChartGradRace <- function(dt) {  
   ### Function creats charts of tableGradRace, i.e., distributions  
   ### of White, Asian grads.  
   #   
   title = paste0(dt[["shortName"]], "'s Asian & White Grads, Class of 2016")  
   dt1 <- dt[["tableGradRace"]]   
   # drop the stats rows = totals, subtotals, percentages  
   nRows <- dim(dt1)[1]  
   dt2 <- dt1[-c(nRows: (nRows-3)),]   
   dt2 <- arrange(dt2, desc(A2W))  
   dt2$DegreeAreas <- ordered(dt2$DegreeAreas, levels=dt2$DegreeAreas)  
   #  
   # Wrangle data into narrow format  
   dt2 <- dt2 %>%  
     select(-All) %>% # remove All category  
     gather(key="race", value="graduates", -DegreeAreas) %>%  
     filter(race %in% c("White", "Asian"))  
   #  
   xLabel = "IPEDS Degree Areas"  
   yLabel = paste("Number of Graduates")  
   #  
   gg <- ggplot(dt2, aes(x=DegreeAreas, y = graduates)) +  
     geom_bar(aes(fill=race), stat="identity", position = "dodge") +  
     ggtitle(title) + xlab(xLabel) + ylab(yLabel) +   
     theme(plot.title = element_text(size=14, face="bold")) +  
     scale_fill_brewer(palette = "Paired") +  
     theme(plot.title = element_text(size=12), legend.position="bottom") +  
     coord_flip(xlim = NULL, ylim = NULL, expand = TRUE, clip = "on")  
   #  
   dt[["chartGradRace"]] <- gg  
   return(dt)  
 }  
 #  
 addPrefixToAFArows <- function(dt, colname, patterns, prefix){  
 ###  
 ### Function searches for "patterns" in a character column,  
 ### if found, replaces <patterns> with <prefix><patterns>  
 #  
   column <- unlist(dt[, colname])  
   L <- length(column)  
   for(i in 1:L) {  
     lVec <- str_detect(patterns, column[i])  
     if (sum(lVec) > 0) {  
       # Only replace exact match  
       # e.g., if looking for "Engineering", don't  
       # accept "EngineeringTechnology"  
       column[i] <- paste0(prefix, column[i])  
     }  
   }  
   dt[, colname] <- column  
   #  
   return(dt)  
 }  
 #  
 makeTablesAndCharts <- function(dt, shortName){  
 ###  
 ### Master function that manages the creation of new components of   
 ###   the data object for each institution, i.e., tableEnroll,   
 ###   tableGradRace, tableAsianFocus, and chartGradRace  
 ###   Each object contains "institution" and "allGradData16"  
 ###   that were created by the master1.R script and loaded  
 ###   at the start of excution of master2.R script     
 #  
   dt[["shortName"]] <- shortName  
   dt <- makeTableGradRace(dt)  
   dt <- modifyTableGradRace(dt)  
   dt <- splitTableGradRace(dt)  
   dt <- makeChartGradRace(dt)  
   dt <- makeChartGradRaceA2W(dt)  
   return(dt)  
 }  
   
 #############################  
 ############################  
 #  
 ### C. Execute the script commands that call the functions   
 #     
 Harvard <- makeTablesAndCharts(Harvard, shortName="Harvard")  
 Yale <- makeTablesAndCharts(Yale, shortName="Yale")  
 Princeton <- makeTablesAndCharts(Princeton, shortName="Princeton")  
 Cornell <- makeTablesAndCharts(Cornell, shortName="Cornell")  
 Columbia <- makeTablesAndCharts(Columbia, shortName="Columbia")  
 Dartmouth <- makeTablesAndCharts(Dartmouth, shortName="Dartmouth")  
 Brown <- makeTablesAndCharts(Brown, shortName="Brown")  
 Penn <- makeTablesAndCharts( Penn, shortName="Penn")  
   
 Stanford <- makeTablesAndCharts(Stanford, shortName="Stanford")  
 Duke <- makeTablesAndCharts(Duke, shortName="Duke")  
 Northwestern <- makeTablesAndCharts(Northwestern, shortName="Northwestern")  
 Rice <- makeTablesAndCharts(Rice, shortName="Rice")  
 Tufts <- makeTablesAndCharts(Tufts, shortName="Tufts")  
 Amherst <- makeTablesAndCharts(Amherst, shortName="Amherst")  
 NotreDame <- makeTablesAndCharts(NotreDame, shortName="NotreDame")  
 Vanderbilt <- makeTablesAndCharts(Vanderbilt, shortName="Vanderbilt")  
 Washington <- makeTablesAndCharts(Washington, shortName="Washington")  
 IllinoisUC <- makeTablesAndCharts(Illinois, shortName="IllinoisUC")  
 CarnegieM <- makeTablesAndCharts(Carnegie, shortName="CarnegieM")  
 MIT <- makeTablesAndCharts(MIT, shortName="MIT")  
 CalTech <- makeTablesAndCharts(CalTech, shortName="CalTech")  
   
 #  
 listInstitutions <- list(Harvard = Harvard, Yale=Yale,   
              Princeton=Princeton,  
              Cornell=Cornell, Columbia = Columbia,  
              Dartmouth=Dartmouth, Brown=Brown,   
              Penn=Penn,   
              Stanford=Stanford, Duke=Duke,  
              Northwestern=Northwestern, Rice=Rice,  
              Tufts=Tufts, Amherst=Amherst,   
              NotreDame=NotreDame, Vanderbilt=Vanderbilt,  
              Washington=Washington, IllinoisUC=IllinoisUC,  
              CarnegieM=CarnegieM, MIT=MIT, CalTech=CalTech)  
 #  
 save(file="master2.rda",   
   listInstitutions,  
   tableSummaryEnrollDraft)  
   
 ######### EXAMPLES ################  
 tableHarvardGradRace <- Harvard[["tableGradRace"]]  
 tableStanfordGradRace <- Stanford[["tableGradRace"]]  
 #  
 chartHarvardGradRace <- Harvard[["chartGradRace"]]  
 chartHarvardGradRace  
 #  
 Harvard2 <- listInstitutions[["Harvard"]]  
 tableHarvard2GradRace = Harvard2[["tableGradRace"]]
TECH-Levers

R-code master2

No comments:

Post a Comment