R-code master3

 ### Creates summary tables and summary charts  
 #    
 ### A. Set the working directory, load required libraries, and read IPEDS data  
 rm(list = ls()) ### clear the environment  
 home <- "/blah-blah...blah-blah/PROJECTS/PROJECT-AFFIRMATIVE-ACTION-LEGAL-CHALLENGE/"  
 setwd(home)  
 load(file="master2.rda")  
 #  
 ### Developed using R = version 3.5.1 and R Studio = version 1.456  
 ### install.packages("tidyverse")  
 library(tidyverse)  
 ###library(RColorBrewer)  
 #  
 ### B. Source the local functions  
 #################################  
 #################################  
 #   
 makePerVec <- function(vec, firstTotal = TRUE, dec=0) {  
 ### Given a vector, function calculates sum of vec, then percent  
 ###   for each value, and returns percent vector  
 ### If firstTotal = TRUE, function assumes the first element of  
 ###   input is the sum of the other values, even if it isn't  
 ### Always returns 100 as first value in percent vector  
   #  
   if (firstTotal != TRUE) {  
     total = sum(vec)  
     vec <- c(total, vec)  
   }  
   perVec <- round(100 * (vec[-1]/vec[1]), dec)  
   perVec <- c(100, perVec)  
   return(perVec)  
 }  
 #   
 makeTableSummaryGrads <- function(dt, dec = 0) {  
 ### Function creates summary table that displays important stats  
 ###   about concentration of Asians in focus areas at 21 institutions  
 ###   institution, totGrads, perWhiteGrads, perAsianGrads, nIPEDS,   
 ###   nFocus, perWhiteFocus, perAsianFocus  
   #  
   # Initialize empty data frame  
   dt2 <- NULL  
   #  
   for (inst in dt) {  
     shortName <- inst[["shortName"]]   
     dt1 <- as.data.frame(inst[["tableGradRace"]])  
     rownames(dt1) <- dt1[,1] ### use first column as names  
     nRows <- nrow(dt1)  
     nIPEDS <- nRows - 4 # subtract 4 summary rows  
     dt1 <- dt1[c(nRows-3, nRows-2, nRows-1, nRows), ]  
     asianFocusAreas <- inst[["asianFocusAreas"]]  
     nFocus <- length(asianFocusAreas)  
     #  
     totGrads <- dt1["...SumClassOf2016...", "All"]  
     whiteGrads <- dt1["...SumClassOf2016...", "White"]  
     perWhiteGrads <- round((100 * whiteGrads/totGrads), dec)  
     asianGrads <- dt1["...SumClassOf2016...", "Asian"]  
     perAsianGrads <- round((100 * asianGrads/totGrads), dec)  
     perWhiteInFocus <- dt1["...PercentAsianFocus...", "White"]  
     perAsianInFocus <- dt1["...PercentAsianFocus...", "Asian"]  
     #  
     dfRow <- data.frame(shortName, totGrads, perWhiteGrads,  
             perAsianGrads, nIPEDS, nFocus,   
             perWhiteInFocus, perAsianInFocus,   
             stringsAsFactors = FALSE)  
     dt2 <- bind_rows(dt2, dfRow)  
   }  
   #  
   dt2 <- arrange(dt2, desc(perAsianInFocus))  
   colnames(dt2) <- c("Institution", "Total Grads", "% White Grads",  
                 "% Asian Grads", "Num IPEDS", "Num Focus",  
                 "% White In Focus", "% Asian In Focus")  
   dt2 <- as_tibble(dt2)  
   return(dt2)  
 }  
 #  
 makeTableSummaryEnrollRacesOthers <- function(draft, listInstitutions, Others=NULL){  
   ### Function substitutes short names for long names in enroll table  
   ### Substitutes percents of Total for all variables except Total  
   #  
   draft <- as.data.frame(draft)  
   rownames(draft) <- unclass(draft[,1])  
   L <- length(listInstitutions)  
   for (i in 1:L) {  
     shortName <- listInstitutions[[i]][["shortName"]]  
     longName <- listInstitutions[[i]][["institution"]]  
     draft[longName,1] <- shortName # Using rownames here  
   }  
   rownames(draft) <- NULL  
   dt <- as_tibble(draft)  
   dt <- arrange(dt, Institution)  
   #   
   if (is.null(Others)) {  
     #Computer percentage of races  
     dt <- dt %>%  
       mutate("% White" = round((100 * White/Total), 1),  
           "% Asian" = round((100 * Asian/Total), 1),  
           "% Black" = round((100 * Black/Total), 1),  
           "% Hispanic" = round((100 * Hispanic/Total), 1)) %>%  
       rename(M25th = M25th15) %>%  
       select(Institution, Total, "% White", "% Asian",   
         "% Black", "% Hispanic", M25th)  
   } else {  
     # Compute percentage of other categories  
     dt <- dt %>%  
       mutate(Others = AmIndAlNat + NatHawPac,  
           OthersPer = round((100 * Others/Total), 1),  
           NonResAlienPer = round((100 * NonResAlien/Total), 1),  
           TwoOrMorePer = round((100 * TwoOrMore/Total), 1),  
           UnknownPer = round((100 * Unknown/Total), 1)) %>%  
       rename("% Others" = Others,   
           "% NonRes-Alien" = NonResAlienPer,  
           "% TwoOr-More" = TwoOrMorePer,  
           "% Unknown" = UnknownPer) %>%  
       select(Institution, Total, "% Others",   
           "% TwoOr-More", "% Unknown", "% NonRes-Alien")  
   }  
   return(dt)  
 }  
 #  
 makeChartSummaryGrads <- function(dt){  
   #   
   title = "Percent White vs. Percent Asian in Asian Focus Areas"  
   dt1 <- dt %>%   
     select(`Institution`, `% White In Focus`,`% Asian In Focus` ) %>%  
     rename(WhiteIN = `% White In Focus`) %>%  
     rename(AsianIN = `% Asian In Focus`) %>%  
     arrange(AsianIN)  
   dt1$Institution <- ordered(dt1$Institution,   
                 levels=dt1$Institution)  
   #  
   # Wrangle data into narrow format  
   dt1 <- dt1 %>%  
     gather(key="Race", value="Percent", -Institution)   
   #  
   xLabel = "Institution"  
   yLabel = "Percent in Asian Focus Areas"  
   #  
   gg <- ggplot(dt1, aes(x=Institution, y = Percent)) +  
     geom_bar(aes(fill=Race), stat="identity", position = "dodge") +  
     ggtitle(title) + xlab(xLabel) + ylab(yLabel) +   
     theme(plot.title = element_text(size=12, face="bold"), legend.position="bottom") +  
     scale_fill_brewer(palette = "Paired") +  
     coord_flip(xlim = NULL, ylim = NULL, expand = TRUE, clip = "on")   
   #  
   return(gg)  
 }  
 #  
 makeChartSummaryNamesTop5AFAs <- function(dt){  
 ### Function identifies which IPEDS degree areas are the   
 ### most popular AFAs at the 21 selected institutions  
 #  
 ### Loops throgh all institutions to obtain names of AFAs   
 ###   then tabulates their frequency using table function  
 ###   then produces a bar chart of names vs. frequency  
 #  
   allNamesAFA <- NULL  
   for (inst in dt) {  
     allNamesAFA <- c(allNamesAFA, inst[["asianFocusAreas"]])  
   }  
   #  
   namesAFA <- table(allNamesAFA)  
   dt <- as.tibble(namesAFA)  
   colnames(dt) <- c("AFA", "N")  
   dt <- arrange(dt, desc(N))[1:5,]  
   dt$AFA <- ordered(dt$AFA, levels=dt$AFA)  
   #    
   title = paste("Top Five Asian Focus Areas")  
   xLabel = "Asian Focus Areas"  
   yLabel = paste("Number of Institutions Having These Asian Focus Areas")  
   #  
   gg <- ggplot(dt, aes(x=AFA, y = N)) +  
     geom_bar(fill="turquoise2", stat="identity", position = "dodge", width=0.7) +  
     geom_text(aes(label=N), hjust=-0.2, colour="turquoise3", size=3, fontface="bold.italic") +  
     ggtitle(title) + xlab(xLabel) + ylab(yLabel) +   
     theme(plot.title = element_text(size=12, face="bold"), legend.position="bottom") +  
     coord_flip(xlim = NULL, ylim = NULL, expand = TRUE, clip = "on")  
   #  
   return(gg)   
 }  
 #  
 makeTableHarvardTech <- function(dt, inst1="Harvard", inst2="MIT"){  
   Harvard <- dt[[inst1]]  
   MIT <- dt[[inst2]]  
   tabHarvard <- Harvard[["tableGradRace"]]  
   tabMIT <- MIT[["tableGradRace"]]  
   #  
   # Get last two rows for Havard ... Asian Focus Area stats  
   hRows <- dim(tabHarvard)[1]  
   tabHarvard <- tabHarvard[hRows-1,]  
   tabHarvard <- rename(tabHarvard, Institution = DegreeAreas)  
   tabHarvard[1,1] <- "HarvardTech"  
   tabHarvard <- select(tabHarvard, -A2W)  
   perHarvard <- makePerVec(as.numeric(tabHarvard[1, -1]), firstTotal = TRUE, dec=0)  
   names(perHarvard) <- names(tabHarvard[-1])  
   tabHarvard <- bind_rows(tabHarvard, perHarvard)  
   tabHarvard[2,1] <- "HarvardTech %"  
   #  
   # Get two preceding rows for MIT ... stats for all students  
   mRows <- dim(tabMIT)[1]  
   tabMIT <- tabMIT[mRows-3,]  
   tabMIT <- rename(tabMIT, Institution = DegreeAreas)  
   tabMIT[1,1] <- "M.I.T."  
   tabMIT <- select(tabMIT, -A2W)  
   perMIT <- makePerVec(as.numeric(tabMIT[1, -1]), firstTotal = TRUE, dec=0)  
   names(perMIT) <- names(tabMIT[-1])  
   tabMIT <- bind_rows(tabMIT, perMIT)  
   tabMIT[2,1] <- "M.I.T. %"  
   #  
   tableHarvardTech <- bind_rows(tabMIT, tabHarvard)  
   tableHarvardTech <- select(tableHarvardTech, -c(White, Asian))  
   return(tableHarvardTech)  
 }  
 #  
 #############################  
 ############################  
 #  
 ### C. Execute the script commands that call the functions   
 tableSummaryGrads <- makeTableSummaryGrads(listInstitutions)  
 #  
   
 #####   
 tableSummaryEnrollRaces <- makeTableSummaryEnrollRacesOthers(tableSummaryEnrollDraft,                   listInstitutions)  
 tableSummaryEnrollOthers <- makeTableSummaryEnrollRacesOthers(tableSummaryEnrollDraft,                   listInstitutions, "Others")  
 tableHarvardTech <- makeTableHarvardTech(listInstitutions, "Harvard", "MIT")  
 #  
 chartSummaryGrads <- makeChartSummaryGrads(tableSummaryGrads)  
 chartSummaryGrads  
   
 chartSummaryNamesTop5AFAs <- makeChartSummaryNamesTop5AFAs(listInstitutions)  
 chartSummaryNamesTop5AFAs  
 #  
 save(file="master3.rda", listInstitutions, tableHarvardTech, tableSummaryGrads, tableSummaryEnrollRaces, tableSummaryEnrollOthers, chartSummaryGrads, chartSummaryNamesTop5AFAs)   
   
 ##################  
 #Examples  
 ratios <- tableSummaryGrads$`Num Focus` / tableSummaryGrads$`Num IPEDS`  
 sum(ratios < .4)  
   
 Harvard <- listInstitutions[["Harvard"]]  
 tableHarvardGradRace<-Harvard[["tableGradRace"]]  
   
 #  
 tabHarvard <- tabHarvard %>%  
   select(-c(White, Asian, A2W))   
 tabMIT <- tabMIT %>%  
   select(-c(White, Asian, A2W))  
   

No comments:

Post a Comment

Thank you!!! Your comments and suggestions will be greatly appreciated ... :-)