### Script generates tables and charts for each institution
### ... adds components to list object for each institution
#
### A. Set the working directory, load required libraries,
### and load .rda file containing saved objects
rm(list = ls()) ### clear the environment
home <- "/blah-blah...blah-blah/PROJECT-AFFIRMATIVE-ACTION-LEGAL-CHALLENGE/"
setwd(home)
load(file="master1.rda")
### Developed using R = version 3.5.1 and R Studio = version 1.456
### install.packages("tidyverse")
library(tidyverse)
#
### B. Source the local functions
#################################
#################################
makeTableGradRace <- function(dt) {
### Function creates tableGradRace that contains the distribution
### of graduates in the 39 IPEDS degree areas for each racial group
#
dt2 <- dt[["allGradData16"]]
areaNames <- colnames(dt2[-c(1)])
raceNames <- dt2$race
#
# Trnspose the tibble
dt2 <- t(dt2)
dt2 <- dt2[-1,]
rowNames <- rownames(dt2)
dimMat <- dim(dt2)
#
# Convert character to integer matrix
vec <- as.integer(dt2)
mat <- matrix(vec, nrow = dimMat[1], ncol = dimMat[2])
dt2 <- as.data.frame(mat)
colnames(dt2) <- raceNames
#
areaNames <- as.data.frame(areaNames)
colnames(areaNames) <- "DegreeAreas"
areaNames$DegreeAreas <- as.character(areaNames$DegreeAreas)
dt2 <- data.frame(areaNames, dt2)
#
dt2 <- dt2 %>%
# Capitalize races for display in tables
mutate(Others = total - (white + black + hispanic + asian
+ nonResAlien + unknown + twoOrMoreRaces)) %>%
rename(All = total, White = white,
Black = black, Hispanic = hispanic,
Asian = asian, Unknown = unknown,
NonResAlien = nonResAlien,
TwoOrMore = twoOrMoreRaces) %>%
# Reorder
select(DegreeAreas, All, White, Asian, Hispanic, Black,
TwoOrMore, Unknown, NonResAlien, Others) %>%
arrange(desc(Asian))
#
dt2 <- as.tibble(dt2)
#
# Remove internal prefix/suffix
dt2$DegreeAreas <- str_replace(dt2$DegreeAreas, "Grads", "")
dt2$DegreeAreas <- str_replace(dt2$DegreeAreas, "16", "")
#
################### IMPORTANT CRITERION
minA2W <- round(dt2[1, "Asian"]/dt2[1, "White"],2)
##################
###################
#
dt[["minA2W"]] <- as.numeric(unclass(minA2W))
dt[["tableGradRace"]] <- dt2
return(dt)
}
#
modifyTableGradRace <- function(dt) {
### Function adds statistical rows to tableGradRace
### plus an A2W column
### and adds "AFA" prefix to degrees in Asian Focus
#
minA2W <- dt[["minA2W"]]
dt1 <- dt[["tableGradRace"]]
dt1 <- dt1 %>%
mutate(A2W = round(Asian/White, 2))
dt1[1,1] <- "...SumClassOf2016..." ### rename total area
#
allAreasLine <- dt1[1,]
PercentClassOf2016 <- round((100 * allAreasLine[1,-1] / unclass(allAreasLine[1,2])), 0)
allAreas2Lines <- bind_rows(allAreasLine, PercentClassOf2016)
allAreas2Lines[2,1] <- "...PercentClassOf2016..."
allAreas2Lines[2, "A2W"] <- allAreas2Lines[1, "A2W"]
dt1 <- dt1[-1,] # drop original total line
dt1 <- bind_rows(dt1, allAreas2Lines) # add totals and % to bottom
#
# Create temporary table to store the areas in the Asian focus
# ... and compute stats on them
tableAsianFocus <- dt1
# Drop the totals and percent rows
nRows <- dim(dt1)[1]
tableAsianFocus <- tableAsianFocus[-c(nRows, nRows-1),] %>%
filter(A2W > minA2W)
asianFocusAreaData <- tableAsianFocus[, -1] ## not 1st col = names of areas
totAsianFocusAreaData <- colSums(asianFocusAreaData, na.rm = TRUE)
dt1 <- bind_rows(dt1, totAsianFocusAreaData)
nRows <- dim(dt1)[1]
dt1[nRows, 1] <- "...SumAsianFocusAreas..."
# Recompute the A2W value ... not the sum of A2W
dt1[nRows, "A2W"] <- round(dt1[nRows, "Asian"]/dt1[nRows, "White"], 2)
#
perAllInAsianFocusAreas <- round(100 * totAsianFocusAreaData/allAreasLine[,-1], 0)
perAllInAsianFocusAreas$DegreeAreas <- "perInAsianFocus"
perAllInAsianFocusAreas["A2W"] <- round(perAllInAsianFocusAreas["Asian"]/perAllInAsianFocusAreas["White"], 2)
#
dt1 <- bind_rows(dt1, perAllInAsianFocusAreas)
dt1[nRows + 1, 1] <- "...PercentAsianFocus..."
nRows <- dim(dt1)[1]
dtStatsRows <- dt1[c(nRows-3, nRows-2, nRows-1, nRows),]
#
# Save names of categories in Asian focus area
###rows <- dim(tableAsianFocus)[1]
asianFocusAreas <- tableAsianFocus[, 1]
asianFocusAreas <- unlist(asianFocusAreas)
names(asianFocusAreas) <- NULL
dt[["asianFocusAreas"]] <- asianFocusAreas
#
patterns <- asianFocusAreas
prefix <- "AFA-"
dt1 <- addPrefixToAFArows(dt1, "DegreeAreas", patterns, prefix)
# stat rows prefixed by "..." will be at bottom; "AFA-" rows will be above stats
#
### Some small institutions may not give degrees in
### every area every year
### Also SocialSciencesWithoutEconPoli may be zero if
### SocSci degrees were only Econ and/or PoliSci
dt1 <- dt1 %>%
filter(All > 0) %>%
# This sort places the stats rows at bottom
# because of "..." prefixes
arrange(desc(DegreeAreas))
#
nRows <- dim(dt1)[1]
# dt2 rows contain degree area data
dt2 <- dt1[-c(nRows, nRows-1, nRows-2, nRows-3),]
# Sort the degree area rows by the A2W ratio
dt2 <- dt2 %>%
arrange(A2W)
dt4 <- bind_rows(dt2, dtStatsRows)
dt[["tableGradRace"]] <- dt4
#
return(dt)
}
#
splitTableGradRace <- function(dt){
### Function splits tableGradRace into two parts, one for known
### races, the other for DegreeAreas, All, Others, TwoOrMore,
### Unknown, NonResAlien
#
dt1 <- dt[["tableGradRace"]] %>%
select(-c(Others, TwoOrMore, Unknown, NonResAlien))
#
dt2 <- dt[["tableGradRace"]] %>%
select(DegreeAreas, All, Others, TwoOrMore, Unknown, NonResAlien)
#
dt[["tableGradRaceRaces"]] <- dt1
dt[["tableGradRaceOthers"]] <- dt2
#
return(dt)
}
#
makeChartGradRaceA2W <- function(dt) {
###
### Function creats charts showing Asian to White ratios in
### all IPEDS degree areas, marking the A2W overall ratio
#
title = paste0(dt[["institution"]],"'s Asian Focus Areas")
dt1 <- dt[["tableGradRace"]]
#
# drop the stats rows = totals, subtotals, percentages
nRows <- dim(dt1)[1]
dt2 <- dt1[-c(nRows: (nRows-3)),] ### ddrop stat rows
dt2 <- dt2 %>%
arrange(desc(A2W))
dt2$DegreeAreas <- ordered(dt2$DegreeAreas, levels=dt2$DegreeAreas)
#
xLabel = "IPEDS Degree Areas"
minA2W <- dt[["minA2W"]]
yLabel = paste("Asian/White Ratios -- min A2W for AFA = ", minA2W)
#
gg <- ggplot(dt2, aes(x=DegreeAreas, y = A2W)) +
geom_bar(fill="turquoise2", stat="identity", position = "dodge") +
geom_hline(aes(yintercept = minA2W), colour="red", linetype="dashed") +
ggtitle(title) + xlab(xLabel) + ylab(yLabel) +
theme(plot.title = element_text(size=14, face="bold")) +
theme(plot.title = element_text(size=12), legend.position="bottom") +
coord_flip(xlim = NULL, ylim = NULL, expand = TRUE, clip = "on")
#
dt[["chartGradRaceA2W"]] <- gg
return(dt)
}
#
makeChartGradRace <- function(dt) {
### Function creats charts of tableGradRace, i.e., distributions
### of White, Asian grads.
#
title = paste0(dt[["shortName"]], "'s Asian & White Grads, Class of 2016")
dt1 <- dt[["tableGradRace"]]
# drop the stats rows = totals, subtotals, percentages
nRows <- dim(dt1)[1]
dt2 <- dt1[-c(nRows: (nRows-3)),]
dt2 <- arrange(dt2, desc(A2W))
dt2$DegreeAreas <- ordered(dt2$DegreeAreas, levels=dt2$DegreeAreas)
#
# Wrangle data into narrow format
dt2 <- dt2 %>%
select(-All) %>% # remove All category
gather(key="race", value="graduates", -DegreeAreas) %>%
filter(race %in% c("White", "Asian"))
#
xLabel = "IPEDS Degree Areas"
yLabel = paste("Number of Graduates")
#
gg <- ggplot(dt2, aes(x=DegreeAreas, y = graduates)) +
geom_bar(aes(fill=race), stat="identity", position = "dodge") +
ggtitle(title) + xlab(xLabel) + ylab(yLabel) +
theme(plot.title = element_text(size=14, face="bold")) +
scale_fill_brewer(palette = "Paired") +
theme(plot.title = element_text(size=12), legend.position="bottom") +
coord_flip(xlim = NULL, ylim = NULL, expand = TRUE, clip = "on")
#
dt[["chartGradRace"]] <- gg
return(dt)
}
#
addPrefixToAFArows <- function(dt, colname, patterns, prefix){
###
### Function searches for "patterns" in a character column,
### if found, replaces <patterns> with <prefix><patterns>
#
column <- unlist(dt[, colname])
L <- length(column)
for(i in 1:L) {
lVec <- str_detect(patterns, column[i])
if (sum(lVec) > 0) {
# Only replace exact match
# e.g., if looking for "Engineering", don't
# accept "EngineeringTechnology"
column[i] <- paste0(prefix, column[i])
}
}
dt[, colname] <- column
#
return(dt)
}
#
makeTablesAndCharts <- function(dt, shortName){
###
### Master function that manages the creation of new components of
### the data object for each institution, i.e., tableEnroll,
### tableGradRace, tableAsianFocus, and chartGradRace
### Each object contains "institution" and "allGradData16"
### that were created by the master1.R script and loaded
### at the start of excution of master2.R script
#
dt[["shortName"]] <- shortName
dt <- makeTableGradRace(dt)
dt <- modifyTableGradRace(dt)
dt <- splitTableGradRace(dt)
dt <- makeChartGradRace(dt)
dt <- makeChartGradRaceA2W(dt)
return(dt)
}
#############################
############################
#
### C. Execute the script commands that call the functions
#
Harvard <- makeTablesAndCharts(Harvard, shortName="Harvard")
Yale <- makeTablesAndCharts(Yale, shortName="Yale")
Princeton <- makeTablesAndCharts(Princeton, shortName="Princeton")
Cornell <- makeTablesAndCharts(Cornell, shortName="Cornell")
Columbia <- makeTablesAndCharts(Columbia, shortName="Columbia")
Dartmouth <- makeTablesAndCharts(Dartmouth, shortName="Dartmouth")
Brown <- makeTablesAndCharts(Brown, shortName="Brown")
Penn <- makeTablesAndCharts( Penn, shortName="Penn")
Stanford <- makeTablesAndCharts(Stanford, shortName="Stanford")
Duke <- makeTablesAndCharts(Duke, shortName="Duke")
Northwestern <- makeTablesAndCharts(Northwestern, shortName="Northwestern")
Rice <- makeTablesAndCharts(Rice, shortName="Rice")
Tufts <- makeTablesAndCharts(Tufts, shortName="Tufts")
Amherst <- makeTablesAndCharts(Amherst, shortName="Amherst")
NotreDame <- makeTablesAndCharts(NotreDame, shortName="NotreDame")
Vanderbilt <- makeTablesAndCharts(Vanderbilt, shortName="Vanderbilt")
Washington <- makeTablesAndCharts(Washington, shortName="Washington")
IllinoisUC <- makeTablesAndCharts(Illinois, shortName="IllinoisUC")
CarnegieM <- makeTablesAndCharts(Carnegie, shortName="CarnegieM")
MIT <- makeTablesAndCharts(MIT, shortName="MIT")
CalTech <- makeTablesAndCharts(CalTech, shortName="CalTech")
#
listInstitutions <- list(Harvard = Harvard, Yale=Yale,
Princeton=Princeton,
Cornell=Cornell, Columbia = Columbia,
Dartmouth=Dartmouth, Brown=Brown,
Penn=Penn,
Stanford=Stanford, Duke=Duke,
Northwestern=Northwestern, Rice=Rice,
Tufts=Tufts, Amherst=Amherst,
NotreDame=NotreDame, Vanderbilt=Vanderbilt,
Washington=Washington, IllinoisUC=IllinoisUC,
CarnegieM=CarnegieM, MIT=MIT, CalTech=CalTech)
#
save(file="master2.rda",
listInstitutions,
tableSummaryEnrollDraft)
######### EXAMPLES ################
tableHarvardGradRace <- Harvard[["tableGradRace"]]
tableStanfordGradRace <- Stanford[["tableGradRace"]]
#
chartHarvardGradRace <- Harvard[["chartGradRace"]]
chartHarvardGradRace
#
Harvard2 <- listInstitutions[["Harvard"]]
tableHarvard2GradRace = Harvard2[["tableGradRace"]]
This blog was established by the Digital Learning Lab to provide information that supports Black America’s efforts to close the Digital Divide. Its original focus on HBCUs has been broadened to include other colleges, universities, and community-based groups that enhance the computational thinking skills of Black Americans and the networks of successful Black techs who support each others’ efforts to achieve even greater success.
R-code master2
Subscribe to:
Posts (Atom)
No comments:
Post a Comment
Thank you!!! Your comments and suggestions will be greatly appreciated ... :-)