### Creates summary tables and summary charts
#
### A. Set the working directory, load required libraries, and read IPEDS data
rm(list = ls()) ### clear the environment
home <- "/blah-blah...blah-blah/PROJECTS/PROJECT-AFFIRMATIVE-ACTION-LEGAL-CHALLENGE/"
setwd(home)
load(file="master2.rda")
#
### Developed using R = version 3.5.1 and R Studio = version 1.456
### install.packages("tidyverse")
library(tidyverse)
###library(RColorBrewer)
#
### B. Source the local functions
#################################
#################################
#
makePerVec <- function(vec, firstTotal = TRUE, dec=0) {
### Given a vector, function calculates sum of vec, then percent
### for each value, and returns percent vector
### If firstTotal = TRUE, function assumes the first element of
### input is the sum of the other values, even if it isn't
### Always returns 100 as first value in percent vector
#
if (firstTotal != TRUE) {
total = sum(vec)
vec <- c(total, vec)
}
perVec <- round(100 * (vec[-1]/vec[1]), dec)
perVec <- c(100, perVec)
return(perVec)
}
#
makeTableSummaryGrads <- function(dt, dec = 0) {
### Function creates summary table that displays important stats
### about concentration of Asians in focus areas at 21 institutions
### institution, totGrads, perWhiteGrads, perAsianGrads, nIPEDS,
### nFocus, perWhiteFocus, perAsianFocus
#
# Initialize empty data frame
dt2 <- NULL
#
for (inst in dt) {
shortName <- inst[["shortName"]]
dt1 <- as.data.frame(inst[["tableGradRace"]])
rownames(dt1) <- dt1[,1] ### use first column as names
nRows <- nrow(dt1)
nIPEDS <- nRows - 4 # subtract 4 summary rows
dt1 <- dt1[c(nRows-3, nRows-2, nRows-1, nRows), ]
asianFocusAreas <- inst[["asianFocusAreas"]]
nFocus <- length(asianFocusAreas)
#
totGrads <- dt1["...SumClassOf2016...", "All"]
whiteGrads <- dt1["...SumClassOf2016...", "White"]
perWhiteGrads <- round((100 * whiteGrads/totGrads), dec)
asianGrads <- dt1["...SumClassOf2016...", "Asian"]
perAsianGrads <- round((100 * asianGrads/totGrads), dec)
perWhiteInFocus <- dt1["...PercentAsianFocus...", "White"]
perAsianInFocus <- dt1["...PercentAsianFocus...", "Asian"]
#
dfRow <- data.frame(shortName, totGrads, perWhiteGrads,
perAsianGrads, nIPEDS, nFocus,
perWhiteInFocus, perAsianInFocus,
stringsAsFactors = FALSE)
dt2 <- bind_rows(dt2, dfRow)
}
#
dt2 <- arrange(dt2, desc(perAsianInFocus))
colnames(dt2) <- c("Institution", "Total Grads", "% White Grads",
"% Asian Grads", "Num IPEDS", "Num Focus",
"% White In Focus", "% Asian In Focus")
dt2 <- as_tibble(dt2)
return(dt2)
}
#
makeTableSummaryEnrollRacesOthers <- function(draft, listInstitutions, Others=NULL){
### Function substitutes short names for long names in enroll table
### Substitutes percents of Total for all variables except Total
#
draft <- as.data.frame(draft)
rownames(draft) <- unclass(draft[,1])
L <- length(listInstitutions)
for (i in 1:L) {
shortName <- listInstitutions[[i]][["shortName"]]
longName <- listInstitutions[[i]][["institution"]]
draft[longName,1] <- shortName # Using rownames here
}
rownames(draft) <- NULL
dt <- as_tibble(draft)
dt <- arrange(dt, Institution)
#
if (is.null(Others)) {
#Computer percentage of races
dt <- dt %>%
mutate("% White" = round((100 * White/Total), 1),
"% Asian" = round((100 * Asian/Total), 1),
"% Black" = round((100 * Black/Total), 1),
"% Hispanic" = round((100 * Hispanic/Total), 1)) %>%
rename(M25th = M25th15) %>%
select(Institution, Total, "% White", "% Asian",
"% Black", "% Hispanic", M25th)
} else {
# Compute percentage of other categories
dt <- dt %>%
mutate(Others = AmIndAlNat + NatHawPac,
OthersPer = round((100 * Others/Total), 1),
NonResAlienPer = round((100 * NonResAlien/Total), 1),
TwoOrMorePer = round((100 * TwoOrMore/Total), 1),
UnknownPer = round((100 * Unknown/Total), 1)) %>%
rename("% Others" = Others,
"% NonRes-Alien" = NonResAlienPer,
"% TwoOr-More" = TwoOrMorePer,
"% Unknown" = UnknownPer) %>%
select(Institution, Total, "% Others",
"% TwoOr-More", "% Unknown", "% NonRes-Alien")
}
return(dt)
}
#
makeChartSummaryGrads <- function(dt){
#
title = "Percent White vs. Percent Asian in Asian Focus Areas"
dt1 <- dt %>%
select(`Institution`, `% White In Focus`,`% Asian In Focus` ) %>%
rename(WhiteIN = `% White In Focus`) %>%
rename(AsianIN = `% Asian In Focus`) %>%
arrange(AsianIN)
dt1$Institution <- ordered(dt1$Institution,
levels=dt1$Institution)
#
# Wrangle data into narrow format
dt1 <- dt1 %>%
gather(key="Race", value="Percent", -Institution)
#
xLabel = "Institution"
yLabel = "Percent in Asian Focus Areas"
#
gg <- ggplot(dt1, aes(x=Institution, y = Percent)) +
geom_bar(aes(fill=Race), stat="identity", position = "dodge") +
ggtitle(title) + xlab(xLabel) + ylab(yLabel) +
theme(plot.title = element_text(size=12, face="bold"), legend.position="bottom") +
scale_fill_brewer(palette = "Paired") +
coord_flip(xlim = NULL, ylim = NULL, expand = TRUE, clip = "on")
#
return(gg)
}
#
makeChartSummaryNamesTop5AFAs <- function(dt){
### Function identifies which IPEDS degree areas are the
### most popular AFAs at the 21 selected institutions
#
### Loops throgh all institutions to obtain names of AFAs
### then tabulates their frequency using table function
### then produces a bar chart of names vs. frequency
#
allNamesAFA <- NULL
for (inst in dt) {
allNamesAFA <- c(allNamesAFA, inst[["asianFocusAreas"]])
}
#
namesAFA <- table(allNamesAFA)
dt <- as.tibble(namesAFA)
colnames(dt) <- c("AFA", "N")
dt <- arrange(dt, desc(N))[1:5,]
dt$AFA <- ordered(dt$AFA, levels=dt$AFA)
#
title = paste("Top Five Asian Focus Areas")
xLabel = "Asian Focus Areas"
yLabel = paste("Number of Institutions Having These Asian Focus Areas")
#
gg <- ggplot(dt, aes(x=AFA, y = N)) +
geom_bar(fill="turquoise2", stat="identity", position = "dodge", width=0.7) +
geom_text(aes(label=N), hjust=-0.2, colour="turquoise3", size=3, fontface="bold.italic") +
ggtitle(title) + xlab(xLabel) + ylab(yLabel) +
theme(plot.title = element_text(size=12, face="bold"), legend.position="bottom") +
coord_flip(xlim = NULL, ylim = NULL, expand = TRUE, clip = "on")
#
return(gg)
}
#
makeTableHarvardTech <- function(dt, inst1="Harvard", inst2="MIT"){
Harvard <- dt[[inst1]]
MIT <- dt[[inst2]]
tabHarvard <- Harvard[["tableGradRace"]]
tabMIT <- MIT[["tableGradRace"]]
#
# Get last two rows for Havard ... Asian Focus Area stats
hRows <- dim(tabHarvard)[1]
tabHarvard <- tabHarvard[hRows-1,]
tabHarvard <- rename(tabHarvard, Institution = DegreeAreas)
tabHarvard[1,1] <- "HarvardTech"
tabHarvard <- select(tabHarvard, -A2W)
perHarvard <- makePerVec(as.numeric(tabHarvard[1, -1]), firstTotal = TRUE, dec=0)
names(perHarvard) <- names(tabHarvard[-1])
tabHarvard <- bind_rows(tabHarvard, perHarvard)
tabHarvard[2,1] <- "HarvardTech %"
#
# Get two preceding rows for MIT ... stats for all students
mRows <- dim(tabMIT)[1]
tabMIT <- tabMIT[mRows-3,]
tabMIT <- rename(tabMIT, Institution = DegreeAreas)
tabMIT[1,1] <- "M.I.T."
tabMIT <- select(tabMIT, -A2W)
perMIT <- makePerVec(as.numeric(tabMIT[1, -1]), firstTotal = TRUE, dec=0)
names(perMIT) <- names(tabMIT[-1])
tabMIT <- bind_rows(tabMIT, perMIT)
tabMIT[2,1] <- "M.I.T. %"
#
tableHarvardTech <- bind_rows(tabMIT, tabHarvard)
tableHarvardTech <- select(tableHarvardTech, -c(White, Asian))
return(tableHarvardTech)
}
#
#############################
############################
#
### C. Execute the script commands that call the functions
tableSummaryGrads <- makeTableSummaryGrads(listInstitutions)
#
#####
tableSummaryEnrollRaces <- makeTableSummaryEnrollRacesOthers(tableSummaryEnrollDraft, listInstitutions)
tableSummaryEnrollOthers <- makeTableSummaryEnrollRacesOthers(tableSummaryEnrollDraft, listInstitutions, "Others")
tableHarvardTech <- makeTableHarvardTech(listInstitutions, "Harvard", "MIT")
#
chartSummaryGrads <- makeChartSummaryGrads(tableSummaryGrads)
chartSummaryGrads
chartSummaryNamesTop5AFAs <- makeChartSummaryNamesTop5AFAs(listInstitutions)
chartSummaryNamesTop5AFAs
#
save(file="master3.rda", listInstitutions, tableHarvardTech, tableSummaryGrads, tableSummaryEnrollRaces, tableSummaryEnrollOthers, chartSummaryGrads, chartSummaryNamesTop5AFAs)
##################
#Examples
ratios <- tableSummaryGrads$`Num Focus` / tableSummaryGrads$`Num IPEDS`
sum(ratios < .4)
Harvard <- listInstitutions[["Harvard"]]
tableHarvardGradRace<-Harvard[["tableGradRace"]]
#
tabHarvard <- tabHarvard %>%
select(-c(White, Asian, A2W))
tabMIT <- tabMIT %>%
select(-c(White, Asian, A2W))
This blog was established by the Digital Learning Lab to provide information that supports Black America’s efforts to close the Digital Divide. Its original focus on HBCUs has been broadened to include other colleges, universities, and community-based groups that enhance the computational thinking skills of Black Americans and the networks of successful Black techs who support each others’ efforts to achieve even greater success.
R-code master3
Subscribe to:
Posts (Atom)
No comments:
Post a Comment
Thank you!!! Your comments and suggestions will be greatly appreciated ... :-)