Fall23 Barry Grant Bioinformatics
Delisa Ramos (PID:A69026881)
To use ggplot2, we first need to install it on our computers. To do this we will use the function ‘install.packages()’. Before I use any package functions I have to load them up with a ‘library()’ call, like so:
#install.packages("gifski")
#install.packages("gganimate")
#install.packages("gapminder")
#install.packages("patchwork")
library(gapminder)
library(ggplot2)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.3 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ lubridate 1.9.3 ✔ tibble 3.2.1
✔ purrr 1.0.2 ✔ tidyr 1.3.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gifski)
library(gganimate)
library(patchwork)
There is always the “base R” graphics system, i.e. ‘plot()’
# Base R plot of the cars data frame (no ggplot2 involved)
plot(cars)
To use ggplot, I need to spell out at least 3 things: i. data (stuff I want to plot) ii. aesthetics (aes) iii. geometries (geom_)
# cars data: speed on x, dist on y, drawn as points joined by a line
ggplot(data = cars, mapping = aes(x = speed, y = dist)) +
  geom_point() +
  geom_line()
# Points only
ggplot(cars, aes(x = speed, y = dist)) +
  geom_point()

# Straight-line fit drawn without the standard-error ribbon.
# FALSE is spelled out because F is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
ggplot(cars, aes(x = speed, y = dist)) +
  geom_smooth(se = FALSE, method = "lm") # lm = linear model
`geom_smooth()` using formula = 'y ~ x'
# Default smoother with a title, axis labels and the black-and-white theme
ggplot(data = cars, mapping = aes(x = speed, y = dist)) +
  geom_smooth() +
  labs(
    title = "Relationship between Speed and Distance",
    x = "Speed",
    y = "Distance"
  ) +
  theme_bw()
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Read the tab-delimited expression table directly from its URL
url <- "https://bioboot.github.io/bimm143_S20/class-material/up_down_expression.txt"
genes <- read.delim(file = url)

# Preview the first rows
head(x = genes)
Gene Condition1 Condition2 State
1 A4GNT -3.6808610 -3.4401355 unchanging
2 AAAS 4.5479580 4.3864126 unchanging
3 AASDH 3.7190695 3.4787276 unchanging
4 AATF 5.0784720 5.0151916 unchanging
5 AATK 0.4711421 0.5598642 unchanging
6 AB015752.4 -3.6808610 -3.5921390 unchanging
# Number of rows in the genes table
nrow(genes)
[1] 5196
# Number of columns in the genes table
ncol(genes)
[1] 4
# Column names of the genes table
colnames(genes)
[1] "Gene" "Condition1" "Condition2" "State"
# Count how many rows fall into each value of the State column
table(genes$State)
down unchanging up
72 4997 127
# Fraction of rows whose State is "up", rounded to 2 decimals.
# Computed from the data rather than from hand-copied counts, so it stays
# correct if the input file changes.
round(sum(genes$State == "up", na.rm = TRUE) / nrow(genes), 2)
[1] 0.02
# Base scatter plot: Condition1 vs Condition2, coloured by State
g <- ggplot(
  data = genes,
  mapping = aes(x = Condition1, y = Condition2, col = State)
) +
  geom_point()

# Same plot with a custom colour palette, a title and axis labels
g +
  scale_color_manual(values = c("magenta", "navy", "purple")) +
  labs(
    title = "Gene Expression Changes Upon Drug Treatment",
    x = "Control (no drugs)",
    y = "Drug Treatment"
  )
# Read the gapminder table from a tab-separated file.
# NOTE(review): this creates a global `gapminder` object; the gapminder
# package loaded earlier also provides a dataset with this name, which this
# assignment presumably masks - confirm that is intended.
url <- "https://raw.githubusercontent.com/jennybc/gapminder/master/inst/extdata/gapminder.tsv"
gapminder <- read.delim(file = url)

# Preview the first rows
head(x = gapminder)
country continent year lifeExp pop gdpPercap
1 Afghanistan Asia 1952 28.801 8425333 779.4453
2 Afghanistan Asia 1957 30.332 9240934 820.8530
3 Afghanistan Asia 1962 31.997 10267083 853.1007
4 Afghanistan Asia 1967 34.020 11537966 836.1971
5 Afghanistan Asia 1972 36.088 13079460 739.9811
6 Afghanistan Asia 1977 38.438 14880372 786.1134
# Keep only the rows recorded for 2007
gapminder_07 <- filter(gapminder, year == 2007)

colnames(gapminder_07)
[1] "country" "continent" "year" "lifeExp" "pop" "gdpPercap"
# 2007: colour by continent, point size by population
ggplot(
  data = gapminder_07,
  mapping = aes(x = gdpPercap, y = lifeExp, col = continent, size = pop)
) +
  geom_point(alpha = 0.4)

# Alternatively, colour the points by population
ggplot(
  data = gapminder_07,
  mapping = aes(x = gdpPercap, y = lifeExp, col = pop)
) +
  geom_point(alpha = 0.4)
# 2007 plot stored for reuse; scale_size_area() caps the largest point at 10
g07 <- ggplot(
  data = gapminder_07,
  mapping = aes(x = gdpPercap, y = lifeExp, col = continent, size = pop)
) +
  geom_point(alpha = 0.4) +
  scale_size_area(max_size = 10) +
  labs(
    title = "Country's GDP percap vs. Life Expectancy",
    subtitle = "year: 2007"
  )

g07
# Same plot for 1957 (more opaque points, larger maximum point size)
gapminder_1957 <- filter(gapminder, year == 1957)

g1957 <- ggplot(
  data = gapminder_1957,
  mapping = aes(x = gdpPercap, y = lifeExp, col = continent, size = pop)
) +
  geom_point(alpha = 0.7) +
  scale_size_area(max_size = 15) +
  labs(
    title = "Country's GDP percap vs. Life Expectancy",
    subtitle = "year: 1957"
  )

g1957
# Rows for 1957 and 2007 together; %in% states the set membership directly
both <- gapminder %>%
  filter(year %in% c(1957, 2007))

# One panel per year so the two snapshots can be compared side by side
ggplot(both, aes(x = gdpPercap, y = lifeExp, col = continent, size = pop)) +
  geom_point(alpha = 0.7) +
  scale_size_area(max_size = 10) +
  ggtitle(
    "Country's GDP percap vs. Life Expectancy",
    subtitle = "year: both 1957 and 2007" # corrects the "bith" typo
  ) +
  facet_wrap(~year)
# Five most populous countries in 2007.
# slice_max() replaces the superseded top_n(); it returns the rows ordered
# from largest to smallest pop, so no separate arrange() step is needed.
gapminder_top5 <- gapminder %>%
  filter(year == 2007) %>%
  slice_max(pop, n = 5)

gapminder_top5
country continent year lifeExp pop gdpPercap
1 China Asia 2007 72.961 1318683096 4959.115
2 India Asia 2007 64.698 1110396331 2452.210
3 United States Americas 2007 78.242 301139947 42951.653
4 Indonesia Asia 2007 70.650 223547000 3540.652
5 Brazil Americas 2007 72.390 190010647 9065.801
# Bar chart of population for the selected countries
ggplot(data = gapminder_top5, mapping = aes(x = country, y = pop)) +
  geom_col()

colnames(gapminder_top5)
[1] "country" "continent" "year" "lifeExp" "pop" "gdpPercap"
# Life expectancy per country
ggplot(data = gapminder_top5, mapping = aes(x = country, y = lifeExp)) +
  geom_col()

# Bars filled by continent
ggplot(
  data = gapminder_top5,
  mapping = aes(x = country, y = lifeExp, fill = continent)
) +
  geom_col()

# Bars filled by the life expectancy value itself
ggplot(
  data = gapminder_top5,
  mapping = aes(x = country, y = lifeExp, fill = lifeExp)
) +
  geom_col()

# Population bars ordered from largest to smallest, one fill per country
ggplot(
  data = gapminder_top5,
  mapping = aes(x = reorder(country, -pop), y = pop, fill = country)
) +
  geom_col()
# Preview the first rows of the USArrests data frame
head(USArrests)
Murder Assault UrbanPop Rape
Alabama 13.2 236 58 21.2
Alaska 10.0 263 48 44.5
Arizona 8.1 294 80 31.0
Arkansas 8.8 190 50 19.5
California 9.0 276 91 40.6
Colorado 7.9 204 78 38.7
# Copy the row names into a regular State column so they can be mapped as an
# aesthetic (mutate() would work as well)
USArrests[["State"]] <- rownames(USArrests)

# Murder value per state, ordered by Murder and flipped so the states run
# along the vertical axis
ggplot(
  data = USArrests,
  mapping = aes(x = reorder(State, Murder), y = Murder)
) +
  coord_flip() +
  geom_point() + # point at the end of each segment, helps with comparison
  geom_segment(
    mapping = aes(x = State, xend = State, y = 0, yend = Murder),
    color = "magenta"
  )
# Start from an ordinary ggplot of the gapminder data
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, size = pop, colour = country)
) +
  geom_point(alpha = 0.7, show.legend = FALSE) +
  scale_colour_manual(values = country_colors) +
  scale_size(range = c(2, 12)) +
  scale_x_log10() +
  # One panel per continent
  facet_wrap(~continent) +
  # gganimate-specific parts: the title is a template filled in per frame,
  # and the animation steps through the year column
  labs(
    title = "Year: {frame_time}",
    x = "GDP per capita",
    y = "life expectancy"
  ) +
  transition_time(year) +
  shadow_wake(wake_length = 0.1, alpha = FALSE)
# Just experimenting: combine the 2007 and 1957 plots with patchwork's `/`
g07 / g1957