R语言绘图

热图

差异基因热图绘制

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#
# Create heat map from a differential expression count table.
#
# Load the library.
suppressPackageStartupMessages(library(gplots))

# The name of the file that contains the counts.
count_file = "results.csv"

# The name of the output file.
output_file = "heatmap.pdf"

# Inform the user.
print("# Tool: Create Heatmap ")
print(paste("# Input: ", count_file))
print(paste("# Output: ", output_file))

# FDR cutoff.
MIN_FDR = 0.05

# Plot width
WIDTH = 12

# Plot height.
HEIGHT = 13

# Set the margins
MARGINS = c(9, 12)

# Relative heights of the rows in the plot.
LHEI = c(1, 5)

# Read normalized counts from the standard input.
data = read.csv(count_file, header=T, as.is=TRUE)

# Subset data for values under a treshold.
data = subset(data, data$FDR <= MIN_FDR)

# The heatmap row names will be taken from the first column.
row_names = data[, 1]

# The code assumes that the normalized data matrix is listed to the right of the falsePos column.
idx = which(colnames(data) == "falsePos") + 1

# The normalized counts are on the right size.
counts = data[, idx : ncol(data)]

# Load the data from the second column on.
values = as.matrix(counts)

# Adds a little noise to each element to avoid the
# clustering function failure on zero variance rows.
values = jitter(values, factor = 1, amount = 0.00001)

# Normalize each row to a z-score
zscores = NULL
for (i in 1 : nrow(values)) {
row = values[i,]
zrow = (row - mean(row)) / sd(row)
zscores = rbind(zscores, zrow)
}

# Set the row names on the zscores.
row.names(zscores) = row_names

# Turn the data into a matrix for heatmap2.
zscores = as.matrix(zscores)

# Set the color palette.
col = greenred

# Create a PDF device
pdf(output_file, width = WIDTH, height = HEIGHT)

heatmap.2(zscores, col=col, density.info="none", Colv=NULL,
dendrogram="row", trace="none", margins=MARGINS, lhei=LHEI)

dev.off()

  • 设置差异基因筛选条件为FDR < 0.05, 还可以根据需要设置差异倍数,筛选出需要画图的基因
  • 代码默认归一化的矩阵在”falsePos”列的右侧,如果不一致,需另行调整代码

火山图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

# Loading library
library(tidyverse)
library(readxl)

# Import data
data <- read_xlsx("全部数据.xlsx")
summary(data)
str(data)

# Pretreat data
data$Status[data$LOG2FC > 1.2 & data$P.value<0.05 & data$OPLSDA.VIP > 1 ] <- "Up-regulated"
data$Status[data$LOG2FC < -1.2 & data$P.value<0.05 & data$OPLSDA.VIP > 1] <- "Down-regulated"
data$Status[is.na(data$Status)] <- "Insignificant Change"

# Plot volcano chart
ggplot(data = data, aes(LOG2FC,-log10(P.value), color = Status))+
geom_hline(yintercept = -log10(0.05),linetype = "dashed",color ="#999999")+
geom_vline(xintercept = c(-1,1),linetype = "dashed",color ="#999999")+
geom_point(aes(size= OPLSDA.VIP))+
scale_size(range = c(1, 3)) +
scale_color_manual(values = c("dodgerblue","gray","firebrick"))+
theme_bw()+
theme(panel.grid = element_blank())

韦恩图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

library(tidyverse)
library(VennDiagram)

k9_up <- read.table("NaB_T1VSNaB_C1_up.targetList.xls")
k9_down <- read.table("NaB_T1VSNaB_C1_down.targetList.xls")



H3K9ac_up <- unique(k9_up)
H3K9ac_down <- unique(k9_down)


data <- list(H3K9ac_up$V1, H3K9ac_down$V1)



venn.diagram(data, fill = c("#FFB5BF", "#97C8E0"),
alpha = 0.5, label.col = "black",
cex = 1.5, cat.col = c("#FFB5BF", "#97C8E0"),
cat.cex = 1, cat.fontface = "bold",
label.cex = 0.8,
filename = "~/Desktop/H3K9ac_ven.png",
category = c("UP", "DOWN"))

九象限图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35


# load package ------------------------------------------------------------

library(tidyverse)


# loda data ---------------------------------------------------------------

com <- read.csv("common.csv")

# clean data --------------------------------------------------------------

com$DEG <- ifelse(com$RNA_log2FC > 1 & com$H3K9ac_log2FC > 1, "UP-UP",
ifelse(com$RNA_log2FC < -1 & com$H3K9ac_log2FC < -1, "DOWN-DOWN",
ifelse(com$RNA_log2FC > 1 & com$H3K9ac_log2FC < -1, "UP-DOWN",
ifelse(com$RNA_log2FC < -1 & com$H3K9ac_log2FC > 1, "DOWN-UP",
"NONE"))))

# plot image --------------------------------------------------------------

ggplot(com, aes(x = RNA_log2FC, y=H3K9ac_log2FC, colour=DEG)) +
geom_point(alpha=0.85, size=1) +
geom_vline(xintercept=c(-1,1),lty=4,col="black",lwd=0.5) +
geom_hline(yintercept=c(-1,1),lty=4,col="black",lwd=0.5) +
scale_color_manual(values=c('steelblue','#03BF7D','gray', '#AEAF31','brown')) +
theme_bw()+
guides(color = "none") +
theme(plot.margin = unit(c(1,1,1,1), "cm"),
panel.grid = element_blank()) +
labs(x="RNA-Seq_log2FC")

ggsave("H3K9ac_com.pdf", width = 15, height = 15, units = "cm" )


散点图+箱线图+小提琴图+t检验+循环

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
library(readxl)
library(tidyverse)
library(ggpubr)
library(gridExtra)

pcr <- read_xlsx("qrt-pcr.xlsx")
pcr$Group <- factor(pcr$Group, levels = c("control", "butyrate"))
pcr$Gene <- factor(pcr$Gene, levels = paste0("HDAC", 1:11))

HDAC <- paste0("HDAC", 1:11)
plots <- list()

for (i in HDAC) {
plots[[i]] <- ggplot(pcr[pcr$Gene == i, ], aes(x = Group, y = Expression)) +
geom_point(color="black",
size=5, alpha = 0.8)+
geom_violin(aes(fill = Group),
color = "gray", alpha = 0.4,
scale = "width",
linewidth = 0.6,
trim = T)+
geom_boxplot(color = "white",
outlier.color = "black",
width = 0.2,
size = 0.8,
fill = NA) +
geom_signif(comparisons = list(c("control", "butyrate")),
y_position = max(pcr[pcr$Gene == i, ]$Expression+0.1),
map_signif_level = T,
test = "t.test",
textsize = 4) +
guides(fill = "none")+
theme_bw()+
labs(x = i, y = "Relative expression of mRNA")+
ylim(NA, max(pcr[pcr$Gene == i, ]$Expression+0.2)) +
theme(axis.title.x = element_text(size = 12, face = "bold"),
panel.grid = element_blank())
}

do.call("grid.arrange", c(plots, ncol=4))



多次拟合曲线

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
library(tidyverse)
library(readxl)


HPY <- read_xlsx("HPY数据.xlsx“)


ggplot(HPY, aes(x = Time, y = HPY, linetype=Type)) +
geom_smooth(se = FALSE, color = "black", method = "lm", formula = y ~ poly(x, degree = 4)) +
xlab("Time (day)") +
ylab("HPY (µg/ml)")+
scale_y_continuous() +
theme_bw()+
theme(legend.title = element_blank(),
legend.position = c(0.9, 0.9),
panel.grid = element_blank(),
plot.margin = margin(1, 1, 1, 1, unit = "cm"))


fit1 <- lm(HPY ~ poly(Time, degree = 4),
data = HPY[HPY$Type == "primiparous",])
summary(fit1)

fit2 <- lm(HPY ~ poly(Time, degree = 4),
data = HPY[HPY$Type == "mutiparous",])
summary(fit2)