# 如何進行R語言ggplot2包畫曼哈頓圖的簡單分析
## 摘要
曼哈頓圖(Manhattan Plot)是基因組學研究中展示全基因組關聯分析(GWAS)結果的經典可視化工具。本文將詳細介紹如何使用R語言中的`ggplot2`包繪制曼哈頓圖,包括數據準備、基礎繪圖、高級定制以及結果解讀。通過本教程,讀者將掌握利用R語言進行GWAS結果可視化的核心技能。
---
## 1. 曼哈頓圖簡介
曼哈頓圖因其形似紐約曼哈頓天際線而得名,主要用于:
- 展示GWAS中SNP位點的顯著性水平(-log10(p-value))
- 識別基因組中與表型顯著相關的區域
- 直觀呈現全基因組范圍內的關聯信號
典型特征:
- X軸:染色體位置
- Y軸:關聯顯著性(通常取-log10轉換)
- 閾值線:標注顯著性水平(如5×10⁻⁸)
---
## 2. 準備工作
### 2.1 安裝必要R包
```r
# Install the packages used throughout this tutorial (one-time setup).
# ggrepel is included because the labelling examples further down call
# ggrepel::geom_text_repel(); qqman ships the gwasResults example data.
install.packages(c("ggplot2", "qqman", "dplyr", "ggrepel"))

library(ggplot2) # plotting
library(dplyr)   # data-manipulation verbs and the %>% pipe
library(qqman)   # attached so that data(gwasResults) can find the data set
```

使用`qqman`包內置的GWAS結果數據:

```r
# Load the example GWAS results shipped with qqman. The package is named
# explicitly so the call also works when qqman has not been attached with
# library(); a bare data(gwasResults) only searches attached packages.
data(gwasResults, package = "qqman")

# Show the first rows to check which columns are available.
head(gwasResults)
```

數據結構應包含:

- `CHR`: 染色體編號
- `BP`: 堿基位置
- `P`: p值
- `SNP`: SNP標識符(可選)

```r
# Basic Manhattan-style scatter plot: one point per SNP, positioned by its
# within-chromosome base-pair coordinate and coloured by chromosome.
ggplot(data = gwasResults) +
  geom_point(
    mapping = aes(x = BP, y = -log10(P), colour = factor(CHR)),
    alpha = 0.6
  ) +
  # Two colours repeated 22 times so neighbouring chromosomes alternate.
  scale_colour_manual(values = rep(c("skyblue", "orange"), times = 22)) +
  xlab("Chromosomal Position") +
  ylab("-log10(p-value)") +
  theme_minimal()
```

| 參數 | 作用 |
|---|---|
| `alpha` | 控制點透明度(0-1) |
| `size` | 點的大小 |
| `scale_color_manual` | 交替染色體顏色 |

```r
# Scatter of -log10(p) with two horizontal reference lines:
#   red  dashed line at p = 5e-8
#   blue dashed line at p = 1e-5
# alpha is a fixed point transparency, so it belongs outside aes(); the
# aes() call must be closed before it, otherwise the expression cannot parse.
ggplot(gwasResults) +
  geom_point(aes(x = BP, y = -log10(P)), alpha = 0.6) +
  geom_hline(yintercept = -log10(5e-8), color = "red", linetype = "dashed") +
  geom_hline(yintercept = -log10(1e-5), color = "blue", linetype = "dashed")
# Per-chromosome offset = summed length of all preceding chromosomes, where
# a chromosome's length is approximated by its largest BP value.
# as.numeric() avoids integer overflow when real base-pair positions are
# accumulated across the genome.
chr_offsets <- gwasResults %>%
  group_by(CHR) %>%
  summarize(chr_len = max(as.numeric(BP), na.rm = TRUE), .groups = "drop") %>%
  arrange(CHR) %>%
  mutate(offset = cumsum(chr_len) - chr_len)

# Genome-wide x coordinate: position within the chromosome plus the offset
# of that chromosome. ungroup() makes sure no grouping leaks into later steps.
gwasResults <- gwasResults %>%
  ungroup() %>%
  mutate(
    BP_cum = as.numeric(BP) +
      chr_offsets$offset[match(CHR, chr_offsets$CHR)]
  )

# One axis tick per chromosome, placed at the mean cumulative position of
# its SNPs and labelled with the chromosome id found in the data.
axis_centers <- gwasResults %>%
  group_by(CHR) %>%
  summarize(center = mean(BP_cum), .groups = "drop")

ggplot(gwasResults, aes(x = BP_cum, y = -log10(P), color = factor(CHR))) +
  geom_point() +
  scale_x_continuous(labels = axis_centers$CHR, breaks = axis_centers$center)
# Keep only the SNPs whose p-value is below 5e-8.
significant_snps <- filter(gwasResults, P < 5e-8)

# Draw every SNP coloured by chromosome, then overlay the retained SNPs as
# larger red points and attach their SNP identifiers as repelled text labels.
ggplot(data = gwasResults, mapping = aes(x = BP, y = -log10(P))) +
  geom_point(mapping = aes(colour = factor(CHR))) +
  geom_point(data = significant_snps, colour = "red", size = 2) +
  ggrepel::geom_text_repel(
    data = significant_snps,
    mapping = aes(label = SNP),
    size = 3
  )
library(ggplot2)
library(dplyr)
library(ggrepel)

# Fall back to the qqman example data when no gwasResults object exists yet,
# so this example can be run on its own.
if (!exists("gwasResults")) {
  data(gwasResults, package = "qqman")
}

# Data preparation ----
# Per-chromosome offset = summed length of all preceding chromosomes, where
# a chromosome's length is approximated by its largest BP value.
# as.numeric() avoids integer overflow when positions are accumulated.
chr_offsets <- gwasResults %>%
  group_by(CHR) %>%
  summarize(chr_len = max(as.numeric(BP), na.rm = TRUE), .groups = "drop") %>%
  arrange(CHR) %>%
  mutate(offset = cumsum(chr_len) - chr_len)

# Genome-wide x coordinate: position within the chromosome plus the offset
# of that chromosome. ungroup() keeps later steps free of leftover grouping.
gwasResults <- gwasResults %>%
  ungroup() %>%
  mutate(
    BP_cum = as.numeric(BP) +
      chr_offsets$offset[match(CHR, chr_offsets$CHR)]
  )

# Chromosome label positions ----
axis_df <- gwasResults %>%
  group_by(CHR) %>%
  summarize(center = mean(BP_cum), .groups = "drop")

# SNPs below the 5e-8 threshold. Computed after BP_cum so the highlighted
# points and labels share the x coordinates used by the main layer.
significant_snps <- gwasResults %>%
  filter(P < 5e-8)

# Plot ----
manhattan_plot <- ggplot(
  gwasResults,
  # CHR %% 2 yields two colour classes so adjacent chromosomes alternate.
  aes(x = BP_cum, y = -log10(P), color = factor(CHR %% 2))
) +
  geom_point(alpha = 0.75) +
  geom_hline(yintercept = -log10(5e-8), color = "red", linetype = "dashed") +
  scale_x_continuous(labels = axis_df$CHR, breaks = axis_df$center) +
  scale_y_continuous(expand = c(0, 0.1)) +
  scale_color_manual(values = c("skyblue", "orange")) +
  labs(
    x = "Chromosome",
    y = "-log10(p-value)",
    title = "GWAS Manhattan Plot"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

# Highlight and label significant SNPs ----
if (nrow(significant_snps) > 0) {
  manhattan_plot <- manhattan_plot +
    geom_point(data = significant_snps, color = "red") +
    ggrepel::geom_text_repel(
      data = significant_snps,
      aes(label = SNP),
      size = 3,
      box.padding = 0.5
    )
}

print(manhattan_plot)
```

- 數據點過于密集時:調整`alpha`參數或使用`geom_hex()`
- X軸標簽重疊時:`theme(axis.text.x = element_text(angle = 45, hjust = 1))`
- 數據量過大時:使用`data.table`處理數據或先采樣
注:本文代碼已在R 4.2.0 + ggplot2 3.4.0環境下測試通過。實際應用時請根據數據特征調整參數。
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。