Skip to contents

Many users have expressed a desire to customize msDiaLogue plots. To address this, this article provides guidelines on how to modify the ggplot objects generated by msDiaLogue so that they match your own visual standards.

For R beginners, R for Data Science (Wickham, Çetinkaya-Rundel, and Grolemund 2023) is a fantastic resource, available online at https://r4ds.hadley.nz/. For users particularly interested in data visualization, ggplot2: Elegant Graphics for Data Analysis (Wickham 2016), available at https://ggplot2-book.org/, is highly recommended.

The following preliminary steps prepare the data needed to generate the default visualizations in msDiaLogue:

## Load R package
library(msDiaLogue)
## Preprocessing of the toy data set
fileName <- "../tests/testData/Toy_Spectronaut_Data.csv"
dataSet <- preprocessing(
  fileName,
  filterNaN = TRUE,
  filterUnique = 2,
  replaceBlank = TRUE,
  saveRm = TRUE
)
## Transformation of the preprocessed data
dataTran <- transform(dataSet, logFold = 2)
## Normalization of the transformed data ("quant" method)
dataNorm <- normalize(dataTran, normalizeType = "quant")
## Imputation, followed by a filter on the remaining missing values
dataImput <- impute.min_local(
  dataNorm,
  reportImputing = FALSE,
  reqPercentPresent = 0.51
)
dataImput <- filterNA(dataImput, saveRm = TRUE)
## Analysis: the same two conditions feed both the moderated t-test
## (used for the volcano plot) and the MA analysis (used for the MA plot)
cond <- c("100fmol", "50fmol")
anlys_mod.t <- analyze(dataImput, conditions = cond, testType = "mod.t-test")
anlys_MA <- analyze(dataImput, conditions = cond, testType = "MA")

Volcano plot

## volcano plot exactly as msDiaLogue draws it
visualize(
  anlys_mod.t,
  graphType = "volcano",
  P.thres = 0.05,
  logF.thres = 0.6
)
#> Warning: Removed 15 rows containing missing values or values outside the scale range
#> (`geom_text_repel()`).

The actual code for generating the default volcano plot with ggplot2, using data from the function visualize() in msDiaLogue, is as follows:

## thresholds shared by visualize() and by the hand-written plot below
P.thres <- 0.05
logF.thres <- 0.6
volcano <- visualize(anlys_mod.t, graphType = "volcano",
                     P.thres = P.thres, logF.thres = logF.thres)
## the data frame behind the plot is stored in the "data" element
plotData <- volcano[["data"]]
## default volcano plot code
library(ggplot2)
library(ggrepel)
ggplot(
  data = plotData,
  mapping = aes(x = Difference, y = -log10(P.value),
                colour = Significant, label = delabel)
) +
  ## dashed guides at the fold-change and p-value thresholds
  geom_vline(xintercept = c(-1, 1) * logF.thres, linetype = "dashed") +
  geom_hline(yintercept = -log10(P.thres), linetype = "dashed") +
  geom_point() +
  geom_text_repel(show.legend = FALSE) +
  scale_color_manual(
    values = c(Down = "blue", Up = "red",
               Inconclusive = "gray", No = "gray20")
  ) +
  labs(
    x = expression("log"[2]*"FC"),
    y = expression("-log"[10]*"P-value")
  ) +
  theme_bw() +
  theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5))
#> Warning: Removed 15 rows containing missing values or values outside the scale range
#> (`geom_text_repel()`).

Change labels of proteins

The following code demonstrates how to replace the default labels in the volcano plot with gene or protein names, using additional information.

library(dplyr)
## read a csv file containing full protein information, including gene/protein names
protNames <- read.csv("full_protein_information.csv")
## convert the row names of plotData into a column named "PG.ProteinNames"
plotData <- tibble::rownames_to_column(plotData, "PG.ProteinNames")
## merge the plotData with protNames based on the "PG.ProteinNames" column
check <- left_join(plotData, protNames, by = join_by(PG.ProteinNames))
## change the default labels from accessions to gene/protein names:
## - points without a label (NA) stay unlabelled;
## - labelled points whose gene name is missing (NA after the join, or an
##   empty string in the csv) keep their accession instead of losing the label
geneNames <- as.character(check$PG.Genes)
hasGeneName <- !is.na(geneNames) & nzchar(geneNames)
check$delabel <- ifelse(!is.na(check$delabel) & hasGeneName,
                        geneNames, check$delabel)
ggplot(check, aes(x = Difference, y = -log10(P.value),
                  col = Significant, label = delabel)) +
  geom_vline(xintercept = c(-logF.thres, logF.thres), linetype = "dashed") +
  geom_hline(yintercept = -log10(P.thres), linetype = "dashed") +
  geom_point() +
  geom_text_repel(show.legend = FALSE) +
  scale_color_manual(values = c("Down" = "blue", "Up" = "red",
                                "Inconclusive" = "gray", "No" = "gray20")) +
  labs(x = expression("log"[2]*"FC"), y = expression("-log"[10]*"P-value")) +
  theme_bw() +
  theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5))
#> Warning: Removed 15 rows containing missing values or values outside the scale range
#> (`geom_text_repel()`).

Change colors

To customize the colors in your volcano plot, you can use the function scale_color_manual() from ggplot2. This allows you to specify custom colors for different significance levels in the output plot.

ggplot(
  data = check,
  mapping = aes(x = Difference, y = -log10(P.value),
                colour = Significant, label = delabel)
) +
  geom_vline(xintercept = c(-1, 1) * logF.thres, linetype = "dashed") +
  geom_hline(yintercept = -log10(P.thres), linetype = "dashed") +
  geom_point() +
  geom_text_repel(show.legend = FALSE) +
  ## one colour per significance level; edit the right-hand sides to taste
  scale_color_manual(
    values = c(
      Down = "purple",
      Up = "orange",
      Inconclusive = "gray",
      No = "gray20"
    )
  ) +
  labs(
    x = expression("log"[2]*"FC"),
    y = expression("-log"[10]*"P-value")
  ) +
  theme_bw() +
  theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5))
#> Warning: Removed 15 rows containing missing values or values outside the scale range
#> (`geom_text_repel()`).

Change themes

To modify the appearance of your volcano plot, you can change the theme using ggplot2’s theme_*() functions. Themes control the overall look of the plot, including the background, gridlines, and text.

ggplot(
  data = check,
  mapping = aes(x = Difference, y = -log10(P.value),
                colour = Significant, label = delabel)
) +
  geom_vline(xintercept = c(-1, 1) * logF.thres, linetype = "dashed") +
  geom_hline(yintercept = -log10(P.thres), linetype = "dashed") +
  geom_point() +
  geom_text_repel(show.legend = FALSE) +
  scale_color_manual(
    values = c(Down = "blue", Up = "red",
               Inconclusive = "gray", No = "gray20")
  ) +
  labs(
    x = expression("log"[2]*"FC"),
    y = expression("-log"[10]*"P-value")
  ) +
  ## swap the complete theme here; the theme() call after it still applies
  theme_classic() +
  theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5))
#> Warning: Removed 15 rows containing missing values or values outside the scale range
#> (`geom_text_repel()`).

MA plot

## MA plot exactly as msDiaLogue draws it
visualize(
  anlys_MA,
  graphType = "MA",
  M.thres = 0.5,
  transformLabel = "Log2"
)
#> Warning: Removed 14 rows containing missing values or values outside the scale range
#> (`geom_text_repel()`).

The actual code for generating the default MA plot with ggplot2, using data from the function visualize() in msDiaLogue, is as follows:

## settings shared by visualize() and by the hand-written plot below
M.thres <- 0.5
transformLabel <- "Log2"
ma <- visualize(anlys_MA, graphType = "MA",
                M.thres = M.thres, transformLabel = transformLabel)
## the data frame behind the plot is stored in the "data" element
plotData <- ma[["data"]]
## default MA plot code
library(ggplot2)
library(ggrepel)
ggplot(
  data = plotData,
  mapping = aes(x = A, y = M, colour = Significant, label = delabel)
) +
  ## dashed guides at +/- the M threshold
  geom_hline(yintercept = c(-1, 1) * M.thres, linetype = "dashed") +
  geom_point() +
  geom_text_repel(show.legend = FALSE) +
  scale_color_manual(values = c(Down = "blue", No = "gray", Up = "red")) +
  labs(
    title = paste(transformLabel, "Transformed MA Plot"),
    x = paste(transformLabel, "average abundance"),
    y = paste(transformLabel, "fold change")
  ) +
  theme_bw() +
  theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5))
#> Warning: Removed 14 rows containing missing values or values outside the scale range
#> (`geom_text_repel()`).

Change labels of proteins

To change the labels on your plot from accession numbers to gene or protein names, follow these steps:

library(dplyr)
## read a csv file containing full protein information, including gene/protein names
protNames <- read.csv("full_protein_information.csv")
## convert the row names of plotData into a column named "PG.ProteinNames"
plotData <- tibble::rownames_to_column(plotData, "PG.ProteinNames")
## merge the plotData with protNames based on the "PG.ProteinNames" column
check <- left_join(plotData, protNames, by = join_by(PG.ProteinNames))
## change the default labels from accessions to gene/protein names:
## - points without a label (NA) stay unlabelled;
## - labelled points whose gene name is missing (NA after the join, or an
##   empty string in the csv) keep their accession instead of losing the label
geneNames <- as.character(check$PG.Genes)
hasGeneName <- !is.na(geneNames) & nzchar(geneNames)
check$delabel <- ifelse(!is.na(check$delabel) & hasGeneName,
                        geneNames, check$delabel)
ggplot(check, aes(x = A, y = M, color = Significant, label = delabel)) +
  geom_hline(yintercept = c(-M.thres, M.thres), linetype = "dashed") +
  geom_point() +
  geom_text_repel(show.legend = FALSE) +
  scale_color_manual(values = c("Down" = "blue", "No" = "gray", "Up" = "red")) +
  labs(title = paste(transformLabel, "Transformed MA Plot"),
       x = paste(transformLabel, "average abundance"),
       y = paste(transformLabel, "fold change")) +
  theme_bw() +
  theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5))
#> Warning: Removed 14 rows containing missing values or values outside the scale range
#> (`geom_text_repel()`).

Change colors

To adjust the colors in your MA plot, you can customize the color scheme to better highlight different categories of significance. Here is how you can modify the colors in the plot:

ggplot(
  data = plotData,
  mapping = aes(x = A, y = M, colour = Significant, label = delabel)
) +
  geom_hline(yintercept = c(-1, 1) * M.thres, linetype = "dashed") +
  geom_point() +
  geom_text_repel(show.legend = FALSE) +
  ## one colour per significance level; edit the right-hand sides to taste
  scale_color_manual(
    values = c(Down = "darkblue", No = "gray", Up = "orange")
  ) +
  labs(
    title = paste(transformLabel, "Transformed MA Plot"),
    x = paste(transformLabel, "average abundance"),
    y = paste(transformLabel, "fold change")
  ) +
  theme_bw() +
  theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5))
#> Warning: Removed 14 rows containing missing values or values outside the scale range
#> (`geom_text_repel()`).

Change themes

To alter the visual style of your MA plot, you can apply a different theme using ggplot2’s theme functions.

ggplot(
  data = plotData,
  mapping = aes(x = A, y = M, colour = Significant, label = delabel)
) +
  geom_hline(yintercept = c(-1, 1) * M.thres, linetype = "dashed") +
  geom_point() +
  geom_text_repel(show.legend = FALSE) +
  scale_color_manual(values = c(Down = "blue", No = "gray", Up = "red")) +
  labs(
    title = paste(transformLabel, "Transformed MA Plot"),
    x = paste(transformLabel, "average abundance"),
    y = paste(transformLabel, "fold change")
  ) +
  ## swap the complete theme here; the theme() call after it still applies
  theme_minimal() +
  theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5))
#> Warning: Removed 14 rows containing missing values or values outside the scale range
#> (`geom_text_repel()`).

Wickham, Hadley. 2016. ggplot2: Elegant Graphics for Data Analysis. New York, NY, USA: Springer.
Wickham, Hadley, Mine Çetinkaya-Rundel, and Garrett Grolemund. 2023. R for Data Science. Sebastopol, CA, USA: O’Reilly Media.