# Locations of the two cleaned, tab-separated input tables.
args_data_cleaned_path <- "data/results/resfinder/args_data_latest_cleaned.tsv"
amr_labels_cleaned_path <- "data/results/data_collection_ncbi/amr_labels_latest_cleaned.tsv"

# Load both tables with read_tsv().
args_data <- read_tsv(args_data_cleaned_path)
amr_labels <- read_tsv(amr_labels_cleaned_path)
In the next section, we perform a correlation analysis between the variables in the dataset (the antibiotic resistance genes, ARGs) and each of the antibiotics.
# Prepare data for correlation analysis: remove null values and sort both
# tables in the same sample order so that row k of one table refers to the
# same sample as row k of the other.
arranged_amr_labels <- amr_labels %>%
  drop_na() %>%
  # Keep only samples that also appear in the ARG table; otherwise the two
  # tables could end up with different numbers of rows.
  filter(SampleID %in% args_data$sample_name) %>%
  arrange(SampleID)
arranged_args_data <- args_data %>%
  filter(sample_name %in% arranged_amr_labels$SampleID) %>%
  arrange(sample_name) %>%
  select(-sample_name) %>%
  # Keep only columns that have at least one non-zero value among the
  # remaining samples.
  select_if(function(x) any(x != 0))
# The identifier column was only needed for filtering and ordering; drop it so
# that every remaining column can be correlated.
arranged_amr_labels <- arranged_amr_labels %>%
  select(-SampleID)
# Fail early (e.g. on duplicated sample names) instead of correlating
# misaligned rows.
stopifnot(nrow(arranged_args_data) == nrow(arranged_amr_labels))
# Calculate the correlation matrix and its p-values.
# Rows correspond to the columns of arranged_args_data and columns to the
# columns of arranged_amr_labels; both result matrices share shape and dimnames.
args_correlation_matrix_coefficients <- matrix(
  NA_real_,
  nrow = ncol(arranged_args_data),
  ncol = ncol(arranged_amr_labels),
  dimnames = list(colnames(arranged_args_data), colnames(arranged_amr_labels))
)
args_correlation_matrix_pvalues <- args_correlation_matrix_coefficients
# seq_len() yields an empty sequence for zero columns, whereas 1:0 would
# iterate over c(1, 0).
for (i in seq_len(ncol(arranged_args_data))) {
  for (j in seq_len(ncol(arranged_amr_labels))) {
    # Run the test once per pair and read both results from the same object.
    pair_test <- cor.test(arranged_args_data[[i]], arranged_amr_labels[[j]])
    args_correlation_matrix_coefficients[i, j] <- pair_test$estimate
    args_correlation_matrix_pvalues[i, j] <- pair_test$p.value
  }
}
# TODO: analyze p-values to see if they are significant
# args_correlation_matrix <- cor(arranged_args_data, arranged_amr_labels) # Alternative method to calculate correlation matrix with coefficients but not p-values
# Heatmap of the correlation matrix.
# NOTE(review): the opening of the plotting call that receives the arguments
# below (function name and data argument) is not visible in this section.
# `key` / `key.title` look like gplots::heatmap.2() arguments; TODO confirm
# and make sure the call head and its closing parenthesis are present.
xlab = "Antibiotics", ylab = "Antibiotic Resistance Genes (ARGs)",
main = "Correlation Heatmap",
# 100 colours interpolated along a blue -> white -> red ramp.
col = colorRampPalette(c("blue", "white", "red"))(100),
key = TRUE,
key.title = "Correlation Coefficients",
# Separate the axes a bit more
margins = c(16, 6)
# Add gradient color legend
# NOTE(review): the head of this second call is also not visible here; the
# arguments look like those of graphics::legend(), which additionally needs a
# position; TODO confirm.
# The three labels are paired with three colours from the same
# blue -> white -> red ramp. This presumably assumes the heatmap colour scale
# spans -1..1; verify against the plotting call.
legend = c("-1", "0", "1"),
fill = colorRampPalette(c("blue", "white", "red"))(3),
title = "Correlation Coefficient",
cex = 0.8