# This R script normalizes a Hi-C contact matrix using
#   c_ij = c_ij / (row_i * col_j)
# where row_i is the sum of row i and col_j is the sum of col j.
# The method is from (Lieberman-Aiden et al., 2009) "Comprehensive mapping of
# long-range interactions reveals folding principles of the human genome".
#
# usage:
#   Rscript matrix-balancing_VC.R Hi-C_matrix_file output_file

# Normalize a square matrix by the product of its marginal sums.
#
# mat: a square numeric matrix of contact counts.
#
# Returns a matrix of the same dimensions in which cell (i, j) is
# mat[i, j] / (s[i] * s[j]), with s = rowSums(mat). Cells whose row i or
# row j has a zero sum are set to 0 instead of dividing by zero.
#
# NOTE: the row sums are used for both the row and the column factor. This
# matches the formula in the header only when the matrix is symmetric
# (row sums == column sums).
vc_normalize <- function(mat) {
  sums <- rowSums(mat)
  nonzero <- sums != 0

  # A cell is normalized only when both of its marginal sums are non-zero.
  keep <- outer(nonzero, nonzero, "&")
  denom <- outer(sums, sums)

  # Start from all zeros so that masked-out cells stay 0.
  out <- matrix(0, nrow = nrow(mat), ncol = ncol(mat))
  out[keep] <- mat[keep] / denom[keep]
  out
}

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop(
    "usage: Rscript matrix-balancing_VC.R Hi-C_matrix_file output_file",
    call. = FALSE
  )
}

dat <- read.table(args[1])
mat <- as.matrix(dat)

if (nrow(mat) != ncol(mat)) {
  stop("The matrix is not n-by-n.\n", call. = FALSE)
}

mat2 <- vc_normalize(mat)

# Write the bare numeric matrix: space-separated, no row or column labels.
write.table(
  mat2,
  file = args[2],
  row.names = FALSE,
  col.names = FALSE,
  sep = " ",
  quote = FALSE
)