# This R script is used to do ICE matrix balancing from Imakaev et al. (2012) # "Iterative correction of Hi-C data reveals hallmarks of chromosome organization" # usage: # Rscript matrix-balancing_ICE.R Hi-C_matrix_file output_file args <- commandArgs(trailingOnly=TRUE) file_input <- args[1] # Hi-C contact matrix file file_output <- args[2] # output file max_iters <- 6000 # maximum iterations, you can change it th <- 1e-3 # error tolerance, you can change it dat <- read.table(file_input) mat <- as.matrix(dat) nr <- nrow(mat) nc <- ncol(mat) if (nr != nc) { stop("The matrix is not n-by-n.\n", call. = FALSE ) } myf <- function(x,y,M,S){ if (S[x] & S[y]) { M[x,y]/(S[x]*S[y]) } else { 0 } } myf2 <- Vectorize(myf,vectorize.args = c('x','y')) pre_bias <- integer(nr) real_iters <- 0 for (m in 1:max_iters) { # step 1: calculate the sum of the matrix over all rows rsum <- rowSums(mat) # step 2: # step 3: calculate vector of biasa bias <- rsum # step 4: renormalize bias vector by its mean value over non-zero bins to avoid numerical instabilities bias_mean <- mean(bias[bias!=0]) bias <- bias/bias_mean # step 5: set zero values of bias to one to avoid 0/0 error bias[bias==0] <- 1 # step 6: divide mat_ij by bias_i*bias_j for all ij mat <- outer(1:nr, 1:nc, myf2, mat, bias) # step 7: if (m > 1) { tmp_sum <- sum(abs(pre_bias - bias)) if (tmp_sum < th) break } pre_bias <- bias real_iters <- real_iters + 1 } #if (real_iters == max_iters) { #stop("ICE cannot converage after maximum iterations, please set a larger number of iteration.\n", call. = FALSE ) #} else { write.table(mat, file=file_output, row.names=F, col.names=F, sep=" ", quote=F) #}