Thursday, March 15, 2007

computing background correction using justGCRMA

# Author Sucheta
# Has 2 different versions of background correction using GCRMA
# The first one returns the probeset values and the second one gets the
# probe values

#SCRIPT -1
#----------
#-----------
library(gcrma)
celpath <- "YOUR PATH"
eset <- justGCRMA(celfile.path=celpath, normalize=FALSE)
write.exprs(eset, "out.txt");

#-------- This would return the background corrected probeset values ---------------------------

#SCRIPT-2
#--------
#--------


library(gcrma);

# Get your affinity data else the program can automatically compute affinity
# Time consuming
affinityData <- "YOUR AFFINITY DATA";
load(affinityData);

# Compress CEL files
compress <- getOption("BioC")$affy$compress.cel
fast <- TRUE;
cdfName <- "Soybean";


celPath <- "YOUR PATH";

l <- AllButCelsForReadAffy(celfile.path = celPath);
fileNames <- l$filenames;

# Get file name length

n <- length(fileNames);

# Stop if file name length is zero

if(n == 0)
stop("File name Length is zero EXITING ...");


# Calling the function

pms <- mem.bkg(filenames=fileNames,
pm.affinities = pm.affinities,
mm.affinities = mm.affinities,
index.affinities = index.affinities,
type = "fullmodel",
optical.correct = TRUE,
verbose = TRUE,
minimum = 1,
k = 6 * fast+0.5*(1-fast),
rho = 0.7,
correction = 1,
stretch = 1.15*fast+1*(1-fast),
fast = TRUE,
cdfname = cdfName)

output <- paste(celPath,"_bg.txt", sep="");
write.table(pms, file=output, row.names=FALSE, col.names=TRUE, sep="\t");

# ---- FUNCTION DEFINITION --------------#

## A function to do background correction using less RAM

mem.bkg <- function(filenames, pm.affinities, mm.affinities,
index.affinities, type, minimum, optical.correct,
verbose, k, rho, correction, stretch, fast, cdfname){

pms <- read.probematrix(filenames=filenames, which="pm", cdfname = cdfname)$pm

## tmps used to carry optical correct value to bkg correction loop
if(optical.correct){
if(verbose) cat("Adjusting for optical effect.")
tmps <- NULL
for (i in 1:ncol(pms)){
if(verbose) cat(".")
mm <- read.probematrix(filenames=filenames[i], which="mm")$mm[,1]
tmp <- min(c(pms[,i], mm), na.rm=TRUE)
pms[,i] <- pms[,i]- tmp + minimum
tmps <- c(tmps, tmp)
}
}
if(verbose) cat("Done.\n")


if(type=="fullmodel" | type=="affinities"){
set.seed(1)
Subset <- sample(1:length(pms[index.affinities,]),25000)
y <- log2(pms)[index.affinities,][Subset]
Subset <- (Subset-1)%%nrow(pms[index.affinities,])+1
x <- pm.affinities[Subset]
fit1 <- lm(y~x)
}

if(verbose) cat("Adjusting for non-specific binding")
for(i in 1:ncol(pms)){
if(verbose) cat(".")

mm <- read.probematrix(filenames=filenames[i], which="mm")$mm[,1]

if(optical.correct)
mm <- mm - tmps[i] + minimum

if(type=="fullmodel"){
pms[,i] <- bg.adjust.fullmodel(pms[,i],mm,ncs=NULL,
pm.affinities,mm.affinities,anc=NULL,
index.affinities,k=k,
rho=rho,fast=fast)
pms[index.affinities,i] <- 2^(log2(pms[index.affinities,i])-
fit1$coef[2]*pm.affinities+mean(fit1$coef[2]*pm.affinities))
}
if(type=="affinities"){
pms[,i] <- bg.adjust.affinities(pms[,i],ncs=mm,pm.affinities,mm.affinities,
index.affinities, k=k,
fast=fast)
pms[index.affinities,i] <- 2^(log2(pms[index.affinities,i])-
fit1$coef[2]*pm.affinities +
mean(fit1$coef[2]*pm.affinities))
}
if(type=="mm") pms[,i] <- bg.adjust.mm(pms[,i],correction*mm,k=k,fast=fast)
if(type=="constant"){
pms[,i] <- bg.adjust.constant(pms[,i],
k=k,Q=correction*mean(pms[,i]< mm ),
fast=fast)
}
if(stretch!=1){
mu <- mean(log(pms[,i]))
pms[,i] <- exp(mu + stretch*(log(pms[,i])- mu))
}
}
if(verbose) cat("Done.\n")
return(pms)
}