# OpWise: Identifying differentially expressed genes in bacterial microarray
# experiments
#
# http://microbesonline.org/OpWise
# Morgan N. Price, Adam P. Arkin, and Eric J. Alm, 2005
#
#
# SummarizeUarrayVsOperons() takes real data and produce a list with summary statistics (such as the mean
# and total deviance of each gene) for input to AnalyzeUarrayVsOperons(), which
# produces the p-values. See documentation for AnalyzeUarrayVsOperons() for
# an explanation of its results. You can also use the output of that function as
# input to OperonAgreementVsConfidencePlot()
#
# If you do not have replicates, then use OpWiseNoRepP() instead -- see below
#
# Use MapUarrayGenes() to convert data with arbitrary labels into data with VIMSS
# identifiers. This is required if you are using VIMSS (MicrobesOnline) operon
# predictions and are using other identifiers in the microarray data file(s).
#


# SummarizeUarrayVsOperons:
# The data should already be transformed to log-ratios (we normally use base 2, but
# you can use whatever you want).
#
# data should be a data frame or matrix with
# 1 row per gene, 1 column per replicate, and NA for missing values
#
# For genomic control experiments, data2 should be the control and should be analogous,
# so that muObs will be mean(data1)-mean(data2)
#
# If sweep is TRUE then each column is swept (the mean is subtracted first,
#	using genes present in all columns)
#
# If loess is non-null then a loess normalization is performed on the mean vs. the
#	sum of the log-levels. (This sum needs to be provided for
#	direct comparisons but not for genomic control)
# If sector is non-null then a sector normalization is performed (after the loess), using
#	sector as a factor over rows (the median is computed within each sector, and
#	subtracted from each gene in the sector).
# If spots is non-null then the data is over spots not over locusId (so that locusId is non-unique)
# locusId should be integers (e.g., microbesonline or VIMSS ids)
#
# opPred should be operon predictions with columns Gene1, Gene2, and pOp
#
SummarizeUarrayVsOperons <- function(opPred, locusId, data1, data2=NULL, sweep=FALSE, loess=FALSE,
					A=NULL, sector=NULL, spots=FALSE, name=NULL) {
	locusInput <- locusId;

	if(!is.data.frame(data1)) {
		stop("SummarizeUarrayVsOperons: data1 is not a data frame\n");
	}
	if(!is.null(data2) && !is.data.frame(data2)) {
		stop("SummarizeUarrayVsOperons: data2 is not a data frame\n");
	}

	n1 <- apply(data1,1,function(x) countif(is.finite(x)));
	n2 <- if(!is.null(data2)) apply(data2,1,function(x) countif(is.finite(x))) else NULL;

	if(max(abs(data1),na.rm=TRUE) > 100) {
		warning("SummarizeUarrayVsOperons data1 argument contains extreme values!",
			if(is.null(data2)) " Input should be log-ratios" else " Input should be log-levels");
	}
	if(!is.null(data2) && max(abs(data2),na.rm=TRUE) > 100) {
		warning("SummarizeUarrayVsOperons data2 argument contains extreme values! Input should be log-levels");
	}

	# sweep before averaging or normalization or normalization
	if(sweep) {
		cat("Sweeping out columns\n");
		good <- n1==ncol(data1);
		if(!is.null(data2)) good <- good & n2==ncol(data2);
		for (i in 1:ncol(data1)) { data1[,i] <- data1[,i] - mean(data1[good,i]); }
		if(!is.null(data2)) for (i in 1:ncol(data2)) { data2[,i] <- data2[,i] - mean(data2[good,i]); }
	}

	muObs <- apply(data1,1,mean,na.rm=TRUE);
	if(!is.null(data2)) {
		mu1 <- muObs;
		mu2 <- apply(data2,1,mean,na.rm=TRUE);
		cat("Comparing treatment to control\n");
		muObs <- mu1 - mu2;
	}
	if (countif(!is.na(muObs)) < 10 || max(abs(muObs)) < 0.0001) {
		stop("No differences apparent in data (all log-ratios are 0) -- OpWise is exiting\n");
	}

	if(loess) {
		cat("Loess normalization\n");
		use <- !is.na(muObs);
		if(is.null(data2)) use <- use & !is.null(A);
		if(is.null(data2)) {
			l <- loess(muObs[use]~A[use])$fitted;
		} else {
			l <- loess(muObs[use]~I(mu1[use]+mu2[use]))$fitted;
		}
		muObs[use] <- muObs[use] - l;
	}
	if(!is.null(sector)) {
		cat("Performing sector-based normalization\n");
		sectorMedians <- sapply(split(muObs,sector),median,na.rm=TRUE);
		muObs <- muObs - sectorMedians[unclass(sector)];
	}

	ss <- function(x) sum( withoutNA(x - mean(x,na.rm=TRUE))^2 );

	if(!spots) {
		sumsq <- apply(data1,1,ss);
		if (!is.null(data2)) sumsq <- sumsq + apply(data2,1,ss);
	} else {
		# aggregate everything over spots
		n1 <- aggregate(n1,list(locusId),sum)$x;
		muObs <- aggregate(muObs,list(locusId),mean,na.rm=TRUE)$x;
		sumsq <- aggregate(c(data1,recursive=TRUE),list(rep(locusId,ncol(data1))),ss)$x;
		if(!is.null(n2)) {
			n2 <- aggregate(n2,list(locusId),sum)$x;
			sumsq <- sumsq + aggregate(c(data2,recursive=TRUE),list(rep(locusId,ncol(data2))),ss)$x;
			mu1 <- aggregate(mu1,list(locusId),mean,na.rm=TRUE)$x;
			mu2 <- aggregate(mu2,list(locusId),mean,na.rm=TRUE)$x;
		}
		nSpots = table(locusId);
		if(countif(nSpots>20) > 1) {
			cat("More than 20 spots for these loci:",names(nSpots[nSpots>20]),"\n");
		}
		locusId <- unique(sort(locusId));
		cat("Aggregated over spots -- data for",length(locusId),"loci\n");
	}

	oppairs <- merge(opPred[opPred$pOp>.5,],data.frame(index1=1:length(locusId),Gene1=locusId));
	oppairs <- merge(oppairs,data.frame(index2=1:length(locusId),Gene2=locusId));
	cat("Have ",nrow(oppairs),"operon pairs to calibrate reliability from\n");
	data <- list(muObs=muObs,sumsq=sumsq,n1=n1,n2=n2,oppairs=oppairs,locusId=locusId,
			locusInput=locusInput,data1=data1,data2=data2,
			sweep=sweep,loess=loess,sector=sector,spots=spots,A=A);
	if(!is.null(n2)) { data$mu1 <- mu1; data$mu2 <- mu2; }
	if(!is.null(name)) { data$name <- name; }
	if(is.null(n2)) {
		cat("SummarizeUarrayVsOperons finished with data for",length(n1),"loci and up to",
			max(n1),"replicates\n");
	} else {
		cat("SummarizeUarrayVsOperons finished with data for ",length(n1),"loci and up to",
			max(n1),"treatment replicates and",max(n2),"treatment repliactes\n");
	}
		
	return(data);
}


# data must contain locusId, muObs, sumsq, n1, n2, and oppairs
# oppairs must contain index1, index2, and either pOp (if useP) or sOp (if !useP)
# If not provided, data$fit is estimated with FitUarrayData
#
# The fold parameter controls the "pFold" values, and the base parameter is what
# base was used to convert the data to log-scale.
#
# In the return values,
# data$stats includes the single-gene and operon-wise p-values, and is indexed by
#
#	locusId -- gene identifier
#	name -- alternate gene identifier (if included as argument to SummarizeUarrayVsOperons())
#
#	Single-gene analyses:
#	stats$p0 is P(true log-ratio > 0), which is near 0 for confident down-changers
#		and near 1 for confident up-changers.
#	stats$pFold is P(true fold-change > fold) for up-changers and
#		P(true fold-change < -fold) for down-changers.
#		Values near 1 indicate confident changers.
#	stats$muObs is the mean of the observations for the log-ratio
#	stats$xShrunk is the shrunk estimate of the log-ratio
#	stats$xLo is the lower end of the 95% confidence interval
#	stats$xHi is the higher end of the 95% confidence interval
#	stats$var is the shrunk estimate of the variance
#	stats$df is the degrees of freedom
#		(the posterior distribution of the log-ratio for each gene
#		is a t distribution with mean=x, variance=var, and degrees of freedom=df;
#		these suffice to compute stats$p0, stats$xLo, stats$pFold, stats$xHi.)
#
#	Operon-wise analyses -- these are analogous to the single-gene analyses
#		but use data from adjacent genes likely to be in the same operon as
#		well as for the gene itself:
#	stats$pow0 (like stats$p0 above)
#	stats$powFold (like stats$pFold above)
#
# data$changers describes the agreement with operons -- plots of the
# adjusted agreement with operons changersPAgreesAdj, which ranges from 0 to 1,
# or its confidence interval, changersPAgreesAdjConf, versus the average
# confidence within each confidence group changersCMeans reveal whether
# the p-values are reasonable.
AnalyzeUarrayVsOperons <- function(data,debug=FALSE,nConf=8,
				fold=1.5,base=2,minLogRatio=log(fold)/log(base)) {
	if (max(data$n1) <= 1 || (!is.null(data$n2) && max(data$n2) <= 1)) {
		stop("Cannot use AnalyzeUarrayVsOperons on data without replicates -- use OpWiseNoRepP instead\n");
	}
	if(is.null(data$fit)) {
		data$fit <- FitUarrayData(data$oppairs,data$muObs,data$sumsq,data$n1,data$n2,debug=debug);
		cat("AnalyzeUarrayVsOperons built model of consistency with operons\n");
	}
	data$stats <- TestUarrayVsModelOpWise(data,data$fit);
	data$stats$muObs <- data$muObs;
	if(!is.null(data$name)) data$stats$name <- data$name;
	data$changers <- changersVsOperonPairs(data$locusId,logodds(data$stats$p0),data$oppairs,both=TRUE);
	data$changers$confidence <- logodds2p(abs(data$changers$statistic));
	data$changers$cfactor <- cutN(data$changers$confidence,n=nConf);
	data$changersCMeans <- sapply(split(data$changers$confidence,data$changers$cfactor),mean);
	data$changersPAgrees <- sapply(split(data$changers$bAgree,data$changers$cfactor),mean);
	data$changersPAgreesConf <- t(sapply(split(data$changers$bAgree,data$changers$cfactor),
					function(x) t.test(x)$conf.int));
	# note this uses +-1 not 1,0
	data$changersPAgreesAdj <- sapply(split(data$changers$raw/data$changers$pOp,data$changers$cfactor),mean);
	data$changersPAgreesAdjConf <- t(sapply(split(data$changers$raw/data$changers$pOp,data$changers$cfactor),
					function(x) t.test(x)$conf.int));
	cat("AnalyzeUarrayVsOperons completed successfully\n");
	return(data);
}

# microarrayData should include sysName (or other "from" field) and log-ratios, for future
# 	input to SummarizeUarrayVsOperons().
# mapping should include sysName and locusId (or other "from" and "to" fields) -- other
# 	elements of mapping are ignored.
#
# Returns: a table like microarrayData, with the same row order, but with a new
# field locusId (or other "to" field)
MapUarrayGenes <- function(microarrayData, mapping, from="sysName", to="locusId") {
	nrows <- nrow(microarrayData);
	microarrayData <- merge(cbind(microarrayData,OWrowNumber=1:nrows), mapping[,c(from,to)],
				by=from,all.x=TRUE,all.y=FALSE);
	if(is.null(microarrayData) || nrow(microarrayData) != nrows) {
		stop("Error in arguments to MapUarrayGenes()");
	}
	microarrayData <- microarrayData[order(microarrayData$OWrowNumber),];
	microarrayData[is.na(microarrayData[[to]]),to] <-
		-microarrayData$OWrowNumber[is.na(microarrayData[[to]])];
	microarrayData$OWrowNumber <- NULL;
	return(microarrayData);
}


# Generates both single-gene and operon-wise p-values
# Reads data$oppair, data$muObs, data$sumsq, data$n1, and data$n2 but not other components
# oppair needs index1, index2, and either pOp or sOp (depending on useP)
TestUarrayVsModelOpWise <- function(data,fit,useP=TRUE,minLogRatio=log(1.5)/log(2)) {
	# includes p (===p0), pUp, pDown (stringent p's), mu0, V, df
	out <- TestUarrayVsModel(data$muObs,data$sumsq,data$n1,data$n2,fit=fit,minLogRatio=minLogRatio);
	pairs <- data$oppair[if(useP) data$oppair$pOp>.5 else data$oppair$sOp,];
	# compute posterior probability for each operon
	if(useP) {
		if (is.null(data$n2)) {
			ntot <- data$n1;
			nn <- data$n1;
		} else { # genomic control
			ntot <- data$n1 + data$n2 - 1;
			nn <- data$n1*data$n2/(data$n1+data$n2);
		}
		pairs$pOpPost <- logodds2p(logodds(pairs$pOp)
					+ log(relLik(data$muObs[pairs$index1],
						data$sumsq[pairs$index1],
						data$muObs[pairs$index2],
						data$sumsq[pairs$index2],
						a=data$fit$a,
						bc=data$fit$bc,
						c=data$fit$c,
						n1tot=ntot[pairs$index1],
						nn1=nn[pairs$index1],
						n2tot=ntot[pairs$index2],
						nn2=nn[pairs$index2])));
		pairs$pOpPost[is.na(pairs$pOpPost)] <- pairs$pOp[is.na(pairs$pOpPost)];
	} else {
		pairs$pOpPost <- rep(1,nrow(pairs));
	}
	pairs$row <- 1:nrow(pairs);
	ppair <- data.frame(row=1:nrow(pairs),
			pp0=TestUarrayVsModelPairs(pairs,data$muObs,data$sumsq,data$n1,data$n2,fit=fit),
			ppUp=TestUarrayVsModelPairs(pairs,data$muObs,data$sumsq,data$n1,data$n2,
						fit=fit,minLogRatio=minLogRatio,mode=1),
			ppDown=TestUarrayVsModelPairs(pairs,data$muObs,data$sumsq,data$n1,data$n2,
						fit=fit,minLogRatio=minLogRatio,mode=-1));

	# 1.Gene2=2.Gene1=center
	# index1.x,index2.x,index2.y is the sequence of indices
	triples <- merge(pairs,pairs,by.x="Gene2",by.y="Gene1");
	if(nrow(triples) > 0) {
		triples$row <- 1:nrow(triples);
		ptriples <- data.frame(
			row=1:nrow(triples),
			ppp0=apply(triples[,c("index1.x","index2.x","index2.y")], 1,
				function(j) TestUarrayVsModelN(data$muObs[j],data$sumsq[j],
							data$n1[j], if(is.null(data$n2)) NULL else data$n2[j],
							fit=fit) ),
			pppUp=apply(triples[,c("index1.x","index2.x","index2.y")], 1,
				function(j) TestUarrayVsModelN(data$muObs[j],data$sumsq[j],
							data$n1[j], if(is.null(data$n2)) NULL else data$n2[j],
							fit=fit,minLogRatio=minLogRatio,mode=1) ),
			pppDown=apply(triples[,c("index1.x","index2.x","index2.y")], 1,
				function(j) TestUarrayVsModelN(data$muObs[j],data$sumsq[j],
							data$n1[j], if(is.null(data$n2)) NULL else data$n2[j],
							fit=fit,minLogRatio=minLogRatio,mode=-1) )
			);
	} else {
		ptriples <- data.frame(row=1:nrow(triples),
					ppp0=rep(0.5,nrow(triples)),
					pppUp=rep(0.5,nrow(triples)),
					pppDown=rep(0.5,nrow(triples)) );
	}
	inPair1 <- merge(data.frame(index1=1:length(data$muObs)),pairs,all.x=TRUE,all.y=FALSE);
	inPair2 <- merge(data.frame(index2=1:length(data$muObs)),pairs,all.x=TRUE,all.y=FALSE);
	if (nrow(triples)>0) {
		inTriple <- merge(data.frame(index2.x=1:length(data$muObs)),triples,all.x=TRUE,all.y=FALSE);
	} else {
		inTriple <- data.frame(index2.x=1:length(data$muObs),row=rep(NA,length(data$muObs)));
	}

	ppair1 <- ppair[inPair1$row,];
	pOpPost1 <- pairs$pOpPost[inPair1$row];
	ppair2 <- ppair[inPair2$row,];
	pOpPost2 <- pairs$pOpPost[inPair2$row];
	ptriples <- ptriples[inTriple$row,];

	pOpPost1[is.na(pOpPost1)] <- 0;
	pOpPost2[is.na(pOpPost2)] <- 0;
	for (i in c("pp0","ppUp","ppDown")) {
		ppair1[[i]][is.na(ppair1[[i]])] <- 0.5;
		ppair2[[i]][is.na(ppair2[[i]])] <- 0.5;
	}
	for (i in c("ppp0","pppUp","pppDown")) ptriples[[i]][is.na(ptriples[[i]])] <- 0.5;

	# for genes not in pairs, have no change
	# for genes in a pair but not in a triple, use P(Op)*ppair + (1-P(Op))*p
	# for genes in a triple, with P(OpL) and P(OpR), use all three:
	# Operon-wise p = P(OpL)*P(OpR)*ptriple + (1-P(OpL))*P(OpR)*ppairR + P(OpL)*(1-P(OpR))*ppairL
	#	+ (1-P(OpL))*(1-P(OpR))*p
	# (If useP is false than all P(Op)=1)

	# associated pOp will be 0 if value is not relevant
	pTot0 <- (pOpPost1*pOpPost2*ptriples$ppp0
			+ pOpPost1*(1-pOpPost2)*ppair1$pp0
			+ (1-pOpPost1)*pOpPost2*ppair2$pp0
			+ (1-pOpPost1)*(1-pOpPost2)*out$p);
	pTotUp <- (pOpPost1*pOpPost2*ptriples$pppUp
			+ pOpPost1*(1-pOpPost2)*ppair1$ppUp
			+ (1-pOpPost1)*pOpPost2*ppair2$ppUp
			+ (1-pOpPost1)*(1-pOpPost2)*out$pUp);
	pTotDown <- (pOpPost1*pOpPost2*ptriples$pppDown
			+ pOpPost1*(1-pOpPost2)*ppair1$ppDown
			+ (1-pOpPost1)*pOpPost2*ppair2$ppDown
			+ (1-pOpPost1)*(1-pOpPost2)*out$pDown);

	return(data.frame(locusId=data$locusId,
				p0=out$p,
				pFold=ifelse(is.na(out$mu0),1,ifelse(out$mu0>0,out$pUp,out$pDown)),
				xUnshrunk=data$muObs,
				xShrunk=out$mu0,
				xLo=out$mu0 + sqrt(out$V)*qt(0.025,df=out$df),
				xHi=out$mu0 + sqrt(out$V)*qt(0.975,df=out$df),
				var=out$V,
				df=out$df,
				pow0=pTot0,
				powFold=ifelse(is.na(out$mu0),1,ifelse(out$mu0>0,pTotUp,pTotDown))
		));
}

TestUarrayVsModelPairs <- function(pair,mu,sumsq,n1,n2=NULL,useP=TRUE,useBias=TRUE,
				fit=FitUarrayData(pair,mu,sumsq,n1,n2=n2,useP=useP,useBias=useBias),
				minLogRatio=log(1.5)/log(2),
				mode=0) {
	sapply(1:nrow(pair),function(i) { j <- c(pair$index1[i],pair$index2[i]);
					TestUarrayVsModelN(mu[j],sumsq[j],n1[j],
							if(is.null(n2)) NULL else n2[j],
							fit=fit, minLogRatio=minLogRatio, mode=mode); });
}

# Tests data for a SINGLE group of genes believed to have matching expression patterns
# (i.e., be in an operon)
# All arguments except for fit should be vectors
# mode should be 0 (for P(mu>0)), 1 (for P(mu>minLogRatio)), or -1 (for P(mu< -minLogRatio))
#
TestUarrayVsModelN <- function(m, sumsq, n1, n2=NULL,
				fit=list(a=0.3, b=0.1555555,c=.28,bc=0.1,v=3.25),
				mode=0, minLogRatio=log(1.5)/log(2)) {
	ntot <- if(is.null(n2)) n1 else n1 + n2 - 1;
	nn <- if(is.null(n2)) n1 else 1/(1/n1 + 1/n2); # = 0 if either n1 or n2 is 0 -> no blow ups

	good <- (ntot >= 0) & is.finite(m);
	if(!any(good)) return(0.5); # no data

	m <- m[good]; sumsq <- sumsq[good]; ntot <- ntot[good]; nn <- nn[good];

	nprime <- 1/(1/nn + 1/fit$c); # n1c, n2c, etc. in text
	V <- (fit$a + sum(sumsq) + sum(nprime*m^2) - (sum(nprime*m))^2/(fit$b+sum(nprime))) /
		((fit$v + sum(ntot) + 1) * (fit$b + sum(nprime)));
	mu0 <-sum(m * nprime)/(sum(nprime) + fit$b);
	df <- fit$v + sum(ntot) + 1;
	if (mode==0) return( pt(-mu0/sqrt(V), df=df, lower=FALSE) );
	if (mode>0) return( pt((minLogRatio-mu0)/sqrt(V),df=df,lower=FALSE) );
	return( pt((-mu0-minLogRatio)/sqrt(V), df=df,lower=TRUE) );
}

TestUarrayVsModel <- function(m, sumsq, fit=list(a=0.3, b=0.1,c=Inf,bc=0.1,v=3.25),
			n1=10, n2=NULL, minLogRatio=log(1.5)/log(2)) {
	ntot <- if(is.null(n2)) n1 else n1 + n2 - 1;
	nn <- if(is.null(n2)) n1 else n1 * n2/(n1 + n2);
	nprime <- 1/(1/nn + 1/fit$c);
	V <- (fit$a + m^2 * nprime * fit$b/(fit$b + nprime) + sumsq)/((fit$b + nprime)*(fit$v + ntot + 1));
	mu0 <- m * nprime/(nprime + fit$b);
	p <- pt(-mu0/sqrt(V), df=fit$v + ntot + 1, lower=FALSE);
	pUp <- pt((minLogRatio-mu0)/sqrt(V),df=fit$v+ntot+1,lower=FALSE);
	pDown <- pt((-mu0-minLogRatio)/sqrt(V), df=fit$v+ntot+1,lower=TRUE);
	bad <- ntot<1 | andNoNA(nn==0);
	return(data.frame(p=ifelse(bad,0.5,p),
			pUp=ifelse(bad,1.0,pUp),
			pDown=ifelse(bad,1.0,pDown),
			mu0=mu0,
			V=V,
			df=ifelse(bad,NA,fit$v + ntot + 1)));
}

# This fits a linear model with systematic errors to summarized statistics.
# pair described operon pairs, and must include:
#	pOp, index1, and index2 (or binary sOp instead of pOp if useP is FALSE)
#	pOp is the probability of being an operon
#	index1 and index2 are the row numbers of the two genes
#
# mu, sumsq, n1, and (for genomic control or external reference experiments) n2 are
# the mean, the total squared deviance, and the number of replicate measurements for each gene.
#
# Returns a fit object with a (alpha),b (beta),c (gamma),bc (beta-primed), and
# a test of whether bias is present (chisqC and the p-value chisqCP from the maximum
# likelihood ratio test)
FitUarrayData <- function(pair,mu,sumsq, n1, n2=NULL, useP=TRUE, useBias=TRUE, debug=FALSE) {
	if (is.null(n2)) {
		ntot <- n1;
		nn <- n1; # N in the paper
	} else { # genomic control
		ntot <- n1 + n2 - 1; # powers of sqrt(theta) in likelihood function for each gene
		nn <- n1*n2/(n1+n2); # multiplier of (muObs - mu)^2 in likelihood function for each gene
	}
	fit <- fitFDist(sumsq/(ntot-1),ntot-1); # sumsq depends on ntot, not on nn
	a <- fit$scale*fit$df2;
	v <- fit$df2-1;
	good <- andNoNA(is.finite(sumsq/(ntot-1)) & nn >= 1);
	meanM2 <- mean(mu[good]^2);
	bc <- 1/( (v-1)/a * meanM2 - mean(1/nn[good])); # starting point
	if(bc < 1e-6) bc <- 0.01;
	f <- function(x) { if(x < 1e-6) return(1e20);
			v <- OpUarrayLLNoBias(mu,sumsq,n1,n2,a,x,v);
			v <- -v;
			attr(v,"gradient") <- -attr(v,"gradient");
			return(v);
	};
	nlmOut <- nlm(f, bc, check.analyticals=debug);
	if(debug) cat("nlm bc","min",nlmOut$minimum,"est(bc)",nlmOut$estimate,"grad",nlmOut$gradient,
				"code",nlmOut$code,"iter",nlmOut$iterations,"\n");
	bc <- nlmOut$estimate;
	fit <- list(a=a,bc=bc,b=bc,c=Inf,v=v);
	if(!is.null(pair) && bc > 1e-5 && useBias) {
		p <- if(useP) pair[pair$pOp>.5,] else pair[pair$sOp,];
		if(!useP) p$pOp <- rep(1,nrow(p));
		p <- p[good[p$index1] & good[p$index2],];
		if(debug) cat("Using",nrow(p),"good operon pairs to estimate bias\n")
		# - sign to maximize, 4*bc a legal and plausible starting point
		# use 1/c as the parameter because then nlm behaves better
		#	(numerical derivatives issue?)
		#
		# For gradient computation: f is the log likelihood if an operon
		# d(log(1-p+p*exp(f(c))))/dc = p*exp(f(c))*df/dc/(1-p+p*exp(f(c)))
		# dg/d(1/c) = dg/dc / (d(1/c)/dc) = -c^2 * df/dc = -(1/c)^-2 * df/dc
		relLikCInv <- function(x) {
					if(length(x) != 1) stop("More than 1 value");
					liks <- relLik(mu[p$index1],sumsq[p$index1],
						mu[p$index2],sumsq[p$index2],
						a=a,bc=bc,c=1/x,v=v,
						n1tot=ntot[p$index1],
						n2tot=ntot[p$index2],
						nn1=nn[p$index1],
						nn2=nn[p$index2]);
					dllsdc <- relLikLogDeriv(mu[p$index1],sumsq[p$index1],
							mu[p$index2],sumsq[p$index2],
							a=a,bc=bc,c=1/x,v=v,
							n1tot=ntot[p$index1],
							n2tot=ntot[p$index2],
							nn1=nn[p$index1],
							nn2=nn[p$index2]);
					ratios <- 1-p$pOp + p$pOp*liks;
					out <- -sum(log(ratios));
					attr(out,"gradient") <- x^-2*sum(p$pOp*liks*dllsdc/ratios);
					return(out);
		}
		# c=4*bc (3:1 signal:bias) is a reasonable starting point
		nlmOut <- nlm(relLikCInv, 1/(4*bc), check.analyticals=debug);
		if(debug) cat("nlm c","min",nlmOut$minimum,"est(1/c)",nlmOut$estimate,"grad",nlmOut$gradient,
					"code",nlmOut$code,"iter",nlmOut$iterations,"\n");
		fit$c <- 1/nlmOut$estimate;
		fit$b <- 1/(1/fit$bc-1/fit$c);

		# for max. likelihood ratio test (df=1)
		fit$chisqC <- 2*((-nlmOut$minimum) - (-relLikCInv(0))); # c=Inf or 1/c=0
		# /2 is because we are optimizing a variable at a boundary, and under the
		# null hypothesis, half the time the optimal estimate will be 1/c=0.
		# See Self and Liang 1987 J. Am. Stat. Assoc. 82:605-610
		fit$chisqCP <- pchisq(fit$chisqC,df=1,lower=FALSE)/2;
	}
	return(fit);
}

# Assuming genes 1 and 2 are in an operon, the likelihood ratio
# for the data given that they are in an operon versus if they were not
#
# m1 is the observed mean, ss1 is the total squared deviance, n1 is the #data points
# or degrees of freedom, nn1 is the second degrees of freedom (N_1 in the text)
# and similarly for gene 2
relLik <- function(m1,ss1,m2,ss2,a=0.3,bc=.1,c=0.5,v=3.25,
			n1tot=10,n2tot=10, nn1=n1tot, nn2=n2tot) {
	if(c-bc < 1e-6) return( 0 );
	b <- 1/(1/bc-1/c);
	n1c <- 1/(1/nn1+1/c);
	n2c <- 1/(1/nn2+1/c);

	return( (a/2)^((v+1)/2-(v+1))
		* sqrt(b/(b+n1c+n2c)) * 1/sqrt((1+nn1/c)*(1+nn2/c)) * sqrt((bc+nn1)*(bc+nn2))/bc
		* gamma((v+n1tot+n2tot+1)/2)*gamma((v+1)/2)/(gamma((v+n1tot+1)/2)*gamma((v+n2tot+1)/2))
		* ((a + ss1 + m1^2*nn1*bc/(bc+nn1))/2)^((v+n1tot+1)/2) 
		* ((a + ss2 + m2^2*nn2*bc/(bc+nn2))/2)^((v+n2tot+1)/2) 
		/ ((a + ss1 + ss2 + n1c*m1^2 + n2c*m2^2 - (m1*n1c + m2*n2c)^2/(b+n1c+n2c))/2)^((v+n1tot+n2tot+1)/2) );
}

# derivative of the logarithm of relLik() with respect to c
relLikLogDeriv <- function(m1,ss1,m2,ss2,a=0.3,bc=.1,c=0.5,v=3.25,
			n1tot=10,n2tot=10, nn1=n1tot, nn2=n2tot) {
	if(c-bc < 1e-6) return( NA );

	b <- 1/(1/bc-1/c);
	n1c <- 1/(1/nn1+1/c);
	n2c <- 1/(1/nn2+1/c);

	# derivatives
	dbdc <- -(c/bc - 1)^-2;
	dn1cdc <- (1 + c/nn1)^-2;
	dn2cdc <- (1 + c/nn2)^-2;

	dcombdc <- (dn1cdc*m1^2 + dn2cdc*m2^2
			- 2*(m1*n1c + m2*n2c)*(m1*dn1cdc + m2*dn2cdc)/(b+n1c+n2c)
			- -(m1*n1c + m2*n2c)^2/(b+n1c+n2c)^2 * (dbdc+dn1cdc+dn2cdc) )/2;

	return(0.5*dbdc/b - 0.5*(dbdc+dn1cdc+dn2cdc)/(b+n1c+n2c)
		- 0.5*(-nn1/c^2)/(1+nn1/c) - 0.5*(-nn2/c^2)/(1+nn2/c)
		- (v+n1tot+n2tot+1)/2 * dcombdc
			/ ((a + ss1 + ss2 + n1c*m1^2 + n2c*m2^2 - (m1*n1c+m2*n2c)^2/(b+n1c+n2c))/2) );
}

OpUarrayLLNoBias <- function(m,sumsq,n1,n2,a,b,v) {
	ntot <- if(is.null(n2)) n1 else n1 + n2 - 1;
	nn <- if(is.null(n2)) n1 else 1/(1/n1 + 1/n2);
	good <- (ntot >= 0) & is.finite(m);
	if(!any(good)) return(0); # no data
	m <- m[good]; sumsq <- sumsq[good]; ntot <- ntot[good]; nn <- nn[good];

	ll <- sum( log(a)*(v+1)/2 + log(b)/2 - lgamma((v+1)/2) - log(2)*(v+1)/2
			- (ntot+1)/2 * log(2*pi)
			+ (log(2*pi) - log(b+nn))/2 + lgamma((v+ntot+1)/2)
			- ((v+ntot+1)/2) * log((a + sumsq + m^2*nn*b/(b+nn))/2) );
	attr(ll,"gradient") <- 0.5*sum(1/b - 1/(b+nn) - (v+ntot+1)*m^2*nn^2/((a+sumsq)*(b+nn)^2 + m^2*nn*b*(b+nn)));
	return(ll);

}

### Taken from ebayes.R in LIMMA, by Gordon Smyth
### Reference: G.K. Smyth (2004) "Linear Models and Empirical Bayes Methods for Assessing
### 	Differential Expression in Microarray Experiments." Statistical Applications in
###	Genetics and Molecular Biology 3(1) Article 3.
###
fitFDist <- function(x,df1) {
#	Moment estimation of the parameters of a scaled F-distribution
#	The first degrees of freedom is given
#	Gordon Smyth
#	8 Sept 2002.  Last revised 12 Apr 2003.

#	Remove missing or infinite values and zero degrees of freedom
	o <- is.finite(x) & is.finite(df1) & (x > 0) & (df1 > 0)
	if(any(!o)) {
		x <- x[o]
		df1 <- df1[o]
	}
	n <- length(x)
	if(n==0) return(list(scale=NA,df2=NA))
	
#	Better to work on with log(F)
	z <- log(x)
	e <- z-digamma(df1/2)+log(df1/2)
	emean <- mean(e)
	evar <- mean(n/(n-1)*(e-emean)^2-trigamma(df1/2))
	if(evar > 0) {
		df2 <- 2*trigammaInverse(evar)
		s20 <- exp(emean+digamma(df2/2)-log(df2/2))
	} else {
		df2 <- Inf
		s20 <- exp(emean)
	}
	list(scale=s20,df2=df2)
}

trigammaInverse <- function(x) {
#	Solve trigamma(y) = x for y
#	Gordon Smyth
#	8 Sept 2002.  Last revised 12 March 2004.

#	Non-numeric or zero length input
	if(!is.numeric(x)) stop("Non-numeric argument to mathematical function")
	if(length(x)==0) return(numeric(0))

#	Treat out-of-range values as special cases
	omit <- is.na(x)
	if(any(omit)) {
		y <- x
		if(any(!omit)) y[!omit] <- Recall(x[!omit])
		return(y)
	}
	omit <- (x < 0)
	if(any(omit)) {
		y <- x
		y[omit] <- NaN
		warning("NaNs produced")
		if(any(!omit)) y[!omit] <- Recall(x[!omit])
		return(y)
	}
	omit <- (x > 1e7)
	if(any(omit)) {
		y <- x
		y[omit] <- 1/sqrt(x[omit])
		if(any(!omit)) y[!omit] <- Recall(x[!omit])
		return(y)
	}
	omit <- (x < 1e-6)
	if(any(omit)) {
		y <- x
		y[omit] <- 1/x[omit]
		if(any(!omit)) y[!omit] <- Recall(x[!omit])
		return(y)
	}

#	Newton's method
#	1/trigamma(y) is convex, nearly linear and strictly > y-0.5,
#	so iteration to solve 1/x = 1/trigamma is monotonically convergent
	Rversion <- as.numeric(version$major)+as.numeric(version$minor)/10
	if(Rversion > 1.81)
		tetraGamma <- function(x) psigamma(x,deriv=2)
	else
		tetraGamma <- tetragamma
	y <- 0.5+1/x
	iter <- 0
	repeat {
		iter <- iter+1
		tri <- trigamma(y)
		dif <- tri*(1-tri/x)/tetraGamma(y)
		y <- y+dif
		if(max(-dif/y) < 1e-8) break
		if(iter > 50) {
			warning("Iteration limit exceeded")
			break
		}
	}
	y
}

# returns a data frame of nPairs, nGenes, statistic, opAcc (the cumulative accuracy estimate), and other stats
# nPairs can be > nGenes because of genes in large operons;
# nPairs can be < nGenes b/c of genes not in operons
# both Gene1 (the gene in the list) and Gene2 (the gene whose data is being tested -- this may
# not be the same order as in the input) are also included.
#
# input (locusId, statistic) need not be sorted
changersVsOperonPairs <- function(locusId, # vector
				statistic, # must be signed (for up vs. down), e.g., zScore
				# must include Gene1, Gene2, and pOp; including only pOp>.5 is recommended
				pairs,
				flipPairs=TRUE, # need to add reverse of pairs (Gene2,Gene1, same pOp)?
				window=100, # but first set of window is cumulative average
				useP=FALSE, # use P(Operon)?
				up=TRUE,
				both=FALSE,
				minAbs=0) { # only for the 2nd gene in the pair, but this can be biasing...
	if (both) {
		out1 <- changersVsOperonPairs(locusId,statistic,pairs,flipPairs=flipPairs,useP=useP,up=TRUE,minAbs=minAbs);
		out2 <- changersVsOperonPairs(locusId,statistic,pairs,flipPairs=flipPairs,useP=useP,up=FALSE,minAbs=minAbs);
		return(rbind(out1,out2));
	}
	if(flipPairs) {
		pairs2 <- rbind(pairs[,c("Gene1","Gene2","pOp")],
				data.frame(Gene1=pairs$Gene2,Gene2=pairs$Gene1,pOp=pairs$pOp));
	} else {
		pairs2 <- pairs[,c("Gene1","Gene2","pOp")];
	}
	# get statistic for both pairs, remove NA and small (unconfident) values, and sort by stat1
	pairs2 <- merge(pairs2,data.frame(Gene1=locusId,stat1=statistic),by="Gene1");
	pairs2 <- merge(pairs2,data.frame(Gene2=locusId,stat2=statistic),by="Gene2");
	pairs2 <- pairs2[!is.na(pairs2$stat1) & !is.na(pairs2$stat2),];
	pairs2 <- pairs2[abs(pairs2$stat2) >= minAbs,];
	sign <- if(up) 1 else -1;
	pairs2 <- pairs2[order(-sign*pairs2$stat1,pairs2$Gene1),];
	n <- countif(sign*pairs2$stat1>0);
	pairs2 <- pairs2[1:n,];

	out <- data.frame(nPairs=1:n);

	# to compute nGenes, need to sort all of them, whether or not in pairs
	genes <- merge(data.frame(Gene1=locusId,stat1=statistic),pairs2[,c("Gene1","Gene2")],
			by="Gene1", all.x=TRUE,all.y=FALSE);
	genes <- genes[!is.na(genes$stat1),];
	genes <- genes[order(-sign*genes$stat1,genes$Gene1),];
	genes <- genes[sign*genes$stat1>0,];
	# counting different values
	nGenes <- c(1,1+cumsum(ifelse(genes$Gene1[1:(nrow(genes)-1)] != genes$Gene1[2:nrow(genes)],1,0)));
	out$nGenes <- nGenes[!is.na(genes$Gene2)]; # only keep rows that were in pairs2

	out$statistic <- pairs2$stat1;

	out$bAgree <- ifelse(sign*pairs2$stat2>0,1,0);
	out$pOp <- pairs2$pOp;
	out$raw <- ifelse(out$bAgree,1,-1)/(if(useP) out$pOp else rep(1,n));
	# smooth raw over window size to give opAcc
	if(n > window) {
		out$opAcc <- cumsum(out$raw);
		out$opAcc[(window+1):n] <- out$opAcc[(window+1):n] - out$opAcc[1:(n-window)];
		out$opAcc <- out$opAcc/pmin(1:n,window);
	} else {
		out$opAcc <- cumsum(out$raw)/(1:n);
	}
	out$cumAcc <- cumsum(out$raw)/(1:n);
	out$Gene1 <- pairs2$Gene1;
	out$Gene2 <- pairs2$Gene2;
	return(out);
}

# number of times x is TRUE
countif <- function(x) { return(sum(ifelse(x,1,0))); }

# percentage of time that x is TRUE
countPerIf <- function(x,condition=NULL) {
	if(!is.null(condition)) { x <- x[condition]; }
	 return (sum(ifelse(x,1,0))/length(x));
}

withoutNA <- function(x) { x[!is.na(x)]; }

andNoNA <- function(x) { ifelse(is.na(x),FALSE,x) }

logodds <- function(p) { return(log(p/(1-p))); }

logodds2p <- function(x) { return( 1/(1+exp(-x)) ); }

cutN <- function(x,n=8,...) { return(cut(x,breaks=unique(quantile(x,seq(0,1,1/n))),include.lowest=TRUE,...)); }

# Plots

OperonAgreementVsConfidencePlot <- function(analyzed,...) {
	plot(analyzed$changersCMeans,analyzed$changersPAgreesAdj,type="b",xlim=c(0.5,1),ylim=c(0,1.1),
			xlab="Confidence of Measurement",
			ylab="Agreement with Operons (Adjusted)",...);
	arrows(analyzed$changersCMeans,analyzed$changersPAgreesAdjConf[,1],
		analyzed$changersCMeans,analyzed$changersPAgreesAdjConf[,2],
		code=3,angle=90,col="grey")
}

SignificantChangersPlot <- function(analyzed, bar=0.01) {
	plot(1:nrow(analyzed$stats), sort(1-2*abs(analyzed$stats$pow0-.5)), type="l",log="xy",
			xlab="# Genes", ylab="Significance of difference from zero (two-tailed)",
			ylim=c(1e-5,1),xlim=c(1,1000));
	lines(1:nrow(analyzed$stats), sort(1-2*abs(analyzed$stats$p0-.5)), lty=2);
	lines(c(1e-40,1e20),c(bar,bar),col="darkgrey");
	legend(1,1,c("Single-gene","Operon-wise"),col=c(1,1),lwd=c(1,1),lty=2:1,pch=c(30,30));
}


### Simulator -- useful for verifying the correct behavior of the method
### or for identifying discrepancies between the model and the data
###
### Usually, do something like
### SimulateUarrayVsOperons(analyzed$locusId, opPred,
###				fit=analyzed$fit,
###				n = 3,		# actual number of replicate experiments
###				n2 = NULL,	# or actual number of control measurements
###				pNA=countPerIf(is.na(c(raw[,2:5],recursive=TRUE))))
###
### (where 2:6 are the numbers of the data columns in the raw data)
###
### Returns -- the same as AnalyzeUarrayVsOperons() [or as SummarizeUarrayVsOperons() if
###			analyze=FALSE]
SimulateUarrayVsOperons <- function(locusIds, opPreds, # with Gene1, Gene2, and pOp, or NULL
				# standard model taken from Shewanella salt data
				fit=NULL,
				a = if(is.null(fit)) 0.3 else fit$a,
				bc = if(is.null(fit)) 0.1 else fit$bc,
				c = if(is.null(fit)) 0.28 else fit$c,
				v = if(is.null(fit)) 3.25 else fit$v,
				b = 1/(1/bc-1/c),
				n = 10, # number of replicates (must be constant)
				control=FALSE, # true if have separate control chips
				n2 = n, # number of replicates for control data (if not on same chip)
				pNA = 0.05, # proportion of missing measurements

				# lots of alternatives, defaulting to standard model
				# sd(mu) = (changer? s : s2) + sqrt(sFactor)*sigma
				pChange = 1.0, # by default all genes change
				s = 0.0, # standard deviation of true mu
				s2 = 0.0, # standard deviation of true mu for non-changers
				sFactor = 1/b, # variance ratio for changers
				sFactorNC = sFactor, # variance ratio for non-changers

				# s.d. of replicate measurements = sigmaFactor/sqrt(rgamma(shape,scale))
				sigmaFactor = 1,
				shape=(v+1)/2, scale=2/a,
				sigma = sigmaFactor/sqrt(rgamma(length(locusIds),shape=shape,scale=scale)),
				sdBias = 0, # systematic error (sigma is between replicates)
				scaledBias = 1/c, # systematic error relative to sigma^2 (like sFactor)
				matchSigma=TRUE, # force sigma/theta to match across operon

				sweep=FALSE, # sweep means to be zero

				debug=FALSE,
				seed=NULL,
				analyze=TRUE, # run AnalyzeUarrayVsOperons
				estimate=TRUE, # if false, analyze with true parameters
				... # for AnalyzeUarrayVsOperons
				) {
	if (is.null(seed)) seed <- floor(runif(1)*1e9);
	set.seed(seed);
	changer <- runif(length(locusIds)) < pChange;
	mu <- rnorm(length(locusIds)) * (ifelse(changer, s, s2) + sigma*sqrt(ifelse(changer,sFactor,sFactorNC)));

	# make mu and perhaps sigma agree for all operon pairs
	if (!is.null(opPreds)) {
		oppairs <- merge(opPreds[,c("Gene1","Gene2","pOp")],
					data.frame(Gene1=locusIds,index1=1:length(locusIds)),by="Gene1");
		oppairs <- merge(oppairs,data.frame(Gene2=locusIds,index2=1:length(locusIds)),by="Gene2");
		oppairs$sOp <- runif(nrow(oppairs)) < oppairs$pOp;
		oppairs <- oppairs[order(oppairs$index1,oppairs$index2),];
		oppairsT <- oppairs[oppairs$sOp,];

		mu[oppairsT$index1] <- mu[oppairsT$index2];
		if (matchSigma) sigma[oppairsT$index1] <- sigma[oppairsT$index2];
		while (countif(badpair <- (mu[oppairsT$index1] != mu[oppairsT$index2])) > 0) {
			if(debug) cat("Syncing operon pairs, mismatch left", countif(badpair),"\n");
			mu[oppairsT$index2] <- mu[oppairsT$index1];
			if(matchSigma) sigma[oppairsT$index2] <- sigma[oppairsT$index1];
		}
	}

	bias <- rnorm(length(locusIds)) * (sdBias + sigma*sqrt(scaledBias));

	ss <- function(x) sum( withoutNA(x - mean(x,na.rm=TRUE))^2 );
	if (control) {
		
		data1 <- t(sapply(1:length(locusIds),
			function(x) ifelse(runif(n)<pNA,NA,rnorm(n,mean=mu[x]+bias[x],sd=sigma[x]))))
		data2 <- t(sapply(1:length(locusIds),
			function(x) ifelse(runif(n2)<pNA,NA,rnorm(n2,mean=0,sd=sigma[x]))));
		muObs <- apply(data1,1,mean,na.rm=TRUE) - apply(data2,1,mean,na.rm=TRUE);
		# estimated standard deviation of mu: Var(SumX/N - SumY/N) = (Var(X)+Var(Y))/N)
		sumsq <- apply(data1,1,ss) + apply(data2,1,ss);
	} else {
		data1 <- t(sapply(1:length(locusIds),
			function(x) ifelse(runif(n)<pNA,NA,rnorm(n,mean=mu[x]+bias[x],sd=sigma[x]))));
		data2 <- NULL;
		muObs <- apply(data1,1,mean,na.rm=TRUE);
		sumsq <- apply(data1,1,ss);
	}
	if(sweep) muObs <- muObs - mean(muObs);
	if (!is.null(opPreds)) oppairs$agree <- (muObs[oppairs$index1]>0) == (muObs[oppairs$index2]>0);
	n1 <- apply(data1, 1, function(x) countif(!is.na(x)))
	n2 <- if(is.null(data2)) NULL else apply(data2, 1, function(x) countif(!is.na(x)));
	sim <- list(mu=mu,sigma=sigma,bias=bias,muObs=muObs,sumsq=sumsq,
			n1=n1,n2=n2,
			changer=changer,
			oppairs=if(is.null(opPreds)) NULL else oppairs,data1=data1,data2=data2,
			control=control,
			locusId=locusIds,
			# summary statistics
			medianSS=median(sumsq,na.rm=TRUE),
			sdMuObs=sd(muObs,na.rm=TRUE),
			seed=seed,
			trueFit=list(a=a,bc=bc,b=b,c=c,v=v),
			pOpAgree=if(is.null(opPreds)) NULL else countPerIf(withoutNA(oppairs$agree[oppairs$pOp>.5])));
	if(!estimate) sim$fit <- sim$trueFit;
	if(analyze) return(AnalyzeUarrayVsOperons(sim,debug=debug,...));
	return(sim);
}

## No-replicate analyses. The p-values for each gene are named as for
## replicate-based OpWise (p0 and pow0 for single-gene and operon-wise posterior
## probabilities that the true log-ratio is greater than 0; pFold and powFold for the
## single-gene and operon-wise posterior probabilities that the true change is in
## the observed direction and by fold or more.
OpWiseNoRepP <- function(muObs,pairs,fit=OpWiseNoRepFit(muObs,pairs),
			locusId=NULL,
			fold=1.5,base=2,minLogRatio=log(fold)/log(base)) {
	sqrtV <- sqrt( (fit$a + muObs^2*fit$b/(1+fit$b)) / ((fit$v+2)*(1+fit$b)) );
	mu0 <- muObs/(1+fit$b);
	p0 <- pt( -mu0/sqrtV, df=fit$v+2, lower=FALSE);
	pUp <- pt((minLogRatio-mu0)/sqrtV, df=fit$v+2, lower=FALSE);
	pDown <- pt((-minLogRatio-mu0)/sqrtV, df=fit$v+2, lower=TRUE);
	out <- data.frame(muObs=muObs,p0=p0,pUp=pUp,pDown=pDown);
	if(!is.null(locusId)) out$locusId<-locusId;

	pairs$pOpPost <- logodds2p(logodds(pairs$pOp) + OpWiseNoRepLRatio(muObs,pairs,fit$a,fit$b,fit$v));
	pairs$pOpPost[is.na(pairs$pOpPost)] <- pairs$pOp[is.na(pairs$pOpPost)];

	pairs$row <- 1:nrow(pairs);
	pairsP <- as.data.frame(t(as.matrix(sapply(pairs$row,
		function(i) OpWiseNoRepTestOp(muObs[c(pairs$index1[i],pairs$index2[i])], 
							fit$a, fit$b, fit$v, minLogRatio=minLogRatio)))));
	names(pairsP) <- c("pp0","ppUp","ppDown");
	pairs <- cbind(pairs,pairsP);

	triples <- merge(pairs,pairs,by.x="Gene2",by.y="Gene1");
	triples$row <- 1:nrow(triples);
	triplesP <- as.data.frame(t(sapply(triples$row,
		function(i) OpWiseNoRepTestOp(muObs[c(triples$index1.x[i],triples$index2.x[i],
							triples$index2.y[i])],
						fit$a, fit$b, fit$v, minLogRatio=minLogRatio))));
	names(triplesP) <- c("ppp0","pppUp","pppDown");
	triples <- cbind(triples,triplesP);

	inPair1 <- merge(data.frame(index1=1:length(muObs)),pairs,all.x=TRUE,all.y=FALSE);
	inPair2 <- merge(data.frame(index2=1:length(muObs)),pairs,all.x=TRUE,all.y=FALSE);
	# (has to be the middle in the triple)
	inTriple <- merge(data.frame(index2.x=1:length(muObs)),triples,all.x=TRUE,all.y=FALSE);

	# ordered by muObs and expanded with NA rows for missing entries
	ppair1 <- pairs[inPair1$row,];
	ppair2 <- pairs[inPair2$row,];
	ptriple <- triples[inTriple$row,];

	for(i in c("pp0","ppUp","ppDown","pOpPost")) {
		ppair1[[i]][is.na(ppair1[[i]])] <- 0;
		ppair2[[i]][is.na(ppair2[[i]])] <- 0;
	}
	for(i in c("ppp0","pppUp","pppDown")) {
		ptriple[[i]][is.na(ptriple[[i]])] <- 0;
	}

	out$pow0 <- (ppair1$pOpPost*ppair2$pOpPost*ptriple$ppp0
			+ ppair1$pOpPost*(1-ppair2$pOpPost)*ppair1$pp0
			+ (1-ppair1$pOpPost)*ppair2$pOpPost*ppair2$pp0
			+ (1-ppair1$pOpPost)*(1-ppair2$pOpPost)*out$p0);
	out$powUp <- (ppair1$pOpPost*ppair2$pOpPost*ptriple$pppUp
			+ ppair1$pOpPost*(1-ppair2$pOpPost)*ppair1$ppUp
			+ (1-ppair1$pOpPost)*ppair2$pOpPost*ppair2$ppUp
			+ (1-ppair1$pOpPost)*(1-ppair2$pOpPost)*out$pUp);
	out$powDown <- (ppair1$pOpPost*ppair2$pOpPost*ptriple$pppDown
			+ ppair1$pOpPost*(1-ppair2$pOpPost)*ppair1$ppDown
			+ (1-ppair1$pOpPost)*ppair2$pOpPost*ppair2$ppDown
			+ (1-ppair1$pOpPost)*(1-ppair2$pOpPost)*out$pDown);

	out$pFold <- ifelse(is.na(muObs),0,ifelse(out$muObs>0,out$pUp,out$pDown));
	out$powFold <- ifelse(is.na(muObs),0,ifelse(out$muObs>0,out$powUp,out$powDown));
	return(out);
}

# Given a and v, b is implied by a according to
# mean(muObs^2) = a/(v-1) * (1+1/b)
OpWiseNoRepLL <- function(a, v, muObs, pairs) {
	if(length(a) != 1 || length(v) != 1 ) stop("More than one value");
	if(a < 1e-6) return(-1e20);
	b <- 1/(mean(muObs^2,na.rm=TRUE)*((v-1)/a) - 1);
	if(b < 0 || b > 1e6) return(-1e20);
	m1 <- muObs[pairs$index1];
	m2 <- muObs[pairs$index2];
	C <- 1/(2*pi) *sqrt(b/(b+2)) * (a/2)^((v+1)/2) / gamma((v+1)/2);
	M = a+m1^2+m2^2-(m1+m2)^2/(b+2);
	p12op <- C * gamma((v+3)/2) * (M/2)^(-(v+3)/2);
	bb <- b/(1+b);
	p1nop <- dt(m1*sqrt((v+1)*bb/a),df=v+1)*sqrt((v+1)*bb/a);
	p2nop <- dt(m2*sqrt((v+1)*bb/a),df=v+1)*sqrt((v+1)*bb/a);
	ppair <- (1-pairs$pOp)*p1nop*p2nop + pairs$pOp*p12op;
	return(sum(log(ppair)));
}

OpWiseNoRepLRatio <- function(muObs,pairs,a,b,v) {
	m1 <- muObs[pairs$index1];
	m2 <- muObs[pairs$index2];
	C <- 1/(2*pi) *sqrt(b/(b+2)) * (a/2)^((v+1)/2) / gamma((v+1)/2);
	M = a+m1^2+m2^2-(m1+m2)^2/(b+2);
	p12op <- C * gamma((v+3)/2) * (M/2)^(-(v+3)/2);
	bb <- b/(1+b);
	p1nop <- dt(m1*sqrt((v+1)*bb/a),df=v+1)*sqrt((v+1)*bb/a);
	p2nop <- dt(m2*sqrt((v+1)*bb/a),df=v+1)*sqrt((v+1)*bb/a);
	return(log(p12op)-log(p1nop)-log(p2nop));
}

# Doesn't try to estimate v -- it must be given
OpWiseNoRepFit <- function(muObs,pairs,v0=3,b0=2) {
	a0 <- mean(muObs^2)*(v0-1)/(1+1/b0);
	pairs <- pairs[pairs$pOp>=.5 & !is.na(muObs[pairs$index1]) & !is.na(muObs[pairs$index2]),];
	
	param <- nlm(function(param) -OpWiseNoRepLL(param[1],param[2],muObs,pairs),c(a0,v0))$estimate;
	a <- param[1]; v <- param[2];
	b <- 1/(mean(muObs^2,na.rm=TRUE)*((v-1)/a) - 1);
	return(data.frame(a=a,b=b,v=v,LL=OpWiseNoRepLL(a,v,muObs,pairs)));
}

# P(operon mu > 0| values for operon members)
# also does p0,pUp,pDown
# assumes that the operon is correct
OpWiseNoRepTestOp <- function(mus,a,b,v,fold=1.5,base=2,minLogRatio=log(fold)/log(base)) {
	mus <- withoutNA(mus);
	if(length(mus)<1) return(c(NA,NA,NA));
	mm <- mean(mus);
	N <- length(mus);
	sV <- sum((mus-mm)^2);
	mu0 <- mm*N/(N+b);
	sqrtV <- sqrt( (a + sV + mm^2*b*N/(N+b))/((v+N+1)*(N+b)) );
	p0 <- pt(-mu0/sqrtV,df=v+N+1,lower=FALSE);
	pUp <- pt((minLogRatio-mu0)/sqrtV,df=v+N+1,lower=FALSE);
	pDown <- pt((-minLogRatio-mu0)/sqrtV,df=v+N+1,lower=TRUE);
	return(c(p0=p0,pUp=pUp,pDown=pDown));
}