\name{makeFabiaData}
\alias{makeFabiaData}
\title{Generation of Bicluster Data}
\description{
  \code{makeFabiaData}: \R implementation of \code{makeFabiaData},
  which generates a data matrix containing biclusters.
}
\usage{
makeFabiaData(n,l,p,f1,f2,of1,of2,sd_noise,sd_z_noise,
mean_z,sd_z,sd_l_noise,mean_l,sd_l)
}
\arguments{
  \item{n}{number of observations.}
  \item{l}{number of samples.}
  \item{p}{number of biclusters.}
  \item{f1}{l/f1 max. additional samples that are active in a bicluster.}
  \item{f2}{n/f2 max. additional observations that form a pattern in a bicluster.}
  \item{of1}{minimal active samples in a bicluster.}
  \item{of2}{minimal observations that form a pattern in a bicluster.}
  \item{sd_noise}{Gaussian zero mean noise std on data matrix.}
  \item{sd_z_noise}{Gaussian zero mean noise std for deactivated hidden factors.}
  \item{mean_z}{Gaussian mean for activated factors.}
  \item{sd_z}{Gaussian std for activated factors.}
  \item{sd_l_noise}{Gaussian zero mean noise std if no observation patterns are present.}
  \item{mean_l}{Gaussian mean for observation patterns.}
  \item{sd_l}{Gaussian std for observation patterns.}
}
\details{
  Essentially the data generation model is the sum of outer products of
  sparse vectors:
  \deqn{X = \sum_{i=1}^{p} \lambda_i z_i^T + U}
  where the number of summands \eqn{p} is the number of biclusters.
  The matrix factorization is
  \deqn{X = L Z + U}
  and noise free
  \deqn{Y = L Z}
  Here \eqn{\lambda_i} are from \eqn{R^n}, \eqn{z_i} from \eqn{R^l},
  \eqn{L} from \eqn{R^{n \times p}}, \eqn{Z} from \eqn{R^{p \times l}},
  and \eqn{X}, \eqn{U}, \eqn{Y} from \eqn{R^{n \times l}}.
  Sequentially \eqn{\lambda_i} are generated using \code{n}, \code{f2},
  \code{of2}, \code{sd_l_noise}, \code{mean_l}, \code{sd_l}.
  \code{of2} gives the minimal number of observations participating in a
  bicluster to which between 0 and \eqn{n/f2} observations are added,
  where the number is uniformly chosen.
  \code{sd_l_noise} gives the noise of observations not participating in
  the bicluster.
  \code{mean_l} and \code{sd_l} determine the Gaussian from which the
  values are drawn for the observations that participate in the bicluster.
  The sign of the mean is randomly chosen for each component.
  Sequentially \eqn{z_i} are generated using \code{l}, \code{f1},
  \code{of1}, \code{sd_z_noise}, \code{mean_z}, \code{sd_z}.
  \code{of1} gives the minimal number of samples participating in a
  bicluster to which between 0 and \eqn{l/f1} samples are added,
  where the number is uniformly chosen.
  \code{sd_z_noise} gives the noise of samples not participating in the
  bicluster.
  \code{mean_z} and \code{sd_z} determine the Gaussian from which the
  values are drawn for the samples that participate in the bicluster.
  \eqn{U} is the overall Gaussian zero mean noise generated by
  \code{sd_noise}.
  Implementation in \R.
}
\value{
  \item{X}{the noisy data from \eqn{R^{n \times l}}.}
  \item{Y}{the noise free data from \eqn{R^{n \times l}}.}
  \item{ZC}{list where i-th element gives samples belonging to i-th bicluster.}
  \item{LC}{list where i-th element gives observations belonging to i-th bicluster.}
}
\seealso{
\code{\link{fabia}},
\code{\link{fabias}},
\code{\link{fabiap}},
\code{\link{fabi}},
\code{\link{fabiasp}},
\code{\link{mfsc}},
\code{\link{nmfdiv}},
\code{\link{nmfeu}},
\code{\link{nmfsc}},
\code{\link{plot}},
\code{\link{extractPlot}},
\code{\link{extractBic}},
\code{\link{plotBicluster}},
\code{\link{Factorization}},
\code{\link{projFuncPos}},
\code{\link{projFunc}},
\code{\link{estimateMode}},
\code{\link{makeFabiaData}},
\code{\link{makeFabiaDataBlocks}},
\code{\link{makeFabiaDataPos}},
\code{\link{makeFabiaDataBlocksPos}},
\code{\link{matrixImagePlot}},
\code{\link{summary}},
\code{\link{show}},
\code{\link{showSelected}},
\code{\link{fabiaDemo}},
\code{\link{fabiaVersion}}
}
\author{Sepp Hochreiter}
\examples{
#---------------
# TEST
#---------------

dat <- makeFabiaData(n = 100,l= 50,p = 3,f1 = 5,f2 = 5,
  of1 = 5,of2 = 10,sd_noise = 3.0,sd_z_noise = 0.2,mean_z = 2.0,
  sd_z = 1.0,sd_l_noise = 0.2,mean_l = 3.0,sd_l = 1.0)

X <- dat[[1]]
Y <- dat[[2]]

matrixImagePlot(Y)
x11()
matrixImagePlot(X)

\dontrun{
#---------------
# DEMO
#---------------

dat <- makeFabiaData(n = 1000,l= 100,p = 10,f1 = 5,f2 = 5,
  of1 = 5,of2 = 10,sd_noise = 3.0,sd_z_noise = 0.2,mean_z = 2.0,
  sd_z = 1.0,sd_l_noise = 0.2,mean_l = 3.0,sd_l = 1.0)

X <- dat[[1]]
Y <- dat[[2]]

matrixImagePlot(Y)
x11()
matrixImagePlot(X)
}
}
\keyword{datagen}
\concept{biclustering}
\concept{sparse coding}
\concept{sparse matrix factorization}