\name{makeFabiaData}
\alias{makeFabiaData}
\title{Generation of Bicluster Data}
\description{

  \code{makeFabiaData}: \R implementation of a generator for bicluster
  data, modeled as a noisy sum of outer products of sparse vectors.
}
\usage{

makeFabiaData(n,l,p,f1,f2,of1,of2,sd_noise,sd_z_noise,
              mean_z,sd_z,sd_l_noise,mean_l,sd_l)


}
\arguments{
  \item{n}{number of observations.}
  \item{l}{number of samples.}
  \item{p}{number of biclusters.}
  \item{f1}{l/f1 is the max. number of additional samples that are active in a bicluster.}
  \item{f2}{n/f2 is the max. number of additional observations that form a pattern in a bicluster.}
  \item{of1}{minimal number of active samples in a bicluster.}
  \item{of2}{minimal number of observations that form a pattern in a bicluster.}
  \item{sd_noise}{Gaussian zero mean noise std on data matrix.}
  \item{sd_z_noise}{Gaussian zero mean noise std for deactivated hidden factors.}
  \item{mean_z}{Gaussian mean for activated factors.}
  \item{sd_z}{Gaussian std for activated factors.}
  \item{sd_l_noise}{Gaussian zero mean noise std if no observation patterns are present.}
  \item{mean_l}{Gaussian mean for observation patterns.}
  \item{sd_l}{Gaussian std for observation patterns.}
}
\details{

  Essentially the data generation model is the sum of
  outer products of sparse vectors:
   \deqn{X  =  \sum_{i=1}^{p} \lambda_i  z_i^T  +  U}
  where the number of summands  \eqn{p}
  is the number of biclusters.
  The matrix factorization is
  \deqn{X  =  L  Z  +  U}
  and noise free
  \deqn{Y  =  L Z}

   Here \eqn{\lambda_i} are from \eqn{R^n}, \eqn{z_i} from
   \eqn{R^l}, \eqn{L} from \eqn{R^{n \times p}},
   \eqn{Z} from \eqn{R^{p \times l}}, and \eqn{X}, \eqn{U}, \eqn{Y}
   from \eqn{R^{n \times l}}.


   Sequentially the \eqn{\lambda_i} (the columns of \eqn{L}) are
   generated using
   \code{n}, \code{f2}, \code{of2}, \code{sd_l_noise}, \code{mean_l},
   \code{sd_l}.
   \code{of2} gives the minimal number of observations participating in a
   bicluster, to which between 0 and \eqn{n/f2} observations are added,
   where the number is uniformly chosen. \code{sd_l_noise} gives the
   noise std of observations not participating in the
   bicluster. \code{mean_l} and \code{sd_l} determine the Gaussian from
   which the values are drawn for the observations that participate in
   the bicluster. The sign of the mean is randomly chosen for each
   component.

   Sequentially the \eqn{z_i} (the rows of \eqn{Z}) are
   generated using
   \code{l}, \code{f1}, \code{of1}, \code{sd_z_noise}, \code{mean_z},
   \code{sd_z}.
   \code{of1} gives the minimal number of samples participating in a
   bicluster, to which between 0 and \eqn{l/f1} samples are added,
   where the number is uniformly chosen. \code{sd_z_noise} gives the
   noise std of samples not participating in the
   bicluster. \code{mean_z} and \code{sd_z} determine the Gaussian from
   which the values are drawn for the samples that participate in
   the bicluster.

   \eqn{U} is the overall Gaussian zero mean
   noise generated by \code{sd_noise}.


  Implementation in \R.

}
\value{
  \item{X}{the noisy data from \eqn{R^{n \times l}}.}
  \item{Y}{the noise free data from \eqn{R^{n \times l}}.}
  \item{ZC}{list where i-th element gives samples belonging to i-th bicluster.}
  \item{LC}{list where i-th element gives observations belonging to i-th
    bicluster.}
    }
\seealso{
\code{\link{fabia}},
\code{\link{fabias}},
\code{\link{fabiap}},
\code{\link{fabi}},
\code{\link{fabiasp}},
\code{\link{mfsc}},
\code{\link{nmfdiv}},
\code{\link{nmfeu}},
\code{\link{nmfsc}},
\code{\link{plot}},
\code{\link{extractPlot}},
\code{\link{extractBic}},
\code{\link{plotBicluster}},
\code{\link{Factorization}},
\code{\link{projFuncPos}},
\code{\link{projFunc}},
\code{\link{estimateMode}},
\code{\link{makeFabiaData}},
\code{\link{makeFabiaDataBlocks}},
\code{\link{makeFabiaDataPos}},
\code{\link{makeFabiaDataBlocksPos}},
\code{\link{matrixImagePlot}},
\code{\link{summary}},
\code{\link{show}},
\code{\link{showSelected}},
\code{\link{fabiaDemo}},
\code{\link{fabiaVersion}}
}
\author{Sepp Hochreiter}
\examples{


#---------------
# TEST
#---------------

# Small data set: 100 observations, 50 samples, 3 biclusters.
dat <- makeFabiaData(n = 100,l= 50,p = 3,f1 = 5,f2 = 5,
  of1 = 5,of2 = 10,sd_noise = 3.0,sd_z_noise = 0.2,mean_z = 2.0,
  sd_z = 1.0,sd_l_noise = 0.2,mean_l = 3.0,sd_l = 1.0)

# Per the value section: first element is the noisy data X,
# second element is the noise free data Y.
X <- dat[[1]]
Y <- dat[[2]]

matrixImagePlot(Y)
# Open a second device so that both plots stay visible.
# dev.new() selects the default device of the platform, whereas
# x11() is not available on every system.
dev.new()
matrixImagePlot(X)


\dontrun{
#---------------
# DEMO
#---------------

# Larger data set: 1000 observations, 100 samples, 10 biclusters.
dat <- makeFabiaData(n = 1000,l= 100,p = 10,f1 = 5,f2 = 5,
  of1 = 5,of2 = 10,sd_noise = 3.0,sd_z_noise = 0.2,mean_z = 2.0,
  sd_z = 1.0,sd_l_noise = 0.2,mean_l = 3.0,sd_l = 1.0)

X <- dat[[1]]
Y <- dat[[2]]

matrixImagePlot(Y)
# Portable replacement for x11(), see the TEST part above.
dev.new()
matrixImagePlot(X)

}
}
\keyword{datagen}
\concept{biclustering}
\concept{sparse coding}
\concept{sparse matrix factorization}