% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/scoreGeneSet.R
\name{scoreGeneSet}
\alias{scoreGeneSet}
\title{Score gene set activity for each cell}
\usage{
scoreGeneSet(
  x,
  set,
  rank = 1,
  scale = FALSE,
  block = NULL,
  block.weight.policy = c("variable", "equal", "none"),
  variable.block.weight = c(0, 1000),
  extra.work = 7,
  iterations = 1000,
  seed = 5489,
  realized = TRUE,
  num.threads = 1
)
}
\arguments{
\item{x}{A matrix-like object where rows correspond to genes or genomic features and columns correspond to cells.
Typically, the matrix is expected to contain log-expression values.}

\item{set}{Integer, logical or character vector specifying the rows that belong to the gene set.}

\item{rank}{Integer scalar specifying the rank of the approximation.
The default value of 1 assumes that each gene set only describes a single coordinated biological function.}

\item{scale}{Logical scalar indicating whether to scale all genes to have the same variance.
This ensures that each gene contributes equally to the PCA, favoring consistent variation across many genes rather than large variation in a few genes.
If \code{block} is specified, each gene's variance is calculated as a weighted sum of the variances from each block. 
Genes with zero variance are ignored.}

\item{block}{Factor specifying the block of origin (e.g., batch, sample) for each cell in \code{x}.
The PCA will be performed on the residuals after regressing out the block effect, ensuring that differences between blocks do not dominate the variation in the dataset.
Alternatively, \code{NULL} if all cells are from the same block.}

\item{block.weight.policy}{String specifying the policy to use for weighting the contribution of different blocks to the PCA.
See the argument of the same name in \code{\link{computeBlockWeights}} for more detail.
Only used if \code{block} is not \code{NULL}.}

\item{variable.block.weight}{Numeric vector of length 2, specifying the parameters for variable block weighting.
See the argument of the same name in \code{\link{computeBlockWeights}} for more detail.
Only used if \code{block} is not \code{NULL} and \code{block.weight.policy = "variable"}.}

\item{extra.work}{Integer scalar specifying the extra dimensions for the IRLBA workspace.
Larger values improve accuracy at the cost of compute time.}

\item{iterations}{Integer scalar specifying the maximum number of restart iterations for IRLBA.
Larger values improve accuracy at the cost of compute time.}

\item{seed}{Integer scalar specifying the seed for the initial random vector in IRLBA.}

\item{realized}{Logical scalar indicating whether to realize \code{x} into an optimal memory layout for IRLBA.
This speeds up computation at the cost of increased memory usage.}

\item{num.threads}{Integer scalar specifying the number of threads to use.}
}
\value{
List containing:
\itemize{
\item \code{scores}, a numeric vector of per-cell scores for each column in \code{x}.
\item \code{weights}, a data frame containing \code{row}, an integer vector of ordered and unique row indices corresponding to the genes in \code{set};
and \code{weight}, a numeric vector of per-gene weights for each gene in \code{row}. 
}
}
\description{
Compute per-cell scores for a gene set, defined as the column sums of a rank-1 approximation to the submatrix for the gene set.
This uses the same approach as the \pkg{GSDecon} package by Jason Hackney, adapted to use an approximate PCA (via IRLBA) and to support blocking.
}
\examples{
library(Matrix)
x <- round(abs(rsparsematrix(1000, 100, 0.1) * 100))
normed <- normalizeCounts(x, size.factors=centerSizeFactors(colSums(x)))
scoreGeneSet(normed, set=c(1,3,5,10,20,100))

}
\seealso{
The \code{compute} and \code{compute_blocked} functions in \url{https://libscran.github.io/gsdecon/}.
}
\author{
Aaron Lun
}
