% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/motif_finding.R
\docType{methods}
\name{findMotifHits}
\alias{findMotifHits}
\alias{findMotifHits,character,character-method}
\alias{findMotifHits,character,DNAString-method}
\alias{findMotifHits,character,DNAStringSet-method}
\alias{findMotifHits,PWMatrix,character-method}
\alias{findMotifHits,PWMatrix,DNAString-method}
\alias{findMotifHits,PWMatrix,DNAStringSet-method}
\alias{findMotifHits,PWMatrixList,character-method}
\alias{findMotifHits,PWMatrixList,DNAString-method}
\alias{findMotifHits,PWMatrixList,DNAStringSet-method}
\alias{findMotifHits,PWMatrix,GRanges-method}
\alias{findMotifHits,PWMatrixList,GRanges-method}
\title{Find motif matches in sequences.}
\usage{
findMotifHits(
  query,
  subject,
  min.score,
  method = c("matchPWM", "homer2"),
  homerfile = findHomer("homer2"),
  BPPARAM = SerialParam(),
  genome = NULL
)

\S4method{findMotifHits}{character,character}(
  query,
  subject,
  min.score,
  method = c("matchPWM", "homer2"),
  homerfile = findHomer("homer2"),
  BPPARAM = SerialParam(),
  genome = NULL
)

\S4method{findMotifHits}{character,DNAString}(
  query,
  subject,
  min.score,
  method = c("matchPWM", "homer2"),
  homerfile = findHomer("homer2"),
  BPPARAM = SerialParam(),
  genome = NULL
)

\S4method{findMotifHits}{character,DNAStringSet}(
  query,
  subject,
  min.score,
  method = c("matchPWM", "homer2"),
  homerfile = findHomer("homer2"),
  BPPARAM = SerialParam(),
  genome = NULL
)

\S4method{findMotifHits}{PWMatrix,character}(
  query,
  subject,
  min.score,
  method = c("matchPWM", "homer2"),
  homerfile = findHomer("homer2"),
  BPPARAM = SerialParam(),
  genome = NULL
)

\S4method{findMotifHits}{PWMatrix,DNAString}(
  query,
  subject,
  min.score,
  method = c("matchPWM", "homer2"),
  homerfile = findHomer("homer2"),
  BPPARAM = SerialParam(),
  genome = NULL
)

\S4method{findMotifHits}{PWMatrix,DNAStringSet}(
  query,
  subject,
  min.score,
  method = c("matchPWM", "homer2"),
  homerfile = findHomer("homer2"),
  BPPARAM = SerialParam(),
  genome = NULL
)

\S4method{findMotifHits}{PWMatrixList,character}(
  query,
  subject,
  min.score,
  method = c("matchPWM", "homer2"),
  homerfile = findHomer("homer2"),
  BPPARAM = SerialParam(),
  genome = NULL
)

\S4method{findMotifHits}{PWMatrixList,DNAString}(
  query,
  subject,
  min.score,
  method = c("matchPWM", "homer2"),
  homerfile = findHomer("homer2"),
  BPPARAM = SerialParam(),
  genome = NULL
)

\S4method{findMotifHits}{PWMatrixList,DNAStringSet}(
  query,
  subject,
  min.score,
  method = c("matchPWM", "homer2"),
  homerfile = findHomer("homer2"),
  BPPARAM = SerialParam(),
  genome = NULL
)

\S4method{findMotifHits}{PWMatrix,GRanges}(
  query,
  subject,
  min.score,
  method = c("matchPWM", "homer2"),
  homerfile = findHomer("homer2"),
  BPPARAM = SerialParam(),
  genome = NULL
)

\S4method{findMotifHits}{PWMatrixList,GRanges}(
  query,
  subject,
  min.score,
  method = c("matchPWM", "homer2"),
  homerfile = findHomer("homer2"),
  BPPARAM = SerialParam(),
  genome = NULL
)
}
\arguments{
\item{query}{The motifs to search for, either a
\describe{
    \item{\code{character(1)}}{ with the path and file name of a motif
          file with PWM in HOMER format (currently only
          supported for \code{method="homer2"})}
    \item{\code{PWMatrix}}{ with a single PWM}
    \item{\code{PWMatrixList}}{ with several PWMs to search for.}
 }}

\item{subject}{The sequences to be searched, either a
\describe{
    \item{\code{character}}{ with the path and file name of a sequence
          file with DNA sequences in FASTA format}
    \item{\code{DNAString}}{ with a single sequence}
    \item{\code{DNAStringSet}}{ with several sequences}
    \item{\code{GRanges}}{ object with the genomic coordinates
        of the sequences to be searched.}
}}

\item{min.score}{The minimum score for counting a match. Can be given as
a character string containing a percentage (e.g. "85\%") of the
highest possible score or as a single number.}

\item{method}{The internal method to use for motif searching. One of
\describe{
    \item{\code{"matchPWM"}}{ using Biostrings::matchPWM (optimized)}
    \item{\code{"homer2"}}{ call to the homer2 binary}
}
Please note that the two methods might give slightly different results
(see details).}

\item{homerfile}{Path and file name of the \code{homer2} binary.}

\item{BPPARAM}{An optional \code{\link[BiocParallel]{BiocParallelParam}}
instance determining the parallel back-end to be used during evaluation.}

\item{genome}{\code{BSgenome} object that is the reference genome of the
subject. This argument is set to NULL by default and only used by the
function when the subject is a \code{GRanges} object. It is then
necessary to specify the genome so that the function can internally
convert the genomic regions into a \code{DNAStringSet} object.}
}
\value{
A \code{GRanges} object with the matches to \code{query} in
    \code{subject}.
}
\description{
\code{findMotifHits} scans sequences (either provided
as a file, an R object or genomic coordinates) for matches to
position weight matrices (provided as a file or as R objects).
}
\details{
The implemented methods (\code{matchPWM} and \code{homer2}) are
    there for convenience (\code{method="matchPWM"} calls
    \code{Biostrings::matchPWM} internally in an optimized fashion, and
    \code{method = "homer2"} calls the command line tool from Homer and
    therefore requires an installation of Homer).

    In general, running \code{findMotifHits} with the same parameters using
    any of the methods generates identical results. Some minor differences
    could occur that result from rounding errors during the necessary
    conversion of PWMs (log2-odds scores) to the probability matrices needed
    by Homer, and the conversion of scores from and to the natural log scale
    used by Homer. These conversions are implemented transparently for the
    user, so that the arguments of \code{findMotifHits} do not have to be
    adjusted (e.g. the PWMs should always contain log2-odds scores, and
    \code{min.score} is always on the log2 scale).

    If there are bases with frequencies of less than 0.001 in a motif, Homer
    will set them to 0.001 and adjust the other frequencies at that motif
    position accordingly so that they sum to 1.0. This may differ from the
    adjustment used when scanning a PWM with \code{matchPWM} (e.g. the
    \code{pseudocounts} argument in the \code{\link[TFBSTools]{toPWM}}
    function), and thus can give rise to differences in reported motif hits
    and hit scores (typically only low-scoring hits).
}
\examples{
seqs <- Biostrings::DNAStringSet(c(s1 = "GTCAGTCGATC", s2 = "CAGTCTAGCTG",
                                   s3 = "CGATCGTCAGT", s4 = "AGCTGCAGTCT"))
m <- rbind(A = c(2, 0, 0),
           C = c(1, 1, 0),
           G = c(0, 2, 0),
           T = c(0, 0, 3))
pwms <- TFBSTools::PWMatrixList(
    TFBSTools::PWMatrix(ID = "m1", profileMatrix = m),
    TFBSTools::PWMatrix(ID = "m2", profileMatrix = m[, 3:1])
)
findMotifHits(pwms, seqs, min.score = 7)

}
