% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/processStudy.R
\encoding{UTF-8}
\name{computeKNNRefSynthetic}
\alias{computeKNNRefSynthetic}
\title{Run a k-nearest neighbors analysis on a subset of the
synthetic dataset}
\usage{
computeKNNRefSynthetic(
  gdsProfile,
  listEigenvector,
  listCatPop = c("EAS", "EUR", "AFR", "AMR", "SAS"),
  studyIDSyn,
  spRef,
  fieldPopInfAnc = "SuperPop",
  kList = seq(2, 15, 1),
  pcaList = seq(2, 15, 1)
)
}
\arguments{
\item{gdsProfile}{an object of class
\code{\link[SNPRelate:SNPGDSFileClass]{SNPRelate::SNPGDSFileClass}}, the
opened Profile GDS file.}

\item{listEigenvector}{a \code{list} with 3 entries:
'sample.id', 'eigenvector.ref' and 'eigenvector'. The \code{list} represents
the PCA done on the 1KG reference profiles and the synthetic profiles
projected onto it.}

\item{listCatPop}{a \code{vector} of \code{character} strings
representing the list of possible ancestry assignments. Default:
\code{c("EAS", "EUR", "AFR", "AMR", "SAS")}.}

\item{studyIDSyn}{a \code{character} string corresponding to the study
identifier.
The study identifier must be present in the Profile GDS file.}

\item{spRef}{a \code{vector} of \code{character} strings representing the
known super population ancestry for the 1KG profiles. The 1KG profile
identifiers are used as names for the \code{vector}.}

\item{fieldPopInfAnc}{a \code{character} string representing the name of
the column that will contain the inferred ancestry for the specified
data set. Default: \code{"SuperPop"}.}

\item{kList}{a \code{vector} of \code{integer} representing the list of
values tested for the K parameter. The K parameter represents the
number of neighbors used in the K-nearest neighbors analysis. If
\code{NULL}, the value \code{seq(2, 15, 1)} is assigned.
Default: \code{seq(2, 15, 1)}.}

\item{pcaList}{a \code{vector} of \code{integer} representing the list of
values tested for the D parameter. The D parameter represents the
number of dimensions used in the PCA analysis. If \code{NULL},
the value \code{seq(2, 15, 1)} is assigned.
Default: \code{seq(2, 15, 1)}.}
}
\value{
a \code{list} containing 4 entries:
\describe{
\item{\code{sample.id}}{ a \code{vector} of \code{character} strings
representing the identifiers of the synthetic profiles analysed.}
\item{\code{sample1Kg}}{ a \code{vector} of \code{character} strings
representing the identifiers of the 1KG reference profiles used to
generate the synthetic profiles.}
\item{\code{sp}}{ a \code{vector} of \code{character} strings representing
the known super population ancestry of the 1KG reference profiles used
to generate the synthetic profiles.}
\item{\code{matKNN}}{ a \code{data.frame} containing the super population
inference for each synthetic profile for different values of PCA
dimensions \code{D} and k-neighbors values \code{K}. The fourth column title
corresponds to the \code{fieldPopInfAnc} parameter.
The \code{data.frame} contains 4 columns:
\describe{
\item{\code{sample.id}}{ a \code{character} string representing
the identifier of the synthetic profile analysed.}
\item{\code{D}}{ a \code{numeric} value representing
the value of the PCA dimension used to infer the super population.}
\item{\code{K}}{ a \code{numeric} value representing
the value of the k-neighbors used to infer the super population.}
\item{\code{fieldPopInfAnc} value}{ a \code{character} string representing
the inferred ancestry.}
}
}
}
}
\description{
The function runs a k-nearest neighbors analysis on a
subset of the synthetic data set. The analysis relies on the \code{knn}
function from the 'class' package.
}
\examples{

## Load gdsfmt (provides closefn.gds(), used at the end of this example)
library(gdsfmt)

## Demo PCA: synthetic profiles projected on the demo 1KG reference PCA
data(demoPCASyntheticProfiles)

## Known super population ancestry of the demo 1KG reference profiles
data(demoKnownSuperPop1KG)

## Full path to the demo Profile GDS file shipped with this package
demoGDSPath <- file.path(
    system.file("extdata/demoKNNSynthetic", package="RAIDS"), "ex1.gds")

## Open the Profile GDS file
gdsProfile <- snpgdsOpen(demoGDSPath)

## Identifier of the synthetic study; it must be present in the
## Profile GDS file
synStudyID <- "MYDATA.Synthetic"

## Possible ancestry assignments
ancestryLabels <- c("EAS", "EUR", "AFR", "AMR", "SAS")

## Run the k-nearest neighbors analysis on the synthetic profiles using
## the known ancestry of the 1KG reference profiles
knnResults <- computeKNNRefSynthetic(
    gdsProfile=gdsProfile,
    listEigenvector=demoPCASyntheticProfiles,
    listCatPop=ancestryLabels,
    studyIDSyn=synStudyID,
    spRef=demoKnownSuperPop1KG)

## First rows of the inferred ancestry of the synthetic profiles for the
## tested values of D and K
head(knnResults$matKNN)

## Close Profile GDS file (important)
closefn.gds(gdsProfile)

}
\author{
Pascal Belleau, Astrid Deschênes and Alexander Krasnitz
}
