% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/filterVcf.R
\name{filterVcfBasic}
\alias{filterVcfBasic}
\title{Basic VCF filter function}
\usage{
filterVcfBasic(
  vcf,
  tumor.id.in.vcf = NULL,
  use.somatic.status = TRUE,
  snp.blacklist = NULL,
  af.range = c(0.03, 0.97),
  contamination.range = c(0.01, 0.075),
  min.coverage = 15,
  min.base.quality = 25,
  min.supporting.reads = NULL,
  error = 0.001,
  target.granges = NULL,
  remove.off.target.snvs = TRUE,
  model.homozygous = FALSE,
  interval.padding = 50,
  DB.info.flag = "DB"
)
}
\arguments{
\item{vcf}{\code{CollapsedVCF} object, read in with the \code{readVcf}
function from the VariantAnnotation package.}

\item{tumor.id.in.vcf}{The tumor id in the \code{CollapsedVCF} (optional).}

\item{use.somatic.status}{If somatic status and germline data are available,
then use this information to remove non-heterozygous germline SNPs or
germline SNPs with biased allelic fractions.}

\item{snp.blacklist}{A file with blacklisted genomic regions. Must
be parsable by \code{import} from \code{rtracklayer}, for example a
BED file with file extension \sQuote{.bed}.}

\item{af.range}{Exclude variants with allelic fraction smaller or greater than
the two values, respectively. The higher value removes homozygous SNPs,
which potentially have allelic fractions smaller than 1 due to artifacts or
contamination. If a matched normal is available, this value is ignored,
because homozygosity can be confirmed in the normal.}

\item{contamination.range}{Count variants in dbSNP with allelic fraction
in the specified range. If the number of these putative contamination 
variants exceeds an expected value and if they are found on almost all 
chromosomes, the sample is flagged as potentially contaminated and extra
contamination estimation steps will be performed later on.}

\item{min.coverage}{Minimum coverage in tumor. Variants with lower coverage
are ignored.}

\item{min.base.quality}{Minimum base quality in tumor. Requires a \code{BQ}
genotype field in the VCF.}

\item{min.supporting.reads}{Minimum number of reads supporting the alt
allele. If \code{NULL}, calculate based on coverage and the sequencing
error rate given by \code{error} (default 10^-3).}

\item{error}{Estimated sequencing error rate. Used to calculate minimum
number of supporting reads using \code{\link{calculatePowerDetectSomatic}}.}

\item{target.granges}{\code{GenomicRanges} object specifying the target
positions. Used to remove off-target reads. If \code{NULL}, do not check
whether variants are on or off-target.}

\item{remove.off.target.snvs}{If set to a true value, will remove all SNVs
outside the covered regions.}

\item{model.homozygous}{If set to \code{TRUE}, does not remove homozygous
variants. Ignored in case a matched normal is provided in the VCF.}

\item{interval.padding}{Include variants in the interval flanking regions of
the specified size in bp. Requires \code{target.granges}.}

\item{DB.info.flag}{Flag in INFO of VCF that marks presence in common
germline databases. Defaults to \code{DB} that may contain somatic variants
if it is from an unfiltered dbSNP VCF.}
}
\value{
A list with elements \item{vcf}{The filtered \code{CollapsedVCF}
object.} \item{flag}{A flag (\code{logical(1)}) if problems were
identified.} \item{flag_comment}{A comment describing the flagging.}
}
\description{
Function to remove artifacts and low confidence/quality variant calls.
}
\examples{

# This function is typically only called by runAbsolute via 
# fun.filterVcf and args.filterVcf.
vcf.file <- system.file("extdata", "example.vcf.gz", package="PureCN")
vcf <- readVcf(vcf.file, "hg19")
vcf.filtered <- filterVcfBasic(vcf)        

}
\seealso{
\code{\link{calculatePowerDetectSomatic}}
}
\author{
Markus Riester
}
