% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/qvar.R
\name{qvar}
\alias{qvar}
\title{Quickly perform a variance estimation in common cases}
\usage{
qvar(
  data,
  ...,
  by = NULL,
  where = NULL,
  alpha = 0.05,
  display = TRUE,
  id,
  dissemination_dummy,
  dissemination_weight,
  sampling_weight,
  strata = NULL,
  scope_dummy = NULL,
  nrc_weight = NULL,
  response_dummy = NULL,
  nrc_dummy = NULL,
  calibration_weight = NULL,
  calibration_dummy = NULL,
  calibration_var = NULL,
  define = FALSE,
  envir = parent.frame()
)
}
\arguments{
\item{data}{The \code{data.frame} containing all the technical information
required to prepare the variance estimation process (see other arguments 
below). Note that this file should contain all the units sampled, 
including the out-of-scope and non-responding units. If a variance
estimation is to be performed right away (when \code{define = FALSE}),
it should also contain the variables of interest.}

\item{...}{One or more calls to a statistic wrapper (e.g. \code{total()}, 
\code{mean()}, \code{ratio()}). See examples and 
\code{\link[=standard_statistic_wrapper]{standard statistic wrappers}}.}

\item{by}{A qualitative variable whose levels are used to define domains
on which the variance estimation is performed.}

\item{where}{A logical vector indicating a domain on which the variance 
estimation is to be performed.}

\item{alpha}{A numeric vector of length 1 indicating the threshold
for confidence interval derivation (\code{0.05} by default).}

\item{display}{A logical vector of length 1 indicating whether
the result of the estimation should be displayed or not.}

\item{id}{The identification variable of the units in \code{data}. 
It should be unique for each row in \code{data} and not contain any 
missing values.}

\item{dissemination_dummy}{A character vector of length 1, the name
of the logical variable in \code{data} indicating whether the unit
does appear in the disseminated file and should be used for point
estimates. It should not contain any missing values.}

\item{dissemination_weight}{A character vector of length 1, the name
of the numerical variable in \code{data} corresponding to the 
dissemination weight of the survey. It should not contain any missing 
values.}

\item{sampling_weight}{A character vector of length 1, the name of the 
numeric variable in \code{data} corresponding to the sampling weights 
of the survey. It should not contain any missing values.}

\item{strata}{A character vector of length 1, the name of the factor 
variable in \code{data} whose levels match the stratification
used in the survey. Character variables are coerced to factor.
If defined, it should not contain any missing value. If \code{NULL},
the variance estimation process does not take any stratification
into account.}

\item{scope_dummy}{A character vector of length 1, the name of the logical 
variable in \code{data} indicating whether the unit belongs to the
scope of the survey or not. Numerical variables are coerced to logical.
If defined, it should not contain any missing value. If \code{NULL},
all units are supposed to be within the scope of the survey.}

\item{nrc_weight}{A character vector of length 1, the name of the 
numerical variable in \code{data} corresponding to the weights
after non-response correction. If defined, all responding units 
should have a non-missing value. If \code{NULL}, all
units are supposed to be responding and the variance estimation
process does not include a second phase in order to take non-response
into account.}

\item{response_dummy}{A character vector of length 1, the name of the logical 
variable in \code{data} indicating whether the unit is a responding 
unit or not. Numerical variables are coerced to logical. \code{response_dummy}
should be defined as long as a \code{nrc_weight} is provided. All units 
in the scope of the survey should have a non-missing value.}

\item{nrc_dummy}{A character vector of length 1, the name of
the logical variable in \code{data} indicating whether the
units did take part in the non-response correction process. 
All units in the scope of the survey should have a non-missing 
value.}

\item{calibration_weight}{A character vector of length 1, the name of the 
numerical variable in \code{data} corresponding to the calibrated
weights. If defined, all responding units should have 
a non-missing value. If \code{NULL}, the variance estimation
process does not take any calibration step into account.}

\item{calibration_dummy}{A character vector of length 1, the name of the logical 
variable in \code{data} indicating whether the unit did take part
in the calibration process or not. Numerical variables are coerced to 
logical. If defined, all responding units should have a non-missing
value. If \code{NULL}, calibration is supposed to have been performed
on all responding units.}

\item{calibration_var}{A character vector, the name of the variable(s) used in
the calibration process. Logical variables are coerced to numeric. 
Character and factor variables are automatically discretized. 
\code{calibration_var} should be defined as long as a \code{calibration_weight} is 
provided. All units taking part in the calibration process should have
only non-missing values for all variables in \code{calibration_var}.}

\item{define}{Logical vector of length 1. Should a variance wrapper
be defined instead of performing a variance estimation (see details
and examples)?}

\item{envir}{An environment containing a binding to \code{data}.}
}
\description{
\code{qvar} (for "quick variance estimation") is a function
  performing analytical variance estimation in most common cases, that is:
  \itemize{\item stratified simple random sampling \item non-response
  correction (if any) through reweighting \item calibration (if any)}

Used with \code{define = TRUE}, it defines a so-called variance wrapper, that 
is a standalone ready-to-use function that can be applied to the survey dataset 
without having to specify the methodological characteristics of the survey
(see \code{\link{define_variance_wrapper}}).
}
\details{
\code{qvar} performs not only technical but also 
  methodological checks in order to ensure that the standard variance 
  estimation methodology does apply (e.g. equal probability of 
  inclusion within strata, number of units per stratum).
  
  Used with \code{define = TRUE}, the function returns a variance
  estimation \emph{wrapper}, that is a ready-to-use function that
  implements the described variance estimation methodology and
  contains all necessary data to do so (see examples).
  
  Note: To some extent, \code{qvar} is analogous to the \code{qplot} function
  in the ggplot2 package, as it is an easier-to-use function for common
  cases. More complex cases are to be handled by using the core functions of
  the gustave package, e.g. \code{\link{define_variance_wrapper}}.
}
\examples{
### Example from the Information and communication technologies (ICT) survey

# The (simulated) Information and communication technologies (ICT) survey 
# has the following characteristics: 
# - stratified one-stage sampling design
# - non-response correction through reweighting in homogeneous response groups
# - calibration on margins.

# The ict_survey data.frame is a (simulated) subset of the ICT 
# survey file containing the variables of interest for the 612
# responding firms.

# The ict_sample data.frame is the (simulated) sample of 650
# firms corresponding to the ict_survey file. It contains all
# technical information necessary to estimate a variance with
# the qvar() function.

## Methodological description of the survey

# Direct call of qvar()
qvar(

  # Sample file
  data = ict_sample,
  
  # Dissemination and identification information
  dissemination_dummy = "dissemination",
  dissemination_weight = "w_calib",
  id = "firm_id",
  
  # Scope
  scope_dummy = "scope",
  
  # Sampling design
  sampling_weight = "w_sample", 
  strata = "strata",
  
  # Non-response correction
  nrc_weight = "w_nrc", 
  response_dummy = "resp", 
  hrg = "hrg",
  
  # Calibration
  calibration_weight = "w_calib",
  calibration_var = c(paste0("N_", 58:63), paste0("turnover_", 58:63)),
  
  # Statistic(s) and variable(s) of interest
  mean(employees)
 
)

# Definition of a variance estimation wrapper
precision_ict <- qvar(

  # As before
  data = ict_sample,
  dissemination_dummy = "dissemination",
  dissemination_weight = "w_calib",
  id = "firm_id",
  scope_dummy = "scope",
  sampling_weight = "w_sample", 
  strata = "strata",
  nrc_weight = "w_nrc", 
  response_dummy = "resp", 
  hrg = "hrg",
  calibration_weight = "w_calib",
  calibration_var = c(paste0("N_", 58:63), paste0("turnover_", 58:63)),
  
  # Replacing the variables of interest by define = TRUE
  define = TRUE
  
)

# Use of the variance estimation wrapper
precision_ict(ict_sample, mean(employees))

# The variance estimation wrapper can also be used on the survey file
precision_ict(ict_survey, mean(speed_quanti))

## Features of the variance estimation wrapper

# Several statistics in one call (with optional labels)
precision_ict(ict_survey, 
  "Mean internet speed in Mbps" = mean(speed_quanti), 
  "Turnover per employee" = ratio(turnover, employees)
)

# Domain estimation with where and by arguments
precision_ict(ict_survey, 
  mean(speed_quanti), 
  where = employees >= 50
)
precision_ict(ict_survey, 
  mean(speed_quanti), 
  by = division
)

# Domain may differ from one estimator to another
precision_ict(ict_survey, 
  "Mean turnover, firms with 50 employees or more" = mean(turnover, where = employees >= 50),
  "Mean turnover, firms with 100 employees or more" = mean(turnover, where = employees >= 100)
)

# On-the-fly evaluation (e.g. discretization)
precision_ict(ict_survey, mean(speed_quanti > 100))

# Automatic discretization for qualitative (character or factor) variables
precision_ict(ict_survey, mean(speed_quali))

# Standard evaluation capabilities
variables_of_interest <- c("speed_quanti", "speed_quali")
precision_ict(ict_survey, mean(variables_of_interest))

# Integration with \%>\% and dplyr
library(magrittr)
library(dplyr)
ict_survey \%>\% 
  precision_ict("Internet speed above 100 Mbps" = mean(speed_quanti > 100)) \%>\% 
  select(label, est, lower, upper)

}
\seealso{
\code{\link{define_variance_wrapper}}, \code{\link{standard_statistic_wrapper}}
}
