Exploring Sobol indices and randomness with Sobol4R

Context and non-random case

Test case: the non-monotonic Sobol g function.

The method of Sobol requires two samples. In the reference case there are eight variables, all following the uniform distribution on [0,1].

# Size of each input sample (rows) and number of input variables (columns).
n <- 50000
p <- 8

# Seed the generator BEFORE the first random draw. Previously the seed was set
# only after X1_1 / X2_1 had been drawn, so the two input samples (and every
# result computed from them) changed from one run to the next.
set.seed(4669)

# Two independent n-by-p samples with every entry drawn from U(0, 1).
X1_1 <- data.frame(matrix(runif(p * n), nrow = n))
X2_1 <- data.frame(matrix(runif(p * n), nrow = n))

# Build the Sobol design from the two samples; order = 2 and nboot = 100 are
# forwarded to sobol4r_design() as given.
gensol1 <- sobol4r_design(
  X1    = X1_1,
  X2    = X2_1,
  order = 2,
  nboot = 100
)

# Evaluate the g function on the design matrix, hand the responses back to the
# design object with sensitivity::tell(), then print and plot the result.
Y1 <- sobol_g_function(gensol1$X)
x1 <- sensitivity::tell(gensol1, Y1)
print(x1)
Sobol4R::autoplot(x1, ncol = 1)

# Packaged example, displayed the same way
# (presumably wraps the steps above -- confirm against the package docs).
ex1_results <- sobol_example_g_deterministic()
print(ex1_results)
Sobol4R::autoplot(ex1_results, ncol = 1)

Sobol and randomness I: random effect on output variable

Generate data

# Number of rows in each of the two input samples.
n <- 50000

# First sample: columns C1 and C2, each holding n draws from U(0, 1).
# The draws are made in the order C1 then C2; local() keeps the temporaries
# out of the global environment.
X1_r1 <- local({
  first_col  <- runif(n)
  second_col <- runif(n)
  data.frame(C1 = first_col, C2 = second_col)
})

# Second sample, drawn the same way immediately after the first.
X2_r1 <- local({
  first_col  <- runif(n)
  second_col <- runif(n)
  data.frame(C1 = first_col, C2 = second_col)
})

Three settings, two input variables

The deterministic model is sobol_g2_function. The noisy version with Gaussian noise N(0,1) is sobol_g2_additive_noise. The quantity of interest based on the mean over replications is sobol_g2_qoi_mean.

# Reset the RNG state before building the design and evaluating the models.
set.seed(4669)
# Sobol design built from the two-column samples X1_r1 / X2_r1;
# order = 2 and nboot = 100 are forwarded to sobol4r_design() as given.
gensol2 <- sobol4r_design(
  X1    = X1_r1,
  X2    = X2_r1,
  order = 2,
  nboot = 100
)
# Three responses evaluated on the same design matrix gensol2$X:
#   Y2 - sobol_g2_function
#   Y3 - sobol_g2_additive_noise
#   Y4 - sobol_g2_qoi_mean, called with nrep = 1000
# The statement order matters: any random draws made inside these helpers
# consume the RNG stream seeded above in this sequence.
Y2 <- sobol_g2_function(gensol2$X)
Y3 <- sobol_g2_additive_noise(gensol2$X)
Y4 <- sobol_g2_qoi_mean(gensol2$X, nrep = 1000)
# Attach each response vector to the shared design via sensitivity::tell().
x2 <- sensitivity::tell(gensol2, Y2)
x3 <- sensitivity::tell(gensol2, Y3)
x4 <- sensitivity::tell(gensol2, Y4)
# Print, then plot, the three results in the same order.
print(x2)
print(x3)
print(x4)
Sobol4R::autoplot(x2)
Sobol4R::autoplot(x3)
Sobol4R::autoplot(x4)
# Packaged example; its elements x_det, x_noise and x_qoi are plotted below
# (presumably the counterparts of x2, x3 and x4 -- confirm against the package).
ex2_results <- sobol_example_random_output()
ex2_results
Sobol4R::autoplot(ex2_results$x_det)
Sobol4R::autoplot(ex2_results$x_noise)
Sobol4R::autoplot(ex2_results$x_qoi)
# Drop the example object once it has been displayed.
rm(ex2_results)

Sobol and randomness II: large random effect depending on an input variable

We keep the previously generated values for C1 and C2 and add a third variable C3 distributed as runif(n, min = 1, max = 100). The third variable controls the mean of the Gaussian noise.

# Number of rows; must equal the length of the C1 / C2 columns reused below.
n <- 50000
# First sample: C1 and C2 copied from X1_r1, plus a new C3 drawn from U(1, 100).
X1_r2 <- data.frame(
  C1 = X1_r1$C1,
  C2 = X1_r1$C2,
  C3 = runif(n, min = 1, max = 100)
)
# Second sample: C1 and C2 copied from X2_r1, plus its own C3 drawn from U(1, 100).
X2_r2 <- data.frame(
  C1 = X2_r1$C1,
  C2 = X2_r1$C2,
  C3 = runif(n, min = 1, max = 100)
)
# Show the first rows of the original and the extended sample side by side
# (lets the reader check that C1 and C2 were carried over unchanged).
head(X1_r1)
head(X1_r2)
# NOTE(review): the two C3 draws above happen before this set.seed() call, so
# their values depend on the RNG state left by earlier code -- confirm this is
# intended if the chunk is ever run on its own.
set.seed(4669)
# Sobol design on the three-column samples; order = 2 and nboot = 100 are
# forwarded to sobol4r_design() as given.
gensol3 <- sobol4r_design(
  X1    = X1_r2,
  X2    = X2_r2,
  order = 2,
  nboot = 100
)
# Two responses on the same design matrix: Y5 from sobol_g2_with_covariate_noise
# and Y6 from sobol_g2_qoi_covariate_mean with nrep = 1000.
Y5 <- sobol_g2_with_covariate_noise(gensol3$X)
Y6 <- sobol_g2_qoi_covariate_mean(gensol3$X, nrep = 1000)
# Attach each response vector to the design, then print and plot the results.
x5 <- sensitivity::tell(gensol3, Y5)
x6 <- sensitivity::tell(gensol3, Y6)
print(x5)
print(x6)
Sobol4R::autoplot(x5)
Sobol4R::autoplot(x6)
# Packaged example; its elements x_single and x_qoi are plotted below
# (presumably the counterparts of x5 and x6 -- confirm against the package).
ex3_results <- sobol_example_covariate_large()
ex3_results
Sobol4R::autoplot(ex3_results$x_single)
Sobol4R::autoplot(ex3_results$x_qoi)
# Drop the example object once it has been displayed.
rm(ex3_results)

Sobol and randomness III: slight random effect depending on an input variable

We now take a third input C3 distributed as runif(n, min = 1, max = 1.5), which induces a much smaller range for the mean of the noise.

# Number of rows; must equal the length of the C1 / C2 columns reused below.
n <- 50000
# First sample: C1 and C2 copied from X1_r1, plus a new C3 drawn from U(1, 1.5).
X1_r3 <- data.frame(
  C1 = X1_r1$C1,
  C2 = X1_r1$C2,
  C3 = runif(n, min = 1, max = 1.5)
)
# Second sample: C1 and C2 copied from X2_r1, plus its own C3 drawn from U(1, 1.5).
X2_r3 <- data.frame(
  C1 = X2_r1$C1,
  C2 = X2_r1$C2,
  C3 = runif(n, min = 1, max = 1.5)
)
# NOTE(review): the two C3 draws above happen before this set.seed() call, so
# their values depend on the RNG state left by earlier code -- confirm this is
# intended if the chunk is ever run on its own.
set.seed(4669)
# Sobol design on the three-column samples; order = 2 and nboot = 100 are
# forwarded to sobol4r_design() as given.
gensol4 <- sobol4r_design(
  X1    = X1_r3,
  X2    = X2_r3,
  order = 2,
  nboot = 100
)
# Same two response functions as in the U(1, 100) case, now evaluated on the
# design whose C3 column ranges over [1, 1.5].
Y7 <- sobol_g2_with_covariate_noise(gensol4$X)
Y8 <- sobol_g2_qoi_covariate_mean(gensol4$X, nrep = 1000)
# Attach each response vector to the design, then print and plot the results.
x7 <- sensitivity::tell(gensol4, Y7)
x8 <- sensitivity::tell(gensol4, Y8)
print(x7)
print(x8)
Sobol4R::autoplot(x7)
Sobol4R::autoplot(x8)
# Packaged example; its elements x_single and x_qoi are plotted below
# (presumably the counterparts of x7 and x8 -- confirm against the package).
ex4_results <- sobol_example_covariate_small()
ex4_results
Sobol4R::autoplot(ex4_results$x_single)
Sobol4R::autoplot(ex4_results$x_qoi)
# Drop the example object once it has been displayed.
rm(ex4_results)

Sobol and randomness IV: random variables with fixed distribution parameters

We now turn to the process model. The uncertain inputs are the distributional parameters of the individual unit model. The quantity of interest is the time needed to reach a given number of successes.

# Number of parameter vectors per sample for the process example
# (much smaller than the 50000 used in the earlier sections).
n <- 100

# Draw `n` random parameter vectors for the process model.
#
# Each row is generated independently and holds five values, drawn in this
# order (one runif() call per value):
#   1. 1 / U(20, 100)
#   2. 1 / U(24, 2000)
#   3. 1 / U(24, 120)
#   4. U(0.05, 0.3)
#   5. U(0.3, 0.7)
#
# @param n Number of parameter vectors (rows) to draw.
# @return A data frame with `n` rows and five numeric columns
#   (default column names X1, ..., X5).
draw_params <- function(n) {
  # One parameter vector; the index argument is only there for vapply().
  draw_one <- function(i) {
    c(
      1 / runif(1, min = 20, max = 100),
      1 / runif(1, min = 24, max = 2000),
      1 / runif(1, min = 24, max = 120),
      runif(1, min = 0.05, max = 0.3),
      runif(1, min = 0.3, max = 0.7)
    )
  }

  # vapply() guarantees a 5-by-n numeric matrix, including for n = 0, whereas
  # replicate() simplifies like sapply() and returns list() for n = 0, which
  # t() cannot handle. Rows are drawn one after another, so the RNG stream is
  # consumed in the same order as before.
  draws <- vapply(seq_len(n), draw_one, numeric(5))
  data.frame(t(draws))
}

# Two independent samples of n parameter vectors each.
X1_process <- draw_params(n)
X2_process <- draw_params(n)
# NOTE(review): the draw_params() calls above use runif() before this
# set.seed() call, so the sampled parameters depend on the RNG state left by
# earlier code -- confirm this is intended if the chunk is ever run on its own.
set.seed(4669)
# Sobol design on the parameter samples; order = 2 and nboot = 10 are
# forwarded to sobol4r_design() as given.
gensolp1 <- sobol4r_design(
  X1    = X1_process,
  X2    = X2_process,
  order = 2,
  nboot = 10
)
# Value passed as M to both process functions below
# (presumably the target number of successes described in the text -- confirm).
MM <- 50

# Two responses on the same design matrix: Yp1 from process_fun_row_wise and
# Yp2 from process_fun_mean_to_M with nrep = 10.
Yp1 <- process_fun_row_wise(gensolp1$X, M = MM)
Yp2 <- process_fun_mean_to_M(gensolp1$X, M = MM, nrep = 10)
# Attach each response vector to the design, then print and plot the results.
xp1 <- sensitivity::tell(gensolp1, Yp1)
xp2 <- sensitivity::tell(gensolp1, Yp2)
print(xp1)
print(xp2)
Sobol4R::autoplot(xp1)
Sobol4R::autoplot(xp2)
# Packaged example called with order = 2; its elements xp_single and xp_qoi
# are plotted below (presumably the counterparts of xp1 and xp2 -- confirm).
ex5_results <- sobol_example_process(order = 2)
ex5_results
Sobol4R::autoplot(ex5_results$xp_single)
Sobol4R::autoplot(ex5_results$xp_qoi)
# Drop the example object once it has been displayed.
rm(ex5_results)