shuffle.Rd
Unrestricted and restricted permutation designs for time series, line transects, spatial grids and blocking factors.
shuffle(n, control = how())
permute(i, n, control)
numeric; the length of the returned vector of permuted
values. Usually the number of observations under consideration. May
also be any object that nobs
knows about; see
nobs-methods
.
a list of control values describing properties of the
permutation design, as returned by a call to how
.
integer; row of control$all.perms
to return.
shuffle
can generate permutations for a wide range of
restricted permutation schemes. A small selection of the available
combinations of options is provided in the Examples section below.
permute
is a higher level utility function for use in a loop
within a function implementing a permutation test. The main purpose of
permute
is to return the correct permutation in each iteration
of the loop, either a random permutation from the current design or
the next permutation from control$all.perms
if it is not
NULL
and control$complete
is TRUE
.
For shuffle
a vector of length n
containing a
permutation of the observations 1, ..., n using the permutation
scheme described by argument control
.
For permute
the i
th permutation from the set of all
permutations, or a random permutation from the design.
shuffle()
is modelled after the permutation schemes of Canoco
3.1 (ter Braak, 1990); see also Besag & Clifford (1989).
Besag, J. and Clifford, P. (1989) Generalized Monte Carlo significance tests. Biometrika 76; 633–642.
ter Braak, C. J. F. (1990). Update notes: CANOCO version 3.1. Wageningen: Agricultural Mathematics Group. (UR).
set.seed(1234)
## unrestricted permutations
shuffle(20)
#> [1] 3 12 11 18 14 10 1 4 8 6 7 5 20 15 2 9 17 16 19 13
## observations represent a time series of line transect
CTRL <- how(within = Within(type = "series"))
shuffle(20, control = CTRL)
#> [1] 8 9 10 11 12 13 14 15 16 17 18 19 20 1 2 3 4 5 6 7
## observations represent a time series of line transect
## but with mirroring allowed
CTRL <- how(within = Within(type = "series", mirror = TRUE))
shuffle(20, control = CTRL)
#> [1] 7 6 5 4 3 2 1 20 19 18 17 16 15 14 13 12 11 10 9 8
## observations represent a spatial grid, 5rx4c
nr <- 5
nc <- 4
CTRL <- how(within = Within(type = "grid", ncol = nc, nrow = nr))
perms <- shuffle(20, control = CTRL)
## view the permutation as a grid
matrix(matrix(1:20, nrow = nr, ncol = nc)[perms],
ncol = nc, nrow = nr)
#> [,1] [,2] [,3] [,4]
#> [1,] 7 12 17 2
#> [2,] 8 13 18 3
#> [3,] 9 14 19 4
#> [4,] 10 15 20 5
#> [5,] 6 11 16 1
## random permutations in presence of strata
plots <- Plots(strata = gl(4, 5))
CTRL <- how(plots = plots, within = Within(type = "free"))
shuffle(20, CTRL)
#> [1] 5 3 4 2 1 8 7 6 9 10 14 11 15 12 13 18 20 16 17 19
## as above but same random permutation within strata
CTRL <- how(plots = plots, within = Within(type = "free",
constant = TRUE))
shuffle(20, CTRL)
#> [1] 3 5 2 1 4 8 10 7 6 9 13 15 12 11 14 18 20 17 16 19
## time series within each level of block
CTRL <- how(plots = plots, within = Within(type = "series"))
shuffle(20, CTRL)
#> [1] 2 3 4 5 1 8 9 10 6 7 15 11 12 13 14 19 20 16 17 18
## as above, but with same permutation for each level
CTRL <- how(plots = plots, within = Within(type = "series",
constant = TRUE))
shuffle(20, CTRL)
#> [1] 2 3 4 5 1 7 8 9 10 6 12 13 14 15 11 17 18 19 20 16
## spatial grids within each level of block, 4 x (5r x 5c)
nr <- 5
nc <- 5
nb <- 4 ## number of blocks
plots <- Plots(gl(nb, 25))
CTRL <- how(plots = plots,
within = Within(type = "grid", ncol = nc, nrow = nr))
shuffle(100, CTRL)
#> [1] 24 25 21 22 23 4 5 1 2 3 9 10 6 7 8 14 15 11
#> [19] 12 13 19 20 16 17 18 27 28 29 30 26 32 33 34 35 31 37
#> [37] 38 39 40 36 42 43 44 45 41 47 48 49 50 46 56 57 58 59
#> [55] 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 51 52
#> [73] 53 54 55 83 84 85 81 82 88 89 90 86 87 93 94 95 91 92
#> [91] 98 99 100 96 97 78 79 80 76 77
## as above, but with same permutation for each level
CTRL <- how(plots = plots,
within = Within(type = "grid", ncol = nc, nrow = nr,
constant = TRUE))
shuffle(100, CTRL)
#> [1] 23 24 25 21 22 3 4 5 1 2 8 9 10 6 7 13 14 15
#> [19] 11 12 18 19 20 16 17 48 49 50 46 47 28 29 30 26 27 33
#> [37] 34 35 31 32 38 39 40 36 37 43 44 45 41 42 73 74 75 71
#> [55] 72 53 54 55 51 52 58 59 60 56 57 63 64 65 61 62 68 69
#> [73] 70 66 67 98 99 100 96 97 78 79 80 76 77 83 84 85 81 82
#> [91] 88 89 90 86 87 93 94 95 91 92
## permuting levels of plots instead of observations
CTRL <- how(plots = Plots(gl(4, 5), type = "free"),
within = Within(type = "none"))
shuffle(20, CTRL)
#> [1] 11 12 13 14 15 1 2 3 4 5 6 7 8 9 10 16 17 18 19 20
## permuting levels of plots instead of observations
## but plots represent a time series
CTRL <- how(plots = Plots(gl(4, 5), type = "series"),
within = Within(type = "none"))
shuffle(20, CTRL)
#> [1] 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 1 2 3 4 5
## permuting levels of plots but plots represent a time series
## free permutation within plots
CTRL <- how(plots = Plots(gl(4, 5), type = "series"),
within = Within(type = "free"))
shuffle(20, CTRL)
#> [1] 12 11 13 14 15 16 20 19 17 18 1 2 3 4 5 6 8 10 7 9
## permuting within blocks
grp <- gl(2, 10) # 2 groups of 10 samples each
CTRL <- how(blocks = grp)
shuffle(length(grp), control = CTRL)
#> [1] 1 3 6 10 4 2 7 8 9 5 12 14 11 15 18 17 16 19 20 13
## Simple function using permute() to assess significance
## of a t.test
pt.test <- function(x, group, control) {
## function to calculate t
t.statistic <- function(x, y) {
m <- length(x)
n <- length(y)
## means and variances, but for speed
xbar <- mean(x)
ybar <- mean(y)
xvar <- var(x)
yvar <- var(y)
pooled <- sqrt(((m-1)*xvar + (n-1)*yvar) / (m+n-2))
(xbar - ybar) / (pooled * sqrt(1/m + 1/n))
}
## check the control object
#control <- check(x, control)$control ## FIXME
## number of observations
Nobs <- nobs(x)
## group names
lev <- names(table(group))
## vector to hold results, +1 because of observed t
t.permu <- numeric(length = control$nperm) + 1
## calculate observed t
t.permu[1] <- t.statistic(x[group == lev[1]], x[group == lev[2]])
## generate randomisation distribution of t
for(i in seq_along(t.permu)) {
## return a permutation
want <- permute(i, Nobs, control)
## calculate permuted t
t.permu[i+1] <- t.statistic(x[want][group == lev[1]],
x[want][group == lev[2]])
}
## pval from permutation test
pval <- sum(abs(t.permu) >= abs(t.permu[1])) / (control$nperm + 1)
## return value
return(list(t.stat = t.permu[1], pval = pval))
}
## generate some data with slightly different means
set.seed(1234)
gr1 <- rnorm(20, mean = 9)
gr2 <- rnorm(20, mean = 10)
dat <- c(gr1, gr2)
## grouping variable
grp <- gl(2, 20, labels = paste("Group", 1:2))
## create the permutation design
control <- how(nperm = 999, within = Within(type = "free"))
## perform permutation t test
perm.val <- pt.test(dat, grp, control)
perm.val
#> $t.stat
#> [1] -2.342064
#>
#> $pval
#> [1] 0.024
#>
## compare perm.val with the p-value from t.test()
t.test(dat ~ grp, var.equal = TRUE)
#>
#> Two Sample t-test
#>
#> data: dat by grp
#> t = -2.3421, df = 38, p-value = 0.02452
#> alternative hypothesis: true difference in means between group Group 1 and group Group 2 is not equal to 0
#> 95 percent confidence interval:
#> -1.25582408 -0.09136416
#> sample estimates:
#> mean in group Group 1 mean in group Group 2
#> 8.749336 9.422930
#>