# Create a vector in R
x <- c(5, 29, 13, 87)
x
## [1] 5 29 13 87
<-
symbol means assign x
the value c(5, 29, 13, 87)
.=
instead of <-
but this is discouraged.object_name <- value
.c()
means “concatenate”.x
into the console to print its assignment.[1]
tells us that 5 is the first element of the vector.# Create a vector in R
x <- 1:50
x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
Vector | Homogeneous | Heterogeneous |
---|---|---|
1d | Atomic vector | List |
2d | Matrix | Data frame |
nd | Array | - |
str()
(short for structure).x <- c(5, 29, 13, 87)
str(x)
## num [1:4] 5 29 13 87
typeof()
, what it is.length()
, how many elements it contains.attributes()
, additional arbitrary metadata.Special syntax to create an individual value, AKA a scalar:
TRUE
or FALSE
),T
or F
).0.1234
), scientific (1.23e4
), or hexadecimal (0xcafe
) form.Inf, -Inf
, and NaN
(not a number).L
(1234L, 1e4L
, or 0xcafeL
)," ("hi")
or ' ('bye')
.?Quotes
for details.c()
To create longer vectors from shorter ones, use c()
:
lgl_var <- c(TRUE, FALSE)
int_var <- c(1L, 6L, 10L)
dbl_var <- c(1, 2.5, 4.5)
chr_var <- c("these are", "some strings")
Depicting vectors as connected rectangles:
c()
returns atomic vectors (i.e., flattens):c(c(1, 2), c(3, 4))
## [1] 1 2 3 4
typeof()
and length()
:lgl_var <- c(TRUE, FALSE)
typeof(lgl_var)
## [1] "logical"
int_var <- c(1L, 6L, 10L)
typeof(int_var)
## [1] "integer"
dbl_var <- c(1, 2.5, 4.5)
typeof(dbl_var)
## [1] "double"
chr_var <- c("these are", "some strings")
typeof(chr_var)
## [1] "character"
NA
(short for not applicable/available).NA > 5
## [1] NA
10 * NA
## [1] NA
!NA
## [1] NA
NA ^ 0
## [1] 1
NA | TRUE
## [1] TRUE
NA & FALSE
## [1] FALSE
x <- c(NA, 5, NA, 10)
x == NA
## [1] NA NA NA NA
is.na()
:x <- c(NA, 5, NA, 10)
is.na(x)
## [1] TRUE FALSE TRUE FALSE
is.*()
, but be careful:
is.logical(), is.integer(), is.double()
, and is.character()
do what you might expect.is.vector(), is.atomic()
, and is.numeric()
or carefully read the documentation.str(c("a", 1))
## chr [1:2] "a" "1"
+, log
, etc.) coerce to numeric.TRUE/FALSE
become 1/0
.x <- c(FALSE, FALSE, TRUE)
as.numeric(x)
## [1] 0 0 1
c(sum(x), mean(x)) # Total number of TRUEs and proportion that are TRUE
## [1] 1.0000000 0.3333333
as.*()
(as.logical(), as.integer(), as.double()
, or as.character()
).as.integer(c("1", "1.5", "a"))
## Warning: NAs introduced by coercion
## [1] 1 1 NA
How about matrices, arrays, factors, or date-times?
dim
attribute to make matrices and arrays.class
attribute to create “S3” vectors, including factors, dates, and date-times.attr()
,attributes()/structure()
.a <- 1:3
attr(a, "x") <- "abcdef"
attr(a, "x")
## [1] "abcdef"
attr(a, "y") <- 4:6
str(attributes(a))
## List of 2
## $ x: chr "abcdef"
## $ y: int [1:3] 4 5 6
# Or equivalently
a <- structure(
1:3,
x = "abcdef",
y = 4:6
)
a <- structure(
1:3,
x = "abcdef",
y = 4:6
)
attributes(a[1])
## NULL
attributes(sum(a))
## NULL
names
, a character vector giving each element a name.dim
, short for dimensions, an integer vector, used to turn vectors into matrices or arrays.You can name a vector in three ways:
# When creating it
x <- c(a = 1, b = 2, c = 3)
# By assigning a character vector to names()
x <- 1:3
names(x) <- c("a", "b", "c")
# Inline, with setNames()
x <- setNames(1:3, c("a", "b", "c"))
attr(x, "names")
(more typing and less readable).unname(x)
or names(x) <- NULL
.dim
attribute on a vector allows it to behave like a 2-dimensional matrix or a multi-dimensional array.matrix()
:# Two scalar arguments specify row and column sizes
a <- matrix(1:6, nrow = 2, ncol = 3)
a
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
array()
:b <- array(1:12, c(2, 3, 2))
b
## , , 1
##
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] 7 9 11
## [2,] 8 10 12
dim()
:# You can also modify an object in place by setting dim()
c <- 1:6
dim(c) <- c(3, 2)
c
## [,1] [,2]
## [1,] 1 4
## [2,] 2 5
## [3,] 3 6
Vector | Matrix | Array |
---|---|---|
names() |
rownames() , colnames() |
dimnames() |
length() |
nrow() , ncol() |
dim() |
c() |
rbind() , cbind() |
abind::abind() |
— | t() |
aperm() |
is.null(dim(x)) |
is.matrix() |
is.array() |
dim
attribute set is often thought of as 1-dimensional, but actually has NULL
dimensions.str()
to reveal the differences.str(1:3) # 1d vector
## int [1:3] 1 2 3
str(matrix(1:3, ncol = 1)) # column vector
## int [1:3, 1] 1 2 3
str(matrix(1:3, nrow = 1)) # row vector
## int [1, 1:3] 1 2 3
str(array(1:3, 3))
## int [1:3(1d)] 1 2 3
class
, which underlies the S3 object system.
class
(defines a behavior different from integer vectors),levels
(defines the set of allowed values).x <- factor(c("a", "b", "b", "a"))
x
## [1] a b b a
## Levels: a b
typeof(x)
## [1] "integer"
attributes(x)
## $levels
## [1] "a" "b"
##
## $class
## [1] "factor"
sex_char <- c("m", "m", "m")
table(sex_char)
## sex_char
## m
## 3
sex_factor <- factor(sex_char, levels = c("m", "f"))
table(sex_factor)
## sex_factor
## m f
## 3 0
grade <- ordered(c("b", "b", "a", "c"), levels = c("c", "b", "a"))
grade
## [1] b b a c
## Levels: c < b < a
gsub()
and grepl()
) will automatically coerce factors to strings.nchar()
) will throw an error.c()
) use the underlying integer values.read.csv()/data.frame()
) automatically convert character vectors to factors.stringsAsFactors = FALSE
to suppress this behaviour, and then manually convert character vectors to factors using your knowledge of the “theoretical” data.today <- Sys.Date()
typeof(today)
## [1] "double"
attributes(today)
## $class
## [1] "Date"
date <- as.Date("1970-02-01")
unclass(date)
## [1] 31
time_t
type in C),struct tm
type in C).POSIXct
(the simplest):
now_ct <- as.POSIXct("2022-03-31 12:00", tz = "UTC")
now_ct
## [1] "2022-03-31 12:00:00 UTC"
typeof(now_ct)
## [1] "double"
attributes(now_ct)
## $class
## [1] "POSIXct" "POSIXt"
##
## $tzone
## [1] "UTC"
tzone
attribute:
now_ct <- as.POSIXct("2022-03-31 12:00", tz = "UTC")
structure(now_ct, tzone = "Asia/Tokyo")
## [1] "2022-03-31 21:00:00 JST"
structure(now_ct, tzone = "America/New_York")
## [1] "2022-03-31 08:00:00 EDT"
structure(now_ct, tzone = "Australia/Lord_Howe")
## [1] "2022-03-31 23:00:00 +11"
structure(now_ct, tzone = "Europe/Paris")
## [1] "2022-03-31 14:00:00 CEST"
difftimes
:
units
attribute determines how to interpret the number.one_week_1 <- as.difftime(1, units = "weeks")
one_week_1
## Time difference of 1 weeks
typeof(one_week_1)
## [1] "double"
attributes(one_week_1)
## $class
## [1] "difftime"
##
## $units
## [1] "weeks"
one_week_2 <- as.difftime(7, units = "days")
one_week_2
## Time difference of 7 days
typeof(one_week_2)
## [1] "double"
attributes(one_week_2)
## $class
## [1] "difftime"
##
## $units
## [1] "days"
list()
.l1 <- list(
1:3,
"a",
c(TRUE, FALSE, TRUE),
c(2.3, 5.9)
)
typeof(l1)
## [1] "list"
str(l1)
## List of 4
## $ : int [1:3] 1 2 3
## $ : chr "a"
## $ : logi [1:3] TRUE FALSE TRUE
## $ : num [1:2] 2.3 5.9
l3 <- list(list(list(1)))
str(l3)
## List of 1
## $ :List of 1
## ..$ :List of 1
## .. ..$ : num 1
c()
will combine several lists into one:l4 <- list(list(1, 2), c(3, 4))
l5 <- c(list(1, 2), c(3, 4))
str(l4)
## List of 2
## $ :List of 2
## ..$ : num 1
## ..$ : num 2
## $ : num [1:2] 3 4
str(l5)
## List of 4
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
typeof()
of a list is list.is.list()
, and coerce to a list with as.list()
.list(1:3)
## [[1]]
## [1] 1 2 3
as.list(1:3)
## [[1]]
## [1] 1
##
## [[2]]
## [1] 2
##
## [[3]]
## [1] 3
unlist()
, but be careful:
c()
.row.names
, and its class, data.frame
.df1 <- data.frame(x = 1:3, y = letters[1:3])
typeof(df1)
## [1] "list"
attributes(df1)
## $names
## [1] "x" "y"
##
## $class
## [1] "data.frame"
##
## $row.names
## [1] 1 2 3
rownames()
and colnames()
, but its names()
are the column names.nrow()
rows and ncol()
columns, but its length()
is the number of columns.tibble
, a modern reimagining of the data frame.tibble
package.data.frame
.tbl_df
.library(tibble)
df2 <- tibble(x = 1:3, y = letters[1:3])
typeof(df2)
## [1] "list"
attributes(df2)
## $names
## [1] "x" "y"
##
## $row.names
## [1] 1 2 3
##
## $class
## [1] "tbl_df" "tbl" "data.frame"
data.frame
data.frame()
.df <- data.frame(
x = 1:3,
y = c("a", "b", "c")
)
str(df)
## 'data.frame': 3 obs. of 2 variables:
## $ x: int 1 2 3
## $ y: chr "a" "b" "c"
df1 <- data.frame(
x = 1:3,
y = c("a", "b", "c"),
stringsAsFactors = FALSE
)
str(df1)
## 'data.frame': 3 obs. of 2 variables:
## $ x: int 1 2 3
## $ y: chr "a" "b" "c"
tibble
df2 <- tibble(
x = 1:3,
y = c("a", "b", "c")
)
str(df2)
## tbl_df [3 x 2] (S3: tbl_df/tbl/data.frame)
## $ x: int [1:3] 1 2 3
## $ y: chr [1:3] "a" "b" "c"
data.frame()
and tibble()
.
TRUE, NULL, if
, and function
(see the complete list in ?Reserved
)._abc <- 1
#> Error: unexpected input in "_"
if <- 10
#> Error: unexpected assignment in "if <-"
`_abc` <- 1
`_abc`
## [1] 1
`if` <- 10
`if`
## [1] 10
names(data.frame(`1` = 1))
## [1] "X1"
names(data.frame(`1` = 1, check.names = FALSE))
## [1] "1"
names(tibble(`1` = 1))
## [1] "1"
data.frame()
and tibble()
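recycle shorter inputs, but data.frame() recycles any input whose length evenly divides the longest column, whereas tibble() only recycles inputs of length one: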
data.frame(x = 1:4, y = 1:2)
## x y
## 1 1 1
## 2 2 2
## 3 3 1
## 4 4 2
data.frame(x = 1:4, y = 1:3)
# Error in data.frame(x = 1:4, y = 1:3) : arguments imply differing number of rows: 4, 3
tibble(x = 1:4, y = 1)
## # A tibble: 4 x 2
## x y
## <int> <dbl>
## 1 1 1
## 2 2 1
## 3 3 1
## 4 4 1
tibble(x = 1:4, y = 1:2)
# Error: Tibble columns must have compatible sizes. * Size 4: Existing data. * Size 2: Column `y`. i Only values of size one are recycled.
tibble()
allows you to refer to variables created during construction:tibble(
x = 1:3,
y = x * 2
)
## # A tibble: 3 x 2
## x y
## <int> <dbl>
## 1 1 2
## 2 2 4
## 3 3 6
(Inputs are evaluated left-to-right.)
print(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
## 13 4.8 3.0 1.4 0.1 setosa
## 14 4.3 3.0 1.1 0.1 setosa
## 15 5.8 4.0 1.2 0.2 setosa
## 16 5.7 4.4 1.5 0.4 setosa
## 17 5.4 3.9 1.3 0.4 setosa
## 18 5.1 3.5 1.4 0.3 setosa
## 19 5.7 3.8 1.7 0.3 setosa
## 20 5.1 3.8 1.5 0.3 setosa
## 21 5.4 3.4 1.7 0.2 setosa
## 22 5.1 3.7 1.5 0.4 setosa
## 23 4.6 3.6 1.0 0.2 setosa
## 24 5.1 3.3 1.7 0.5 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 26 5.0 3.0 1.6 0.2 setosa
## 27 5.0 3.4 1.6 0.4 setosa
## 28 5.2 3.5 1.5 0.2 setosa
## 29 5.2 3.4 1.4 0.2 setosa
## 30 4.7 3.2 1.6 0.2 setosa
## 31 4.8 3.1 1.6 0.2 setosa
## 32 5.4 3.4 1.5 0.4 setosa
## 33 5.2 4.1 1.5 0.1 setosa
## 34 5.5 4.2 1.4 0.2 setosa
## 35 4.9 3.1 1.5 0.2 setosa
## 36 5.0 3.2 1.2 0.2 setosa
## 37 5.5 3.5 1.3 0.2 setosa
## 38 4.9 3.6 1.4 0.1 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 40 5.1 3.4 1.5 0.2 setosa
## 41 5.0 3.5 1.3 0.3 setosa
## 42 4.5 2.3 1.3 0.3 setosa
## 43 4.4 3.2 1.3 0.2 setosa
## 44 5.0 3.5 1.6 0.6 setosa
## 45 5.1 3.8 1.9 0.4 setosa
## 46 4.8 3.0 1.4 0.3 setosa
## 47 5.1 3.8 1.6 0.2 setosa
## 48 4.6 3.2 1.4 0.2 setosa
## 49 5.3 3.7 1.5 0.2 setosa
## 50 5.0 3.3 1.4 0.2 setosa
## 51 7.0 3.2 4.7 1.4 versicolor
## 52 6.4 3.2 4.5 1.5 versicolor
## 53 6.9 3.1 4.9 1.5 versicolor
## 54 5.5 2.3 4.0 1.3 versicolor
## 55 6.5 2.8 4.6 1.5 versicolor
## 56 5.7 2.8 4.5 1.3 versicolor
## 57 6.3 3.3 4.7 1.6 versicolor
## 58 4.9 2.4 3.3 1.0 versicolor
## 59 6.6 2.9 4.6 1.3 versicolor
## 60 5.2 2.7 3.9 1.4 versicolor
## 61 5.0 2.0 3.5 1.0 versicolor
## 62 5.9 3.0 4.2 1.5 versicolor
## 63 6.0 2.2 4.0 1.0 versicolor
## 64 6.1 2.9 4.7 1.4 versicolor
## 65 5.6 2.9 3.6 1.3 versicolor
## 66 6.7 3.1 4.4 1.4 versicolor
## 67 5.6 3.0 4.5 1.5 versicolor
## 68 5.8 2.7 4.1 1.0 versicolor
## 69 6.2 2.2 4.5 1.5 versicolor
## 70 5.6 2.5 3.9 1.1 versicolor
## 71 5.9 3.2 4.8 1.8 versicolor
## 72 6.1 2.8 4.0 1.3 versicolor
## 73 6.3 2.5 4.9 1.5 versicolor
## 74 6.1 2.8 4.7 1.2 versicolor
## 75 6.4 2.9 4.3 1.3 versicolor
## 76 6.6 3.0 4.4 1.4 versicolor
## 77 6.8 2.8 4.8 1.4 versicolor
## 78 6.7 3.0 5.0 1.7 versicolor
## 79 6.0 2.9 4.5 1.5 versicolor
## 80 5.7 2.6 3.5 1.0 versicolor
## 81 5.5 2.4 3.8 1.1 versicolor
## 82 5.5 2.4 3.7 1.0 versicolor
## 83 5.8 2.7 3.9 1.2 versicolor
## 84 6.0 2.7 5.1 1.6 versicolor
## 85 5.4 3.0 4.5 1.5 versicolor
## 86 6.0 3.4 4.5 1.6 versicolor
## 87 6.7 3.1 4.7 1.5 versicolor
## 88 6.3 2.3 4.4 1.3 versicolor
## 89 5.6 3.0 4.1 1.3 versicolor
## 90 5.5 2.5 4.0 1.3 versicolor
## 91 5.5 2.6 4.4 1.2 versicolor
## 92 6.1 3.0 4.6 1.4 versicolor
## 93 5.8 2.6 4.0 1.2 versicolor
## 94 5.0 2.3 3.3 1.0 versicolor
## 95 5.6 2.7 4.2 1.3 versicolor
## 96 5.7 3.0 4.2 1.2 versicolor
## 97 5.7 2.9 4.2 1.3 versicolor
## 98 6.2 2.9 4.3 1.3 versicolor
## 99 5.1 2.5 3.0 1.1 versicolor
## 100 5.7 2.8 4.1 1.3 versicolor
## 101 6.3 3.3 6.0 2.5 virginica
## 102 5.8 2.7 5.1 1.9 virginica
## 103 7.1 3.0 5.9 2.1 virginica
## 104 6.3 2.9 5.6 1.8 virginica
## 105 6.5 3.0 5.8 2.2 virginica
## 106 7.6 3.0 6.6 2.1 virginica
## 107 4.9 2.5 4.5 1.7 virginica
## 108 7.3 2.9 6.3 1.8 virginica
## 109 6.7 2.5 5.8 1.8 virginica
## 110 7.2 3.6 6.1 2.5 virginica
## 111 6.5 3.2 5.1 2.0 virginica
## 112 6.4 2.7 5.3 1.9 virginica
## 113 6.8 3.0 5.5 2.1 virginica
## 114 5.7 2.5 5.0 2.0 virginica
## 115 5.8 2.8 5.1 2.4 virginica
## 116 6.4 3.2 5.3 2.3 virginica
## 117 6.5 3.0 5.5 1.8 virginica
## 118 7.7 3.8 6.7 2.2 virginica
## 119 7.7 2.6 6.9 2.3 virginica
## 120 6.0 2.2 5.0 1.5 virginica
## 121 6.9 3.2 5.7 2.3 virginica
## 122 5.6 2.8 4.9 2.0 virginica
## 123 7.7 2.8 6.7 2.0 virginica
## 124 6.3 2.7 4.9 1.8 virginica
## 125 6.7 3.3 5.7 2.1 virginica
## 126 7.2 3.2 6.0 1.8 virginica
## 127 6.2 2.8 4.8 1.8 virginica
## 128 6.1 3.0 4.9 1.8 virginica
## 129 6.4 2.8 5.6 2.1 virginica
## 130 7.2 3.0 5.8 1.6 virginica
## 131 7.4 2.8 6.1 1.9 virginica
## 132 7.9 3.8 6.4 2.0 virginica
## 133 6.4 2.8 5.6 2.2 virginica
## 134 6.3 2.8 5.1 1.5 virginica
## 135 6.1 2.6 5.6 1.4 virginica
## 136 7.7 3.0 6.1 2.3 virginica
## 137 6.3 3.4 5.6 2.4 virginica
## 138 6.4 3.1 5.5 1.8 virginica
## 139 6.0 3.0 4.8 1.8 virginica
## 140 6.9 3.1 5.4 2.1 virginica
## 141 6.7 3.1 5.6 2.4 virginica
## 142 6.9 3.1 5.1 2.3 virginica
## 143 5.8 2.7 5.1 1.9 virginica
## 144 6.8 3.2 5.9 2.3 virginica
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
print(dplyr::starwars)
## # A tibble: 87 x 14
## name height mass hair_color skin_color eye_color birth_year sex
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke~ 172 77 blond fair blue 19 male
## 2 C-3PO 167 75 <NA> gold yellow 112 none
## 3 R2-D2 96 32 <NA> white, bl~ red 33 none
## 4 Dart~ 202 136 none white yellow 41.9 male
## 5 Leia~ 150 49 brown light brown 19 fema~
## 6 Owen~ 178 120 brown, gr~ light blue 52 male
## 7 Beru~ 165 75 brown light blue 47 fema~
## 8 R5-D4 97 32 <NA> white, red red NA none
## 9 Bigg~ 183 84 black light brown 24 male
## 10 Obi-~ 182 77 auburn, w~ fair blue-gray 57 male
## # ... with 77 more rows, and 6 more variables: gender <chr>,
## # homeworld <chr>, species <chr>, films <list>, vehicles <list>,
## # starships <list>
df1 <- data.frame(
x = 1:3,
y = c("a", "b", "c"),
stringsAsFactors = FALSE
)
df2 <- tibble(
x = 1:3,
y = c("a", "b", "c")
)
is.data.frame(df1)
## [1] TRUE
is.data.frame(df2)
## [1] TRUE
df1 <- data.frame(
x = 1:3,
y = c("a", "b", "c"),
stringsAsFactors = FALSE
)
df2 <- tibble(
x = 1:3,
y = c("a", "b", "c")
)
is_tibble(df1)
## [1] FALSE
is_tibble(df2)
## [1] TRUE
as.data.frame()
or as_tibble()
.I()
.df <- data.frame(x = 1:3)
df$y <- list(1:2, 1:3, 1:4)
data.frame(
x = 1:3,
y = I(list(1:2, 1:3, 1:4))
)
## x y
## 1 1 1, 2
## 2 2 1, 2, 3
## 3 3 1, 2, 3, 4
tibble(
x = 1:3,
y = list(1:2, 1:3, 1:4)
)
## # A tibble: 3 x 2
## x y
## <int> <list>
## 1 1 <int [2]>
## 2 2 <int [3]>
## 3 3 <int [4]>
dfm <- data.frame(
x = 1:3 * 10
)
dfm$y <- matrix(1:9, nrow = 3)
dfm$z <- data.frame(a = 3:1, b = letters[1:3],
stringsAsFactors = FALSE)
str(dfm)
## 'data.frame': 3 obs. of 3 variables:
## $ x: num 10 20 30
## $ y: int [1:3, 1:3] 1 2 3 4 5 6 7 8 9
## $ z:'data.frame': 3 obs. of 2 variables:
## ..$ a: int 3 2 1
## ..$ b: chr "a" "b" "c"
dfm <- data.frame(
x = 1:3 * 10
)
dfm$y <- matrix(1:9, nrow = 3)
dfm$z <- data.frame(a = 3:1, b = letters[1:3],
stringsAsFactors = FALSE)
print(dfm[1, ])
## x y.1 y.2 y.3 z.a z.b
## 1 10 1 4 7 3 a
NULL
typeof(NULL)
## [1] "NULL"
length(NULL)
## [1] 0
x <- NULL
x <- NULL
attr(x, "y") <- 1
#> Error in attr(x, "y") <- 1: attempt to set an attribute on NULL
NULLs
with is.null()
:is.null(NULL)
## [1] TRUE
NULL
commonly represents an absent vector or an empty vector (a vector of length zero) of arbitrary type.
NULL
is often used as a default function argument.NA
, which indicates that an element of a vector is absent.c()
## NULL
c(NULL, NULL)
## NULL
c(NULL, 1:3)
## [1] 1 2 3
NULL
, but the database NULL
is actually equivalent to R’s NA
.[[, [
, and $
.str()
:
str()
shows the pieces of any object (its structure).[
.[[
and $
.[
for atomic vectorsWe’ll look at the following vector:
x <- c(2.1, 4.2, 3.3, 5.4)
x <- c(2.1, 4.2, 3.3, 5.4)
x[c(3, 1)]
## [1] 3.3 2.1
x[order(x)]
## [1] 2.1 3.3 4.2 5.4
x[c(1, 1)] # Duplicate indices will duplicate values
## [1] 2.1 2.1
x[c(2.1, 2.9)] # Real numbers are silently truncated to integers
## [1] 4.2 4.2
x <- c(2.1, 4.2, 3.3, 5.4)
x[-c(3, 1)]
## [1] 4.2 5.4
x <- c(2.1, 4.2, 3.3, 5.4)
x[c(-1, 2)]
#> Error in x[c(-1, 2)]: only 0's may be mixed with negative subscripts
TRUE
(probably the most useful):x <- c(2.1, 4.2, 3.3, 5.4)
x[c(TRUE, TRUE, FALSE, FALSE)]
## [1] 2.1 4.2
x[x > 3]
## [1] 4.2 3.3 5.4
x[y]
, what happens if x
and y
are different lengths?
x
OR y
is length one, but avoid for other lengths because of inconsistencies in base R.x <- c(2.1, 4.2, 3.3, 5.4)
x[c(TRUE, FALSE)]
## [1] 2.1 3.3
# Equivalent to
x[c(TRUE, FALSE, TRUE, FALSE)]
## [1] 2.1 3.3
x <- c(2.1, 4.2, 3.3, 5.4)
x[]
## [1] 2.1 4.2 3.3 5.4
x <- c(2.1, 4.2, 3.3, 5.4)
x[0]
## numeric(0)
x <- c(2.1, 4.2, 3.3, 5.4)
(y <- setNames(x, letters[1:4]))
## a b c d
## 2.1 4.2 3.3 5.4
y[c("d", "c", "a")]
## d c a
## 5.4 3.3 2.1
# Like integer indices, you can repeat indices
y[c("a", "a", "a")]
## a a a
## 2.1 2.1 2.1
# When subsetting with [, names are always matched exactly
z <- c(abc = 1, def = 2)
z[c("a", "d")]
## <NA> <NA>
## NA NA
x <- c(2.1, 4.2, 3.3, 5.4)
x[c(TRUE, TRUE, NA, FALSE)]
## [1] 2.1 4.2 NA
x <- c(2.1, 4.2, 3.3, 5.4)
(y <- setNames(x, letters[1:4]))
## a b c d
## 2.1 4.2 3.3 5.4
y[factor("b")]
## a
## 2.1
[
for lists[
always returns a list; [[
and $
(see later), let you pull out elements of a list.[
for matrices and arraysa <- matrix(1:9, nrow = 3)
colnames(a) <- c("A", "B", "C")
a[1:2, ]
## A B C
## [1,] 1 4 7
## [2,] 2 5 8
a[c(TRUE, FALSE, TRUE), c("B", "A")]
## B A
## [1,] 4 1
## [2,] 6 3
a[0, -2]
## A C
[
simplifies the results to the lowest possible dimensionality.
a <- matrix(1:9, nrow = 3)
a[1, ]
## [1] 1 4 7
a[1, 1]
## [1] 1
vals <- outer(1:5, 1:5, FUN = "paste", sep = ",")
vals
## [,1] [,2] [,3] [,4] [,5]
## [1,] "1,1" "1,2" "1,3" "1,4" "1,5"
## [2,] "2,1" "2,2" "2,3" "2,4" "2,5"
## [3,] "3,1" "3,2" "3,3" "3,4" "3,5"
## [4,] "4,1" "4,2" "4,3" "4,4" "4,5"
## [5,] "5,1" "5,2" "5,3" "5,4" "5,5"
vals[c(4, 15)]
## [1] "4,1" "5,3"
vals <- outer(1:5, 1:5, FUN = "paste", sep = ",")
select <- matrix(ncol = 2, byrow = TRUE, c(
1, 1,
3, 1,
2, 4
))
vals[select]
## [1] "1,1" "3,1" "2,4"
[
for data frames and tibblesdf[1:2]
selects the first two columns.df[1:3, ]
selects the first three rows (and all columns)df <- data.frame(x = 1:3, y = 3:1, z = letters[1:3])
df[df$x == 2, ]
## x y z
## 2 2 2 b
df[c(1, 3), ]
## x y z
## 1 1 3 a
## 3 3 1 c
df <- data.frame(x = 1:3, y = 3:1, z = letters[1:3])
# Like a list
df[c("x", "z")]
## x z
## 1 1 a
## 2 2 b
## 3 3 c
# Like a matrix
df[, c("x", "z")]
## x z
## 1 1 a
## 2 2 b
## 3 3 c
df <- data.frame(x = 1:3, y = 3:1, z = letters[1:3])
str(df[, "x"])
## int [1:3] 1 2 3
str(df["x"])
## 'data.frame': 3 obs. of 1 variable:
## $ x: int 1 2 3
[
always returns a tibble:df <- tibble::tibble(x = 1:3, y = 3:1, z = letters[1:3])
str(df["x"])
## tbl_df [3 x 1] (S3: tbl_df/tbl/data.frame)
## $ x: int [1:3] 1 2 3
str(df[, "x"])
## tbl_df [3 x 1] (S3: tbl_df/tbl/data.frame)
## $ x: int [1:3] 1 2 3
a <- matrix(1:4, nrow = 2)
str(a[1, ])
## int [1:2] 1 3
str(a[1, , drop = FALSE])
## int [1, 1:2] 1 3
df <- data.frame(a = 1:2, b = 1:2)
str(df[, "a"])
## int [1:2] 1 2
str(df[, "a", drop = FALSE])
## 'data.frame': 2 obs. of 1 variable:
## $ a: int 1 2
drop = TRUE
is a common source of bugs:
drop = FALSE
and [
always returns a tibble.FALSE
.drop = TRUE
, use a character vector instead.z <- factor(c("a", "b"))
z[1]
## [1] a
## Levels: a b
z[1, drop = TRUE]
## [1] a
## Levels: a
The other two subsetting operators:
[[
is used for extracting single items.
x$y
is a useful shorthand for x[["y"]]
.
[[
[[
is most important when working with lists because subsetting a list with [
always returns a smaller list.If list x is a train carrying objects, then x[[5]] is the object in car 5; x[4:6] is a train of cars 4-6. — @RLangTip, https://twitter.com/RLangTip/status/268375867468681216
x <- list(1:3, "a", 4:6)
[
.[[
.$
x$y
is roughly equivalent to x[["y"]]
.mtcars$cyl
or diamonds$carat
.$
:var <- "cyl"
# Doesn't work - mtcars$var translated to mtcars[["var"]]
mtcars$var
## NULL
# Instead use [[
mtcars[[var]]
## [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4
$
and [[
is (left-to-right) partial matching:x <- list(abc = 1)
x$a
## [1] 1
x[["a"]]
## NULL
options(warnPartialMatchDollar = TRUE)
x <- list(abc = 1)
x$a
## Warning in x$a: partial match of 'a' to 'abc'
## [1] 1
(For data frames, you can also avoid this problem by using tibbles, which never do partial matching.)
df[, vars]
:
vars
selects one variable.drop = FALSE
df$x
:
x
, selects any variable that starts with x
.x
, returns NULL
.[
always returns a tibble.\$
doesn’t do partial matching and warns if it can’t find a variable (makes tibbles surly).df1 <- data.frame(xyz = "a")
str(df1$x)
## Warning in df1$x: partial match of 'x' to 'xyz'
## chr "a"
df2 <- tibble(xyz = "a")
str(df2$x)
## Warning: Unknown or uninitialised column: `x`.
## NULL
x[i] <- value
:x <- 1:5
x[c(1, 2)] <- c(101, 102)
x
## [1] 101 102 3 4 5
length(value)
is the same as length(x[i])
,i
is unique.NULL
x[[i]] <- NULL
removes a component.NULL
, use x[i] <- list(NULL)
.x <- list(a = 1, b = 2)
x[["b"]] <- NULL
str(x)
## List of 1
## $ a: num 1
y <- list(a = 1, b = 2)
y["b"] <- list(NULL)
str(y)
## List of 2
## $ a: num 1
## $ b: NULL
mtcars[] <- lapply(mtcars, as.integer)
is.data.frame(mtcars)
## [1] TRUE
mtcars <- lapply(mtcars, as.integer)
is.data.frame(mtcars)
## [1] FALSE