nycflights13
nycflights13::flights
All 336,776 flights that departed from NYC in 2013
nycflights13::flights
## # A tibble: 336,776 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 517 515 2 830
## 2 2013 1 1 533 529 4 850
## 3 2013 1 1 542 540 2 923
## 4 2013 1 1 544 545 -1 1004
## 5 2013 1 1 554 600 -6 812
## 6 2013 1 1 554 558 -4 740
## 7 2013 1 1 555 600 -5 913
## 8 2013 1 1 557 600 -3 709
## 9 2013 1 1 557 600 -3 838
## 10 2013 1 1 558 600 -2 753
## # ... with 336,766 more rows, and 12 more variables:
## # sched_arr_time <int>, arr_delay <dbl>, carrier <chr>,
## # flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>
nycflights13::airlines
airlines
## # A tibble: 16 x 2
## carrier name
## <chr> <chr>
## 1 9E Endeavor Air Inc.
## 2 AA American Airlines Inc.
## 3 AS Alaska Airlines Inc.
## 4 B6 JetBlue Airways
## 5 DL Delta Air Lines Inc.
## 6 EV ExpressJet Airlines Inc.
## 7 F9 Frontier Airlines Inc.
## 8 FL AirTran Airways Corporation
## 9 HA Hawaiian Airlines Inc.
## 10 MQ Envoy Air
## 11 OO SkyWest Airlines Inc.
## 12 UA United Air Lines Inc.
## 13 US US Airways Inc.
## 14 VX Virgin America
## 15 WN Southwest Airlines Co.
## 16 YV Mesa Airlines Inc.
nycflights13::airports
airports
## # A tibble: 1,458 x 8
## faa name lat lon alt tz dst tzone
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 04G Lansdowne Airport 41.1 -80.6 1044 -5 A America/Ne~
## 2 06A Moton Field Munic~ 32.5 -85.7 264 -6 A America/Ch~
## 3 06C Schaumburg Region~ 42.0 -88.1 801 -6 A America/Ch~
## 4 06N Randall Airport 41.4 -74.4 523 -5 A America/Ne~
## 5 09J Jekyll Island Air~ 31.1 -81.4 11 -5 A America/Ne~
## 6 0A9 Elizabethton Muni~ 36.4 -82.2 1593 -5 A America/Ne~
## 7 0G6 Williams County A~ 41.5 -84.5 730 -5 A America/Ne~
## 8 0G7 Finger Lakes Regi~ 42.9 -76.8 492 -5 A America/Ne~
## 9 0P2 Shoestring Aviati~ 39.8 -76.6 1000 -5 U America/Ne~
## 10 0S9 Jefferson County ~ 48.1 -123. 108 -8 A America/Lo~
## # ... with 1,448 more rows
nycflights13::planes
planes
## # A tibble: 3,322 x 9
## tailnum year type manufacturer model engines seats speed engine
## <chr> <int> <chr> <chr> <chr> <int> <int> <int> <chr>
## 1 N10156 2004 Fixed ~ EMBRAER EMB-~ 2 55 NA Turbo~
## 2 N102UW 1998 Fixed ~ AIRBUS INDU~ A320~ 2 182 NA Turbo~
## 3 N103US 1999 Fixed ~ AIRBUS INDU~ A320~ 2 182 NA Turbo~
## 4 N104UW 1999 Fixed ~ AIRBUS INDU~ A320~ 2 182 NA Turbo~
## 5 N10575 2002 Fixed ~ EMBRAER EMB-~ 2 55 NA Turbo~
## 6 N105UW 1999 Fixed ~ AIRBUS INDU~ A320~ 2 182 NA Turbo~
## 7 N107US 1999 Fixed ~ AIRBUS INDU~ A320~ 2 182 NA Turbo~
## 8 N108UW 1999 Fixed ~ AIRBUS INDU~ A320~ 2 182 NA Turbo~
## 9 N109UW 1999 Fixed ~ AIRBUS INDU~ A320~ 2 182 NA Turbo~
## 10 N110UW 1999 Fixed ~ AIRBUS INDU~ A320~ 2 182 NA Turbo~
## # ... with 3,312 more rows
nycflights13::weather
weather
## # A tibble: 26,115 x 15
## origin year month day hour temp dewp humid wind_dir
## <chr> <int> <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 EWR 2013 1 1 1 39.0 26.1 59.4 270
## 2 EWR 2013 1 1 2 39.0 27.0 61.6 250
## 3 EWR 2013 1 1 3 39.0 28.0 64.4 240
## 4 EWR 2013 1 1 4 39.9 28.0 62.2 250
## 5 EWR 2013 1 1 5 39.0 28.0 64.4 260
## 6 EWR 2013 1 1 6 37.9 28.0 67.2 240
## 7 EWR 2013 1 1 7 39.0 28.0 64.4 240
## 8 EWR 2013 1 1 8 39.9 28.0 62.2 250
## 9 EWR 2013 1 1 9 39.9 28.0 62.2 260
## 10 EWR 2013 1 1 10 41 28.0 59.6 260
## # ... with 26,105 more rows, and 6 more variables: wind_speed <dbl>,
## # wind_gust <dbl>, precip <dbl>, pressure <dbl>, visib <dbl>,
## # time_hour <dttm>
weather
and airports
.
weather
only contains information for the origin (NYC) airports.
tailnum
for planes
).year, month, day, hour
, and origin
for weather).planes$tailnum
.flights$tailnum
.planes %>%
count(tailnum) %>%
filter(n > 1)
## # A tibble: 0 x 2
## # ... with 2 variables: tailnum <chr>, n <int>
weather %>%
count(year, month, day, hour, origin) %>%
filter(n > 1)
## # A tibble: 3 x 6
## year month day hour origin n
## <int> <int> <int> <int> <chr> <int>
## 1 2013 11 3 1 EWR 2
## 2 2013 11 3 1 JFK 2
## 3 2013 11 3 1 LGA 2
flights %>%
count(year, month, day, flight) %>%
filter(n > 1)
## # A tibble: 29,768 x 5
## year month day flight n
## <int> <int> <int> <int> <int>
## 1 2013 1 1 1 2
## 2 2013 1 1 3 2
## 3 2013 1 1 4 2
## 4 2013 1 1 11 3
## 5 2013 1 1 15 2
## 6 2013 1 1 21 2
## 7 2013 1 1 27 4
## 8 2013 1 1 31 2
## 9 2013 1 1 32 2
## 10 2013 1 1 35 2
## # ... with 29,758 more rows
mutate()
and row_number()
.## Create a narrower dataset
flights2 <- flights %>%
select(year:day, hour, origin, dest, tailnum, carrier)
flights2
## # A tibble: 336,776 x 8
## year month day hour origin dest tailnum carrier
## <int> <int> <int> <dbl> <chr> <chr> <chr> <chr>
## 1 2013 1 1 5 EWR IAH N14228 UA
## 2 2013 1 1 5 LGA IAH N24211 UA
## 3 2013 1 1 5 JFK MIA N619AA AA
## 4 2013 1 1 5 JFK BQN N804JB B6
## 5 2013 1 1 6 LGA ATL N668DN DL
## 6 2013 1 1 5 EWR ORD N39463 UA
## 7 2013 1 1 6 EWR FLL N516JB B6
## 8 2013 1 1 6 LGA IAD N829AS EV
## 9 2013 1 1 6 JFK MCO N593JB B6
## 10 2013 1 1 6 LGA ORD N3ALAA AA
## # ... with 336,766 more rows
flights2 %>%
select(-origin, -dest) %>%
left_join(airlines, by = "carrier")
## # A tibble: 336,776 x 7
## year month day hour tailnum carrier name
## <int> <int> <int> <dbl> <chr> <chr> <chr>
## 1 2013 1 1 5 N14228 UA United Air Lines Inc.
## 2 2013 1 1 5 N24211 UA United Air Lines Inc.
## 3 2013 1 1 5 N619AA AA American Airlines Inc.
## 4 2013 1 1 5 N804JB B6 JetBlue Airways
## 5 2013 1 1 6 N668DN DL Delta Air Lines Inc.
## 6 2013 1 1 5 N39463 UA United Air Lines Inc.
## 7 2013 1 1 6 N516JB B6 JetBlue Airways
## 8 2013 1 1 6 N829AS EV ExpressJet Airlines Inc.
## 9 2013 1 1 6 N593JB B6 JetBlue Airways
## 10 2013 1 1 6 N3ALAA AA American Airlines Inc.
## # ... with 336,766 more rows
Why “mutating” join?
flights2 %>%
select(-origin, -dest) %>%
mutate(name = airlines$name[match(carrier, airlines$carrier)])
## # A tibble: 336,776 x 7
## year month day hour tailnum carrier name
## <int> <int> <int> <dbl> <chr> <chr> <chr>
## 1 2013 1 1 5 N14228 UA United Air Lines Inc.
## 2 2013 1 1 5 N24211 UA United Air Lines Inc.
## 3 2013 1 1 5 N619AA AA American Airlines Inc.
## 4 2013 1 1 5 N804JB B6 JetBlue Airways
## 5 2013 1 1 6 N668DN DL Delta Air Lines Inc.
## 6 2013 1 1 5 N39463 UA United Air Lines Inc.
## 7 2013 1 1 6 N516JB B6 JetBlue Airways
## 8 2013 1 1 6 N829AS EV ExpressJet Airlines Inc.
## 9 2013 1 1 6 N593JB B6 JetBlue Airways
## 10 2013 1 1 6 N3ALAA AA American Airlines Inc.
## # ... with 336,766 more rows
x <- tribble(~key, ~val_x,
1, "x1",
2, "x2",
3, "x3")
y <- tribble(~key, ~val_y,
1, "y1",
2, "y2",
4, "y3")
x <- tribble(~key, ~val_x,
1, "x1",
2, "x2",
3, "x3")
y <- tribble(~key, ~val_y,
1, "y1",
2, "y2",
4, "y3")
x %>%
inner_join(y, by = "key")
## # A tibble: 2 x 3
## key val_x val_y
## <dbl> <chr> <chr>
## 1 1 x1 y1
## 2 2 x2 y2
x
.y
.x
and y
NA
.A Venn diagram for joins:
x
has duplicated keys:x <- tribble(~key, ~val_x,
1, "x1",
2, "x2",
2, "x3",
1, "x4")
y <- tribble(~key, ~val_y,
1, "y1",
2, "y2")
The join adds val_y
to the matching rows:
x <- tribble(~key, ~val_x,
1, "x1",
2, "x2",
2, "x3",
1, "x4")
y <- tribble(~key, ~val_y,
1, "y1",
2, "y2")
left_join(x, y, by = "key")
## # A tibble: 4 x 3
## key val_x val_y
## <dbl> <chr> <chr>
## 1 1 x1 y1
## 2 2 x2 y2
## 3 2 x3 y2
## 4 1 x4 y1
x
and y
have duplicated keys:x <- tribble(~key, ~val_x,
1, "x1",
2, "x2",
2, "x3",
3, "x4")
y <- tribble(~key, ~val_y,
1, "y1",
2, "y2",
2, "y3",
3, "y4")
x <- tribble(~key, ~val_x,
1, "x1",
2, "x2",
2, "x3",
3, "x4")
y <- tribble(~key, ~val_y,
1, "y1",
2, "y2",
2, "y3",
3, "y4")
left_join(x, y, by = "key")
## # A tibble: 6 x 3
## key val_x val_y
## <dbl> <chr> <chr>
## 1 1 x1 y1
## 2 2 x2 y2
## 3 2 x2 y3
## 4 2 x3 y2
## 5 2 x3 y3
## 6 3 x4 y4
flights2 %>%
left_join(weather)
## Joining, by = c("year", "month", "day", "hour", "origin")
## # A tibble: 336,776 x 18
## year month day hour origin dest tailnum carrier temp dewp
## <int> <int> <int> <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 2013 1 1 5 EWR IAH N14228 UA 39.0 28.0
## 2 2013 1 1 5 LGA IAH N24211 UA 39.9 25.0
## 3 2013 1 1 5 JFK MIA N619AA AA 39.0 27.0
## 4 2013 1 1 5 JFK BQN N804JB B6 39.0 27.0
## 5 2013 1 1 6 LGA ATL N668DN DL 39.9 25.0
## 6 2013 1 1 5 EWR ORD N39463 UA 39.0 28.0
## 7 2013 1 1 6 EWR FLL N516JB B6 37.9 28.0
## 8 2013 1 1 6 LGA IAD N829AS EV 39.9 25.0
## 9 2013 1 1 6 JFK MCO N593JB B6 37.9 27.0
## 10 2013 1 1 6 LGA ORD N3ALAA AA 39.9 25.0
## # ... with 336,766 more rows, and 8 more variables: humid <dbl>,
## # wind_dir <dbl>, wind_speed <dbl>, wind_gust <dbl>, precip <dbl>,
## # pressure <dbl>, visib <dbl>, time_hour <dttm>
flights2 %>%
left_join(planes, by = "tailnum")
## # A tibble: 336,776 x 16
## year.x month day hour origin dest tailnum carrier year.y type
## <int> <int> <int> <dbl> <chr> <chr> <chr> <chr> <int> <chr>
## 1 2013 1 1 5 EWR IAH N14228 UA 1999 Fixe~
## 2 2013 1 1 5 LGA IAH N24211 UA 1998 Fixe~
## 3 2013 1 1 5 JFK MIA N619AA AA 1990 Fixe~
## 4 2013 1 1 5 JFK BQN N804JB B6 2012 Fixe~
## 5 2013 1 1 6 LGA ATL N668DN DL 1991 Fixe~
## 6 2013 1 1 5 EWR ORD N39463 UA 2012 Fixe~
## 7 2013 1 1 6 EWR FLL N516JB B6 2000 Fixe~
## 8 2013 1 1 6 LGA IAD N829AS EV 1998 Fixe~
## 9 2013 1 1 6 JFK MCO N593JB B6 2004 Fixe~
## 10 2013 1 1 6 LGA ORD N3ALAA AA NA <NA>
## # ... with 336,766 more rows, and 6 more variables:
## # manufacturer <chr>, model <chr>, engines <int>, seats <int>,
## # speed <int>, engine <chr>
by = c("a" = "b")
, left_join
matches variable a
in table x
to variable b
in table y
:flights2 %>%
left_join(airports, c("dest" = "faa"))
## # A tibble: 336,776 x 15
## year month day hour origin dest tailnum carrier name lat
## <int> <int> <int> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 2013 1 1 5 EWR IAH N14228 UA Geor~ 30.0
## 2 2013 1 1 5 LGA IAH N24211 UA Geor~ 30.0
## 3 2013 1 1 5 JFK MIA N619AA AA Miam~ 25.8
## 4 2013 1 1 5 JFK BQN N804JB B6 <NA> NA
## 5 2013 1 1 6 LGA ATL N668DN DL Hart~ 33.6
## 6 2013 1 1 5 EWR ORD N39463 UA Chic~ 42.0
## 7 2013 1 1 6 EWR FLL N516JB B6 Fort~ 26.1
## 8 2013 1 1 6 LGA IAD N829AS EV Wash~ 38.9
## 9 2013 1 1 6 JFK MCO N593JB B6 Orla~ 28.4
## 10 2013 1 1 6 LGA ORD N3ALAA AA Chic~ 42.0
## # ... with 336,766 more rows, and 5 more variables: lon <dbl>,
## # alt <dbl>, tz <dbl>, dst <chr>, tzone <chr>
base::merge()
can perform all four types of mutating join:Advantages of the specific dplyr verbs:
dbplyr
.SQL is the inspiration for dplyr’s conventions:
Note that:
inner_join(x, y, by = c("a" = "b"))
vs SELECT * FROM x INNER JOIN y ON x.a = y.b
.semi_join(x, y)
keeps all observations in x
that have a match in y
.
anti_join(x, y)
drops all observations in x
that have a match in y
.
top_dest <- flights %>%
count(dest, sort = TRUE) %>%
head(10)
flights %>%
filter(dest %in% top_dest$dest) %>%
print(n = 5)
## # A tibble: 141,145 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 542 540 2 923
## 2 2013 1 1 554 600 -6 812
## 3 2013 1 1 554 558 -4 740
## 4 2013 1 1 555 600 -5 913
## 5 2013 1 1 557 600 -3 838
## # ... with 141,140 more rows, and 12 more variables:
## # sched_arr_time <int>, arr_delay <dbl>, carrier <chr>,
## # flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>
x
having a match in y
:flights %>%
semi_join(top_dest)
## Joining, by = "dest"
## # A tibble: 141,145 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 542 540 2 923
## 2 2013 1 1 554 600 -6 812
## 3 2013 1 1 554 558 -4 740
## 4 2013 1 1 555 600 -5 913
## 5 2013 1 1 557 600 -3 838
## 6 2013 1 1 558 600 -2 753
## 7 2013 1 1 558 600 -2 924
## 8 2013 1 1 558 600 -2 923
## 9 2013 1 1 559 559 0 702
## 10 2013 1 1 600 600 0 851
## # ... with 141,135 more rows, and 12 more variables:
## # sched_arr_time <int>, arr_delay <dbl>, carrier <chr>,
## # flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>
flights
without a match in planes
flights %>%
anti_join(planes,
by = "tailnum") %>%
count(tailnum, sort = TRUE)
## # A tibble: 722 x 2
## tailnum n
## <chr> <int>
## 1 <NA> 2512
## 2 N725MQ 575
## 3 N722MQ 513
## 4 N723MQ 507
## 5 N713MQ 483
## 6 N735MQ 396
## 7 N0EGMQ 371
## 8 N534MQ 364
## 9 N542MQ 363
## 10 N531MQ 349
## # ... with 712 more rows
Warm up:
library(lubridate)
today()
## [1] "2022-03-30"
now()
## [1] "2022-03-30 00:53:35 CDT"
as_datetime(today())
or conversely as_date(now()))
.ymd("2017-01-31")
## [1] "2017-01-31"
mdy("January 31st, 2017")
## [1] "2017-01-31"
dmy("31-Jan-2017")
## [1] "2017-01-31"
ymd_hms("2017-01-31 20:11:59")
## [1] "2017-01-31 20:11:59 UTC"
mdy_hm("01/31/2017 08:01")
## [1] "2017-01-31 08:01:00 UTC"
ymd(20170131)
## [1] "2017-01-31"
ymd(20170131, tz = "UTC")
## [1] "2017-01-31 UTC"
When date/time variables are spread across columns, use make_date()
, or make_datetime()
:
flights %>%
select(year, month, day, hour, minute, dep_time) %>%
mutate(departure = make_datetime(year, month, day, hour, minute))
## # A tibble: 336,776 x 7
## year month day hour minute dep_time departure
## <int> <int> <int> <dbl> <dbl> <int> <dttm>
## 1 2013 1 1 5 15 517 2013-01-01 05:15:00
## 2 2013 1 1 5 29 533 2013-01-01 05:29:00
## 3 2013 1 1 5 40 542 2013-01-01 05:40:00
## 4 2013 1 1 5 45 544 2013-01-01 05:45:00
## 5 2013 1 1 6 0 554 2013-01-01 06:00:00
## 6 2013 1 1 5 58 554 2013-01-01 05:58:00
## 7 2013 1 1 6 0 555 2013-01-01 06:00:00
## 8 2013 1 1 6 0 557 2013-01-01 06:00:00
## 9 2013 1 1 6 0 557 2013-01-01 06:00:00
## 10 2013 1 1 6 0 558 2013-01-01 06:00:00
## # ... with 336,766 more rows
dep_time
and others such as arr_time
, the time
variable combined into a “military time” format (e.g., 540
). To extract hour and minute info and use make_datetime
:make_datetime_100 <- function(year, month, day, time) {
make_datetime(year, month, day, time %/% 100, time %% 100)
}
flights_dt <- flights %>%
filter(!is.na(dep_time), !is.na(arr_time)) %>%
mutate(dep_time = make_datetime_100(year, month, day, dep_time),
arr_time = make_datetime_100(year, month, day, arr_time)) %>%
select(origin, dest, ends_with("delay"), ends_with("time"))
flights_dt %>%
ggplot(aes(dep_time)) +
geom_freqpoly(binwidth = 86400) + # 86400s=1d
ggtitle("Departures in a year")
flights_dt %>%
filter(dep_time < ymd(20130102)) %>%
ggplot(aes(dep_time)) +
geom_freqpoly(binwidth = 600) + # 600s=10mn
ggtitle("Departures on January 1st")
floor_date()
rounds down.round_date()
rounds to.ceiling_date()
rounds up.make_datetime_100 <- function(year, month, day, time) {
make_datetime(year, month, day, time %/% 100, time %% 100)
}
flights_dt <- flights %>%
filter(!is.na(dep_time), !is.na(arr_time)) %>%
mutate(dep_time = make_datetime_100(year, month, day, dep_time),
arr_time = make_datetime_100(year, month, day, arr_time)) %>%
select(origin, dest, ends_with("delay"), ends_with("time"))
flights_dt %>%
count(week = floor_date(dep_time, "week")) %>%
ggplot(aes(week, n)) +
geom_line()
datetime <- ymd_hms("2016-07-08 12:34:56")
c(year(datetime), month(datetime), mday(datetime),
yday(datetime), wday(datetime))
## [1] 2016 7 8 190 6
datetime <- ymd_hms("2016-07-08 12:34:56")
year(datetime) <- 2020
datetime
## [1] "2020-07-08 12:34:56 UTC"
month(datetime) <- 01
datetime
## [1] "2020-01-08 12:34:56 UTC"
hour(datetime) <- hour(datetime) + 1
datetime
## [1] "2020-01-08 13:34:56 UTC"
update(datetime, year = 2020)
.Flights distribution across the day
flights_dt %>%
mutate(dep_hour = update(dep_time, yday = 1)) %>%
ggplot(aes(dep_hour)) +
geom_freqpoly(binwidth = 300)
dseconds(15)
## [1] "15s"
dminutes(10)
## [1] "600s (~10 minutes)"
dhours(c(12, 24))
## [1] "43200s (~12 hours)" "86400s (~1 days)"
ddays(0:5)
## [1] "0s" "86400s (~1 days)" "172800s (~2 days)"
## [4] "259200s (~3 days)" "345600s (~4 days)" "432000s (~5 days)"
dweeks(3)
## [1] "1814400s (~3 weeks)"
dyears(1)
## [1] "31557600s (~1 years)"
2 * dyears(1)
## [1] "63115200s (~2 years)"
dyears(1) + dweeks(12) + dhours(15)
## [1] "38869200s (~1.23 years)"
tomorrow <- today() + ddays(1)
last_year <- today() - dyears(1)
one_pm <- ymd_hms("2016-03-12 13:00:00", tz = "America/New_York")
one_pm
## [1] "2016-03-12 13:00:00 EST"
one_pm + ddays(1)
## [1] "2016-03-13 14:00:00 EDT"
one_pm
## [1] "2016-03-12 13:00:00 EST"
one_pm + days(1)
## [1] "2016-03-13 13:00:00 EDT"
seconds(15)
## [1] "15S"
minutes(10)
## [1] "10M 0S"
hours(c(12, 24))
## [1] "12H 0M 0S" "24H 0M 0S"
days(7)
## [1] "7d 0H 0M 0S"
months(1:3)
## [1] "1m 0d 0H 0M 0S" "2m 0d 0H 0M 0S" "3m 0d 0H 0M 0S"
weeks(3)
## [1] "21d 0H 0M 0S"
years(1)
## [1] "1y 0m 0d 0H 0M 0S"
10 * (months(6) + days(1))
## [1] "60m 10d 0H 0M 0S"
days(50) + hours(25) + minutes(2)
## [1] "50d 25H 2M 0S"
# A leap year
ymd("2016-01-01") + dyears(1)
## [1] "2016-12-31 06:00:00 UTC"
ymd("2016-01-01") + years(1)
## [1] "2017-01-01"
# Daylight Savings Time
one_pm + ddays(1)
## [1] "2016-03-13 14:00:00 EDT"
one_pm + days(1)
## [1] "2016-03-13 13:00:00 EDT"
years(1) / days(1)
## [1] 365.25
next_year <- today() + years(1)
(today() %--% next_year) / ddays(1)
## [1] 365
Sys.timezone()
## [1] "America/Chicago"
length(OlsonNames())
## [1] 594
head(OlsonNames())
## [1] "Africa/Abidjan" "Africa/Accra" "Africa/Addis_Ababa"
## [4] "Africa/Algiers" "Africa/Asmara" "Africa/Asmera"
(x1 <- ymd_hms("2015-06-01 12:00:00", tz = "America/New_York"))
## [1] "2015-06-01 12:00:00 EDT"
(x2 <- ymd_hms("2015-06-01 18:00:00", tz = "Europe/Copenhagen"))
## [1] "2015-06-01 18:00:00 CEST"
(x3 <- ymd_hms("2015-06-02 04:00:00", tz = "Pacific/Auckland"))
## [1] "2015-06-02 04:00:00 NZST"
x1 - x2
## Time difference of 0 secs
x1 - x3
## Time difference of 0 secs
# behavior of c()
x4 <- c(x1, x2, x3)
x4
## [1] "2015-06-01 12:00:00 EDT" "2015-06-01 12:00:00 EDT"
## [3] "2015-06-01 12:00:00 EDT"
(x1 <- ymd_hms("2015-06-01 12:00:00", tz = "America/New_York"))
## [1] "2015-06-01 12:00:00 EDT"
(x2 <- ymd_hms("2015-06-01 18:00:00", tz = "Europe/Copenhagen"))
## [1] "2015-06-01 18:00:00 CEST"
(x3 <- ymd_hms("2015-06-02 04:00:00", tz = "Pacific/Auckland"))
## [1] "2015-06-02 04:00:00 NZST"
x4 <- c(x1, x2, x3)
x4a <- with_tz(x4, tzone = "Australia/Lord_Howe")
x4a
## [1] "2015-06-02 02:30:00 +1030" "2015-06-02 02:30:00 +1030"
## [3] "2015-06-02 02:30:00 +1030"
x4a - x4
## Time differences in secs
## [1] 0 0 0
(x1 <- ymd_hms("2015-06-01 12:00:00", tz = "America/New_York"))
## [1] "2015-06-01 12:00:00 EDT"
(x2 <- ymd_hms("2015-06-01 18:00:00", tz = "Europe/Copenhagen"))
## [1] "2015-06-01 18:00:00 CEST"
(x3 <- ymd_hms("2015-06-02 04:00:00", tz = "Pacific/Auckland"))
## [1] "2015-06-02 04:00:00 NZST"
x4 <- c(x1, x2, x3)
x4b <- force_tz(x4, tzone = "Australia/Lord_Howe")
x4b
## [1] "2015-06-01 12:00:00 +1030" "2015-06-01 12:00:00 +1030"
## [3] "2015-06-01 12:00:00 +1030"
x4b - x4
## Time differences in hours
## [1] -14.5 -14.5 -14.5
Besides what has been covered here, there are some additional toolkits for data wrangling, e.g.,
forcats
package;stringr
and regular expressions.You can read about them in Hadley Wickham’s R for Data Science book.