2  In Class Exercise 1

Published

October 19, 2022

2.0.1 Import Required Libraries

library(nycflights13)
library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.4.1 
✔ readr   2.1.3      ✔ forcats 0.5.2 
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
# Inspect the flights table: dimensions, column names, column types, and
# the first few values of each column.
str(flights)
tibble [336,776 × 19] (S3: tbl_df/tbl/data.frame)
 $ year          : int [1:336776] 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
 $ month         : int [1:336776] 1 1 1 1 1 1 1 1 1 1 ...
 $ day           : int [1:336776] 1 1 1 1 1 1 1 1 1 1 ...
 $ dep_time      : int [1:336776] 517 533 542 544 554 554 555 557 557 558 ...
 $ sched_dep_time: int [1:336776] 515 529 540 545 600 558 600 600 600 600 ...
 $ dep_delay     : num [1:336776] 2 4 2 -1 -6 -4 -5 -3 -3 -2 ...
 $ arr_time      : int [1:336776] 830 850 923 1004 812 740 913 709 838 753 ...
 $ sched_arr_time: int [1:336776] 819 830 850 1022 837 728 854 723 846 745 ...
 $ arr_delay     : num [1:336776] 11 20 33 -18 -25 12 19 -14 -8 8 ...
 $ carrier       : chr [1:336776] "UA" "UA" "AA" "B6" ...
 $ flight        : int [1:336776] 1545 1714 1141 725 461 1696 507 5708 79 301 ...
 $ tailnum       : chr [1:336776] "N14228" "N24211" "N619AA" "N804JB" ...
 $ origin        : chr [1:336776] "EWR" "LGA" "JFK" "JFK" ...
 $ dest          : chr [1:336776] "IAH" "IAH" "MIA" "BQN" ...
 $ air_time      : num [1:336776] 227 227 160 183 116 150 158 53 140 138 ...
 $ distance      : num [1:336776] 1400 1416 1089 1576 762 ...
 $ hour          : num [1:336776] 5 5 5 5 6 5 6 6 6 6 ...
 $ minute        : num [1:336776] 15 29 40 45 0 58 0 0 0 0 ...
 $ time_hour     : POSIXct[1:336776], format: "2013-01-01 05:00:00" "2013-01-01 05:00:00" ...

2.0.2 Analysis 1 - On-Time Arrival Rate by Destination

# On-time arrival rate per destination, highest rate first (top 25 printed).
# A flight counts as on time when arr_delay is below 5. Flights whose
# arr_delay is NA are never counted as on time but still contribute to the
# n() denominator.
flights %>%
  group_by(dest) %>%
  summarise(
    count = n(),
    ot_arr_rate = sum(arr_delay < 5, na.rm = TRUE) / n() * 100,
    # Unnamed constant: yields a character column labelled `"%"` so a
    # percent sign is printed next to every rate.
    "%"
  ) %>%
  arrange(desc(ot_arr_rate)) %>%
  print(n = 25)
# A tibble: 105 × 4
   dest  count ot_arr_rate `"%"`
   <chr> <int>       <dbl> <chr>
 1 LEX       1       100   %    
 2 PSP      19        78.9 %    
 3 SNA     825        74.2 %    
 4 STT     522        73.6 %    
 5 MVY     221        72.9 %    
 6 HNL     707        72.3 %    
 7 BOS   15508        71.2 %    
 8 SEA    3923        71.1 %    
 9 MIA   11728        70.9 %    
10 SLC    2467        70.5 %    
11 SBN      10        70   %    
12 LAS    5997        69.5 %    
13 LAX   16174        68.1 %    
14 LGB     668        68.0 %    
15 ACK     265        67.9 %    
16 DFW    8738        67.7 %    
17 SJC     329        67.5 %    
18 SRQ    1211        67.5 %    
19 SJU    5819        67.2 %    
20 DTW    9384        67.1 %    
21 SFO   13331        66.9 %    
22 MTJ      15        66.7 %    
23 MYR      59        66.1 %    
24 MCO   14082        65.6 %    
25 PHX    4656        65.5 %    
# … with 80 more rows

2.0.3 Analysis 2 - Departure Delay by Month

# Per-month departure-delay profile: number of flights plus the mean,
# maximum, and minimum of dep_delay, ignoring NA delays in each aggregate.
flights %>%
  group_by(month) %>%
  summarise(
    count = n(),
    across(
      dep_delay,
      list(
        avg = ~ mean(.x, na.rm = TRUE),
        max = ~ max(.x, na.rm = TRUE),
        min = ~ min(.x, na.rm = TRUE)
      ),
      # Function label first, so the columns are avg_dep_delay,
      # max_dep_delay, and min_dep_delay.
      .names = "{.fn}_{.col}"
    )
  )
# A tibble: 12 × 5
   month count avg_dep_delay max_dep_delay min_dep_delay
   <int> <int>         <dbl>         <dbl>         <dbl>
 1     1 27004         10.0           1301           -30
 2     2 24951         10.8            853           -33
 3     3 28834         13.2            911           -25
 4     4 28330         13.9            960           -21
 5     5 28796         13.0            878           -24
 6     6 28243         20.8           1137           -21
 7     7 29425         21.7           1005           -22
 8     8 29327         12.6            520           -26
 9     9 27574          6.72          1014           -24
10    10 28889          6.24           702           -25
11    11 27268          5.44           798           -32
12    12 28135         16.6            896           -43