library(tidyverse)
library(dplyr)
load(file = "/Users/m2lmacbook1/Desktop/Courses/BDA 503 | Data Analytics Essentials/atp_tennis_data_2017.RData")
- Task 1: Rank countries by the singles champions.
- Task 2: Rank countries which did not get any singles championships by the games. (Won when they win the match)
- Task 3: Rank player ages by the single champions.
- Task 4: List players over 30 years old who have never won a singles championship.
Task 1
task1 <- inner_join(tourney_df, player_df, by = c("singles_winner_player_id"="player_id"))
task1 %>%
group_by(flag_code) %>% count(flag_code, sort=TRUE)
## # A tibble: 21 x 2
## # Groups: flag_code [21]
## flag_code n
## <chr> <int>
## 1 ESP 11
## 2 USA 9
## 3 SUI 8
## 4 FRA 7
## 5 GER 7
## 6 BUL 4
## 7 ARG 2
## 8 BEL 2
## 9 BIH 2
## 10 CRO 2
## # … with 11 more rows
Task 2
nonchamp_players <-
player_df %>% select(player_id, flag_code) %>% anti_join (.,task1)
## Joining, by = "flag_code"
nonchamp_players %>% left_join(.,score_df, by=c("player_id"="winner_player_id")) %>%
group_by(flag_code) %>% summarise(total_won=sum(winner_games_won, na.rm=TRUE)) %>% arrange(desc(total_won))
## # A tibble: 93 x 2
## flag_code total_won
## <chr> <dbl>
## 1 AUS 1989
## 2 CZE 1209
## 3 CAN 1190
## 4 SVK 889
## 5 BRA 873
## 6 POR 621
## 7 RSA 566
## 8 KAZ 495
## 9 KOR 438
## 10 GEO 377
## # … with 83 more rows
Task 3
task3_a <- rank_df %>% distinct(player_id, .keep_all = TRUE)
task3 <- inner_join(tourney_df, task3_a, by = c("singles_winner_player_id"="player_id"))
task3 %>%
group_by(player_age) %>% count(player_age, sort=TRUE)
## # A tibble: 16 x 2
## # Groups: player_age [16]
## player_age n
## <dbl> <int>
## 1 31 8
## 2 36 8
## 3 26 7
## 4 30 7
## 5 32 7
## 6 20 6
## 7 25 6
## 8 29 6
## 9 23 3
## 10 34 3
## 11 21 1
## 12 24 1
## 13 27 1
## 14 28 1
## 15 35 1
## 16 37 1
Task 4
task4_a <- task3_a %>% select(player_id, player_age) %>% filter(player_age > 30) %>% anti_join (.,task3)
## Joining, by = "player_age"
task4 <- player_df %>% select(player_id, first_name, last_name) %>% inner_join(.,task4_a) %>% arrange(desc(player_age))
## Joining, by = "player_id"
head(task4, 10)
## # A tibble: 10 x 4
## player_id first_name last_name player_age
## <chr> <chr> <chr> <dbl>
## 1 e121 Younes El Aynaoui 46
## 2 s550 Takao Suzuki 41
## 3 m747 Frank Moser 40
## 4 s535 Razvan Sabau 40
## 5 h355 Tommy Haas 39
## 6 k418 Oh-Hee Kwon 39
## 7 m815 Toshihide Matsui 39
## 8 r383 Ruben Ramirez Hidalgo 39
## 9 h442 Jan Hernych 38
## 10 k336 Ivo Karlovic 38