library(dplyr)
library(tidyverse)
load(file = "/Users/mehtapanalyticahouse/Desktop/atp_tennis_data_2017.RData")
Task 1:
- Rank countries (flag codes) by the singles champions.
task1 <- inner_join (tourney_df, player_df, by = c("singles_winner_player_id"="player_id"))
task1 %>%
group_by(flag_code) %>% count(flag_code, sort=TRUE)
## # A tibble: 21 x 2
## # Groups: flag_code [21]
## flag_code n
## <chr> <int>
## 1 ESP 11
## 2 USA 9
## 3 SUI 8
## 4 FRA 7
## 5 GER 7
## 6 BUL 4
## 7 ARG 2
## 8 BEL 2
## 9 BIH 2
## 10 CRO 2
## # … with 11 more rows
Task 2:
- Rank countries which did not get any singles championships by the games. (Won when they win the match)
nonchamp_players <-
player_df %>% select(player_id, flag_code) %>% anti_join (.,task1)
## Joining, by = "flag_code"
nonchamp_players %>% left_join(.,score_df, by=c("player_id"="winner_player_id")) %>%
group_by(flag_code) %>% summarise(total_won=sum(winner_games_won, na.rm=TRUE)) %>% arrange(desc(total_won))
## # A tibble: 93 x 2
## flag_code total_won
## <chr> <dbl>
## 1 AUS 1989
## 2 CZE 1209
## 3 CAN 1190
## 4 SVK 889
## 5 BRA 873
## 6 POR 621
## 7 RSA 566
## 8 KAZ 495
## 9 KOR 438
## 10 GEO 377
## # … with 83 more rows
Task 3:
- Ranking of the names and surnames of the players who won the match in 25 minutes.
task3 <- stats_df %>% select(match_id, match_duration)
task3_1 <- inner_join(task3, score_df) %>%
group_by(winner_player_id) %>% filter(match_duration < 25) %>% select(match_duration)
## Joining, by = "match_id"
## Adding missing grouping variables: `winner_player_id`
task3_2 <- player_df %>% select(player_id, first_name, last_name) %>% inner_join(.,task3_1, by = c("player_id"="winner_player_id")) %>% arrange(desc(match_duration))
print(task3_2)
## # A tibble: 21 x 4
## player_id first_name last_name match_duration
## <chr> <chr> <chr> <dbl>
## 1 i165 Denis Istomin 24
## 2 n409 Rafael Nadal 24
## 3 ca12 Jeremy Chardy 23
## 4 d985 Joao Domingues 23
## 5 j551 Nicolas Jarry 23
## 6 pd31 Benoit Paire 22
## 7 se73 Yuichi Sugita 22
## 8 mj01 Roberto Marcora 21
## 9 t742 Janko Tipsarevic 21
## 10 pc78 Ante Pavic 20
## # … with 11 more rows
Task 4:
- Number of left and right-handed players among single winner players.
task4 <- inner_join(tourney_df, player_df, by = c("singles_winner_player_id"="player_id"))
task4 %>%
group_by(handedness) %>% count(handedness, sort=TRUE)
## # A tibble: 2 x 2
## # Groups: handedness [2]
## handedness n
## <chr> <int>
## 1 Right-Handed 58
## 2 Left-Handed 9