1- Rank countries (flag codes) by the number of singles titles won by their players.

library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.2.1     √ purrr   0.3.3
## √ tibble  2.1.3     √ dplyr   0.8.3
## √ tidyr   1.0.0     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Task 1: count singles titles per country (flag code), most titles first.

# Location of the saved workspace; load() restores its objects into the
# session (presumably tourney_df, player_df and score_df, which are used
# below -- confirm against the .RData contents).
our_data <- "~/atp_tennis_data_2017.RData"

load(our_data)

# Attach the singles winner's player record to each tournament row.
# inner_join() drops tournaments whose winner id has no match in player_df.
task1 <- inner_join(
  tourney_df, player_df,
  by = c("singles_winner_player_id" = "player_id")
)

# count() groups by flag_code on its own, so no group_by() is needed; leaving
# it out also keeps the result an ordinary (ungrouped) tibble.
# sort = TRUE orders the rows by descending n.
champ_flags_df <- task1 %>%
  count(flag_code, sort = TRUE)

champ_flags_df
## # A tibble: 21 x 2
##    flag_code     n
##    <chr>     <int>
##  1 ESP          11
##  2 USA           9
##  3 SUI           8
##  4 FRA           7
##  5 GER           7
##  6 BUL           4
##  7 ARG           2
##  8 BEL           2
##  9 BIH           2
## 10 CRO           2
## # ... with 11 more rows

2- Rank the countries that did not win any singles championship by the total number of games won in the matches their players won.

# Task 2: rank countries without a singles title by games won in won matches.

# Keep only players whose flag_code does not appear in champ_flags_df.
# The join key is spelled out so the match cannot silently change if the two
# tables ever share another column name; an explicit key also means dplyr no
# longer prints a "Joining, by = ..." message.
nonchamp_players <- player_df %>%
  select(player_id, flag_code) %>%
  anti_join(champ_flags_df, by = "flag_code")

# Attach the score_df rows in which each remaining player is the winner, then
# total winner_games_won per country. Players with no matching score_df row
# get NA from the left join; na.rm = TRUE makes them contribute 0, so their
# country is still listed.
nonchamp_players %>%
  left_join(score_df, by = c("player_id" = "winner_player_id")) %>%
  group_by(flag_code) %>%
  summarise(total_won = sum(winner_games_won, na.rm = TRUE)) %>%
  arrange(desc(total_won))
## # A tibble: 93 x 2
##    flag_code total_won
##    <chr>         <dbl>
##  1 AUS            1989
##  2 CZE            1209
##  3 CAN            1190
##  4 SVK             889
##  5 BRA             873
##  6 POR             621
##  7 RSA             566
##  8 KAZ             495
##  9 KOR             438
## 10 GEO             377
## # ... with 83 more rows

3- List the names of players who won both the singles and the doubles title in the same tournament.

# Task 3: players who took the singles and doubles titles at the same event.

# Tournament rows whose singles winner is also one of the two doubles winners.
both_champions <- filter(
  tourney_df,
  singles_winner_player_id == doubles_winner_1_player_id |
    singles_winner_player_id == doubles_winner_2_player_id
)

# Look up the player record of each of those singles winners.
names_player_bc <- both_champions %>%
  inner_join(player_df, by = c("singles_winner_player_id" = "player_id"))

# Print the matching players' slugs.
names_player_bc[["player_slug"]]
## [1] "alexander-zverev"

4- Which hand do singles champions play with?

# Task 4: how many singles titles were won by each handedness.

# Attach the singles winner's player record to each tournament row.
task4 <- inner_join(
  tourney_df, player_df,
  by = c("singles_winner_player_id" = "player_id")
)

# Count from task4, the join built for this task, so the section does not
# depend on an object created in an earlier task. count() groups by
# handedness itself, so no separate group_by() is needed and the result is an
# ordinary (ungrouped) tibble.
which_hand <- task4 %>%
  count(handedness)

which_hand
## # A tibble: 2 x 2
##   handedness       n
##   <chr>        <int>
## 1 Left-Handed      9
## 2 Right-Handed    58

=======

---
title: "ATP_Assignment"
author: "Bulent Buyuk"
date: "27 11 2019"
output: html_document
---

1- Rank countries (flag codes) by the number of singles titles won by their players.

library(tidyverse)

# Task 1: count singles titles per country (flag code), most titles first.

# Location of the saved workspace; load() restores its objects into the
# session (presumably tourney_df, player_df and score_df, which are used
# below -- confirm against the .RData contents).
our_data <- "~/atp_tennis_data_2017.RData"

load(our_data)

# Attach the singles winner's player record to each tournament row.
# inner_join() drops tournaments whose winner id has no match in player_df.
task1 <- inner_join(
  tourney_df, player_df,
  by = c("singles_winner_player_id" = "player_id")
)

# count() groups by flag_code on its own, so no group_by() is needed; leaving
# it out also keeps the result an ordinary (ungrouped) tibble.
# sort = TRUE orders the rows by descending n.
champ_flags_df <- task1 %>%
  count(flag_code, sort = TRUE)

champ_flags_df
## # A tibble: 21 x 2
##    flag_code     n
##    <chr>     <int>
##  1 ESP          11
##  2 USA           9
##  3 SUI           8
##  4 FRA           7
##  5 GER           7
##  6 BUL           4
##  7 ARG           2
##  8 BEL           2
##  9 BIH           2
## 10 CRO           2
## # ... with 11 more rows

2- Rank the countries that did not win any singles championship by the total number of games won in the matches their players won.

# Task 2: rank countries without a singles title by games won in won matches.

# Keep only players whose flag_code does not appear in champ_flags_df.
# The join key is spelled out so the match cannot silently change if the two
# tables ever share another column name; an explicit key also means dplyr no
# longer prints a "Joining, by = ..." message.
nonchamp_players <- player_df %>%
  select(player_id, flag_code) %>%
  anti_join(champ_flags_df, by = "flag_code")

# Attach the score_df rows in which each remaining player is the winner, then
# total winner_games_won per country. Players with no matching score_df row
# get NA from the left join; na.rm = TRUE makes them contribute 0, so their
# country is still listed.
nonchamp_players %>%
  left_join(score_df, by = c("player_id" = "winner_player_id")) %>%
  group_by(flag_code) %>%
  summarise(total_won = sum(winner_games_won, na.rm = TRUE)) %>%
  arrange(desc(total_won))
## # A tibble: 93 x 2
##    flag_code total_won
##    <chr>         <dbl>
##  1 AUS            1989
##  2 CZE            1209
##  3 CAN            1190
##  4 SVK             889
##  5 BRA             873
##  6 POR             621
##  7 RSA             566
##  8 KAZ             495
##  9 KOR             438
## 10 GEO             377
## # ... with 83 more rows

3- List the names of players who won both the singles and the doubles title in the same tournament.

# Task 3: players who took the singles and doubles titles at the same event.

# Tournament rows whose singles winner is also one of the two doubles winners.
both_champions <- filter(
  tourney_df,
  singles_winner_player_id == doubles_winner_1_player_id |
    singles_winner_player_id == doubles_winner_2_player_id
)

# Look up the player record of each of those singles winners.
names_player_bc <- both_champions %>%
  inner_join(player_df, by = c("singles_winner_player_id" = "player_id"))

# Print the matching players' slugs.
names_player_bc[["player_slug"]]
## [1] "alexander-zverev"

4- Which hand do singles champions play with?

# Task 4: how many singles titles were won by each handedness.

# Attach the singles winner's player record to each tournament row.
task4 <- inner_join(
  tourney_df, player_df,
  by = c("singles_winner_player_id" = "player_id")
)

# Count from task4, the join built for this task, so the section does not
# depend on an object created in an earlier task. count() groups by
# handedness itself, so no separate group_by() is needed and the result is an
# ordinary (ungrouped) tibble.
which_hand <- task4 %>%
  count(handedness)

which_hand
## # A tibble: 2 x 2
##   handedness       n
##   <chr>        <int>
## 1 Left-Handed      9
## 2 Right-Handed    58