library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.2
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
load("/Users/bilgehan.kiran/Documents/BilgeDersler/BigDataAnalytics/week11/atp_tennis_data_2017.RData")

1-Rank countries (flag codes) by the singles champions order

champ_flags_df <- tourney_df %>% left_join(.,player_df, by =c("singles_winner_player_id"="player_id")) %>% count(flag_code, sort = TRUE) 

champ_flags_df
## # A tibble: 21 x 2
##    flag_code     n
##    <chr>     <int>
##  1 ESP          11
##  2 USA           9
##  3 SUI           8
##  4 FRA           7
##  5 GER           7
##  6 BUL           4
##  7 ARG           2
##  8 BEL           2
##  9 BIH           2
## 10 CRO           2
## # … with 11 more rows

2-Rank countries which didn’t get any singles championships by the genres won when they win the match

nonchamp_players <- player_df  %>% select(player_id, flag_code) %>% anti_join(.,champ_flags_df)
## Joining, by = "flag_code"
nochamp_winn_player <- nonchamp_players %>% left_join(.,score_df, by=c("player_id"="winner_player_id")) 

nochamp_winn_player %>%  group_by(flag_code)  %>% summarise(total_wins=sum(winner_games_won,na.rm = T)) %>% arrange(desc(total_wins)) 
## # A tibble: 93 x 2
##    flag_code total_wins
##    <chr>          <dbl>
##  1 AUS             1989
##  2 CZE             1209
##  3 CAN             1190
##  4 SVK              889
##  5 BRA              873
##  6 POR              621
##  7 RSA              566
##  8 KAZ              495
##  9 KOR              438
## 10 GEO              377
## # … with 83 more rows

3-No championship winner players whose service points were better than the winner of their matches

nochamp_match_id <- inner_join(nonchamp_players,score_df, by=c("player_id"="loser_player_id")) %>% select(player_id,match_id ) 

player_better_in_service <- inner_join(nochamp_match_id , stats_df) %>% select(player_id,winner_service_points_won, loser_service_points_won) %>% filter(winner_service_points_won < loser_service_points_won)
## Joining, by = "match_id"
player_info <- inner_join(player_better_in_service, player_df) %>% distinct(player_id,first_name, last_name, flag_code,birth_place)
## Joining, by = "player_id"
player_info
## # A tibble: 82 x 5
##    player_id first_name last_name    flag_code birth_place               
##    <chr>     <chr>      <chr>        <chr>     <chr>                     
##  1 a678      Kevin      Anderson     RSA       Johannesburg, South Africa
##  2 a829      Radu       Albot        MDA       Chisinau, Moldova         
##  3 a853      Marcelo    Arevalo      ESA       Sonsonate, El Salvador    
##  4 b837      Marcos     Baghdatis    CYP       Limassol, Cyprus          
##  5 ba47      Tomas      Berdych      CZE       Valasske Mezirici, CZE    
##  6 bc68      Philip     Bester       CAN       Sonthoffen, Germany       
##  7 bd20      Thomaz     Bellucci     BRA       Tiete, Brazil             
##  8 bd80      Attila     Balazs       HUN       Budapest, Hungary         
##  9 bg23      Nikoloz    Basilashvili GEO       Tbilisi, Georgia          
## 10 bh98      Matthew    Barton       AUS       Sydney, Australia         
## # … with 72 more rows

We can see the information about 82 players who had better service points in the matches they had.

4- rank of the countries which had least match winner

champ_winner_players <- score_df %>% left_join(.,player_df, by=c("winner_player_id"="player_id")) 

champ_winner_players %>%  group_by(flag_code)  %>% count()  %>% arrange(n) 
## # A tibble: 61 x 2
## # Groups:   flag_code [61]
##    flag_code     n
##    <chr>     <int>
##  1 ESA           1
##  2 TUR           1
##  3 EST           2
##  4 MAR           2
##  5 NZL           3
##  6 SLO           3
##  7 ECU           4
##  8 BLR           5
##  9 IRL           5
## 10 EGY           7
## # … with 51 more rows

As we can see sadly TR is one of the countries who had least match winner.