# Restore the saved workspace objects from the .RData file into the current
# environment. The analysis below reads rank_df, player_df and score_df,
# which are presumably provided by this file -- TODO confirm.
# NOTE(review): the path is absolute and machine-specific.
load(file = "C:/Users/ayo/Desktop/atp_tennis_data_2017.RData")

Task 1

Let’s find the countries of the players who are ranked in the top 100.

# Bar chart of how many of the top-100 players (by summed ranking points)
# come from each country (flag_code), with the largest counts first.
rank_df %>%
  group_by(player_id) %>%
  summarize(total_points = sum(ranking_points)) %>%
  # Rank explicitly by total_points instead of letting top_n() fall back to
  # the last column; top_n() keeps ties, so more than 100 rows may be kept.
  top_n(100, wt = total_points) %>%
  # State the join key so an unrelated shared column name can never be
  # picked up as an extra key.
  left_join(player_df, by = "player_id") %>%
  # One row per country, sorted by descending count, returned ungrouped.
  count(flag_code, sort = TRUE) %>%
  ggplot(aes(x = reorder(flag_code, -n), y = n, fill = flag_code)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  # Rotate the country codes so they do not overlap on the x axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 0.6))
## Selecting by total_pointsJoining, by = "player_id"

Task 2

Let’s find the players who didn’t win a Final (those who either lost in the Finals or didn’t play in the Finals at all).

# Players who never appear as the winner of a "Finals" round match:
# keep the player_df rows whose player_id matches no winner_player_id among
# the Finals rows of score_df, then show only name and country columns.
player_df %>%
  anti_join(
    filter(score_df, tourney_round_name == "Finals"),
    by = c("player_id" = "winner_player_id")
  ) %>%
  select(first_name, last_name, flag_code)
## # A tibble: 10,877 x 3
##    first_name last_name flag_code
##    <chr>      <chr>     <chr>    
##  1 Ricardo    Acuna     CHI      
##  2 Sadiq      Abdullahi NGR      
##  3 Nelson     Aerts     BRA      
##  4 Egan       Adams     USA      
##  5 Ronald     Agenor    USA      
##  6 Juan       Aguilera  ESP      
##  7 Marc       Albert    NED      
##  8 Marco      Alciati   ITA      
##  9 Richard    Akel      USA      
## 10 John       Alexander AUS      
## # ... with 10,867 more rows