library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
url<-url("https://github.com/pjournal/mef03-scivan/blob/master/atp_tennis_data_2017.RData?raw=TRUE")
atp_tennis<-load(url)

TASK 1 - Rank countries (Flag codes) by the single champions.

singles_winners<-left_join(tourney_df,player_df,by=c("singles_winner_player_id"="player_id"))
champ_flags_df<-singles_winners%>%select(singles_winner_player_id,flag_code)%>%count(flag_code,sort=T)
champ_flags_df
## # A tibble: 21 x 2
##    flag_code     n
##    <chr>     <int>
##  1 ESP          11
##  2 USA           9
##  3 SUI           8
##  4 FRA           7
##  5 GER           7
##  6 BUL           4
##  7 ARG           2
##  8 BEL           2
##  9 BIH           2
## 10 CRO           2
## # … with 11 more rows

TASK 2 - Rank countries which did not get any singles championships by the games won when they win the match.

no_champ_players<- player_df %>%select(player_id, flag_code) %>%anti_join(., champ_flags_df,by="flag_code")
no_champ_players %>% left_join(.,score_df, by= c("player_id"="winner_player_id")) %>%group_by(flag_code) %>%summarise(total_won= sum(winner_games_won, na.rm=TRUE)) %>%arrange(desc(total_won))
## # A tibble: 93 x 2
##    flag_code total_won
##    <chr>         <dbl>
##  1 AUS            1989
##  2 CZE            1209
##  3 CAN            1190
##  4 SVK             889
##  5 BRA             873
##  6 POR             621
##  7 RSA             566
##  8 KAZ             495
##  9 KOR             438
## 10 GEO             377
## # … with 83 more rows

TASK 3 - Which tourney contains the most match number

score_df$tourney_id <- as.numeric(score_df$tourney_id)
match_num_tourney <- left_join(score_df,tourney_df) %>% select("tourney_name","match_id") %>%  count(tourney_name) %>% arrange(desc(n))
## Joining, by = "tourney_id"
match_num_tourney
## # A tibble: 67 x 2
##    tourney_name                     n
##    <chr>                        <int>
##  1 Australian Open                239
##  2 Roland Garros                  239
##  3 Wimbledon                      239
##  4 BNP Paribas Open               131
##  5 Miami Open presented by Itaú   131
##  6 US Open                        127
##  7 Coupe Rogers                    76
##  8 Internazionali BNL d’Italia     76
##  9 Mutua Madrid Open               76
## 10 Rolex Monte-Carlo Masters       76
## # … with 57 more rows

TASK 4 - Number of tourneys_played per player

players_tourneys <- rank_df %>% group_by(player_id) %>% summarise(tourneys_played = sum(tourneys_played)) %>% select(player_id,tourneys_played)

players_tourneys1  <- left_join(players_tourneys,player_df) %>% select("player_id","player_slug","tourneys_played")%>% na.omit() %>% arrange(desc(tourneys_played))
## Joining, by = "player_id"
players_tourneys1
## # A tibble: 977 x 3
##    player_id player_slug        tourneys_played
##    <chr>     <chr>                        <dbl>
##  1 o482      goncalo-oliveira              1771
##  2 n561      ivan-nedelko                  1477
##  3 pg13      jaume-pla-malfeito            1434
##  4 z184      horacio-zeballos              1381
##  5 pd31      benoit-paire                  1373
##  6 sq80      akira-santillan               1369
##  7 c813      ti-chen                       1359
##  8 l503      paolo-lorenzi                 1352
##  9 u120      kaichi-uchida                 1352
## 10 ga79      lorenzo-giustino              1329
## # … with 967 more rows

players_tourneys <- left_join(player_df,rank_df) %>% select(“player_slug”,“tourneys_played”)