1 Dataset

Read the clean data set “df_clean.rds” that we produced in the previous step, Preprocessing Step 2: Data Cleaning & Transforming.

# Load the cleaned data set written by the previous preprocessing step and
# print a compact overview of its columns.
df_all <- readRDS("df_clean.rds") %>%
  glimpse()
## Rows: 826,638
## Columns: 40
## $ OKS                                 <lgl> FALSE, FALSE, FALSE, FALSE, FAL...
## $ participation                       <lgl> FALSE, TRUE, FALSE, FALSE, FALS...
## $ contribution                        <lgl> FALSE, FALSE, FALSE, FALSE, FAL...
## $ date                                <dttm> 2015-12-31, 2015-12-31, 2015-1...
## $ code                                <chr> "ABE", "AEA", "AEB", "AEC", "AE...
## $ fund_type                           <chr> "pension", "pension", "pension"...
## $ category                            <chr> "Variable Fund", "Gold Fund", "...
## $ name                                <chr> "ANADOLU HAYAT EMEKLILIK A.S.B....
## $ price                               <dbl> 0.012212, 0.011026, 0.033588, 0...
## $ shares                              <dbl> 3876175452, 10761804554, 219965...
## $ people                              <dbl> 25887, 97857, 13269, 101, 5030,...
## $ total_value                         <dbl> 47336763, 118662196, 73881771, ...
## $ p_Bank_Bills                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Other                             <dbl> 3.02, 94.31, 1.90, 0.39, 1.79, ...
## $ p_FX_Payable_Bills                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Government_Bond                   <dbl> 0.66, 0.00, 0.84, 60.90, 66.88,...
## $ p_Foreign_Currency_Bills            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Eurobonds                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Commercial_Paper                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Fund_Participation_Certificate    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Real_Estate_Certificate           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Treasury_Bill                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Stock                             <dbl> 0.00, 0.88, 82.55, 4.76, 29.47,...
## $ p_Government_Bonds_and_Bills_FX     <dbl> 0.00, 0.00, 0.00, 11.09, 0.00, ...
## $ p_Participation_Account             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Government_Lease_Certificates     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Precious_Metals                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Private_Sector_Lease_Certificates <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Private_Sector_Bond               <dbl> 0.00, 4.81, 0.00, 22.78, 0.00, ...
## $ p_Repo                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Derivatives                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_TMM                               <dbl> 0.00, 0.00, 6.61, 0.08, 0.00, 7...
## $ p_Reverse_Repo                      <dbl> 0.40, 0.00, 8.10, 0.00, 1.86, 2...
## $ p_Asset_Backed_Securities           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Term_Deposit                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Foreign_Debt_Instruments          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Foreign_Equity                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Foreign_Securities                <dbl> 95.92, 0.00, 0.00, 0.00, 0.00, ...
## $ company_code                        <chr> "AEM", "AEM", "AHS", "HYS", "KH...
## $ company_name                        <chr> "ANADOLU HAYAT EMEKLILIK A.S.",...

df_all contains the entire data set.

2 Overall Information about Dataset

Number of unique funds

# Count how many distinct fund codes appear in the data.
df_all %>%
  summarise(count = n_distinct(code))
## # A tibble: 1 x 1
##   count
##   <int>
## 1   920

Number of unique funds by fund type

# Count distinct fund codes separately for each fund type.
df_all %>%
  group_by(fund_type) %>%
  summarise(count = n_distinct(code))
## # A tibble: 2 x 2
##   fund_type count
##   <chr>     <int>
## 1 mutual      516
## 2 pension     404

Number of unique funds by fund type and category

# Count distinct fund codes for every (fund type, category) pair and list the
# largest categories first.
df_all %>%
  group_by(fund_type, category) %>%
  summarise(count = n_distinct(code)) %>%
  arrange(desc(count))
## # A tibble: 31 x 3
## # Groups:   fund_type [2]
##    fund_type category                      count
##    <chr>     <chr>                         <int>
##  1 mutual    Hedge Umbrella Fund             231
##  2 pension   Variable Fund                   166
##  3 mutual    Debt Securities Umbrella Fund    70
##  4 mutual    Variable Umbrella Fund           67
##  5 mutual    Stock Umbrella Fund              62
##  6 mutual    Participation Umbrella Fund      30
##  7 pension   Stock Fund                       28
##  8 mutual    Money Market Umbrella Fund       25
##  9 pension   Debt Instruments Fund            25
## 10 mutual    Fund Of Funds Umbrella Fund      17
## # ... with 21 more rows

Date coverage of the data: the data set spans 1834 days and contains 1262 distinct dates. There is no fund data for weekends and official holidays.

# Overall date coverage: number of distinct dates, first and last date, and
# the time span between them.
df_all %>%
  summarise(
    count = n_distinct(date),
    latest = max(date),
    earliest = min(date),
    length = latest - earliest
  )
## # A tibble: 1 x 4
##   count latest              earliest            length   
##   <int> <dttm>              <dttm>              <drtn>   
## 1  1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days

Total number of data points and date coverage for each fund. Some funds have huge gaps in their data: the ratio of data points to the length of the covered period (in calendar days) is as low as 25% for some funds, while for the majority of funds it is over 65%. Almost all of the funds with data irregularities are Hedge Umbrella Funds.

# Per-fund coverage: number of rows, first/last date, span between them as an
# integer, and the ratio of rows to that span. Funds with the lowest ratio are
# listed first.
df_all %>%
  group_by(code, fund_type, category) %>%
  summarise(
    count = n(),
    latest = max(date),
    earliest = min(date),
    length = as.integer(latest - earliest),
    coverage = count / length
  ) %>%
  arrange(coverage)
## # A tibble: 920 x 8
## # Groups:   code, fund_type [920]
##    code  fund_type category count latest              earliest            length
##    <chr> <chr>     <chr>    <int> <dttm>              <dttm>               <int>
##  1 USY   mutual    Hedge U~   462 2020-11-16 00:00:00 2015-12-01 00:00:00   1812
##  2 IPF   mutual    Hedge U~   466 2020-11-16 00:00:00 2015-12-01 00:00:00   1812
##  3 IPP   mutual    Hedge U~   466 2020-11-16 00:00:00 2015-12-01 00:00:00   1812
##  4 OSH   mutual    Hedge U~   466 2020-11-16 00:00:00 2015-12-01 00:00:00   1812
##  5 CTV   mutual    Hedge U~    34 2020-11-16 00:00:00 2020-07-09 00:00:00    130
##  6 TDB   mutual    Hedge U~   481 2020-11-16 00:00:00 2015-11-30 00:00:00   1813
##  7 IAF   mutual    Hedge U~   489 2020-11-16 00:00:00 2015-12-01 00:00:00   1812
##  8 TPP   mutual    Hedge U~   461 2020-11-16 00:00:00 2016-05-02 00:00:00   1659
##  9 YKS   mutual    Hedge U~   513 2020-11-16 00:00:00 2015-11-17 00:00:00   1826
## 10 YPF   mutual    Hedge U~   507 2020-11-16 00:00:00 2016-02-01 00:00:00   1750
## # ... with 910 more rows, and 1 more variable: coverage <dbl>

2.1 Distribution of Fund Data Time Length by Years

107 funds have less than 1 year of data

# Distribution of funds by how many whole 365-day periods their data spans
# (time between each fund's first and last date).
df_all %>%
  group_by(code) %>%
  summarise(
    count = n(),
    latest = max(date),
    earliest = min(date),
    length = latest - earliest
  ) %>%
  count(length_years = as.integer(floor(length / 365)), name = "countcuts") %>%
  arrange(length_years)
## # A tibble: 6 x 2
##   length_years countcuts
##          <int>     <int>
## 1            0       107
## 2            1        61
## 3            2       171
## 4            3        72
## 5            4        58
## 6            5       451

447 funds have at least 5 calendar years of data coverage (starting from 2015-11-16 or earlier)

# Funds whose first observation is on or before 2015-11-16, most recent
# starters first.
df_all %>%
  group_by(code, fund_type) %>%
  summarise(
    count = n(),
    latest = max(date),
    earliest = min(date),
    length = latest - earliest
  ) %>%
  filter(earliest <= ymd("2015-11-16")) %>%
  arrange(desc(earliest))
## # A tibble: 447 x 6
## # Groups:   code [447]
##    code  fund_type count latest              earliest            length   
##    <chr> <chr>     <int> <dttm>              <dttm>              <drtn>   
##  1 DDS   mutual      633 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
##  2 IIP   mutual      636 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
##  3 IPC   mutual      637 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
##  4 IPK   mutual      637 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
##  5 IPO   mutual      637 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
##  6 IPR   mutual      637 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
##  7 IPU   mutual      637 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
##  8 ISS   mutual      637 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
##  9 IYR   mutual      638 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
## 10 AAK   mutual     1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
## # ... with 437 more rows

813 funds have at least 1 calendar year of data coverage (starting from 2019-11-15 or earlier)

# Funds whose first observation is on or before 2019-11-15, ordered by code.
df_all %>%
  group_by(code) %>%
  summarise(
    count = n(),
    latest = max(date),
    earliest = min(date),
    length = latest - earliest
  ) %>%
  filter(earliest <= ymd("2019-11-15")) %>%
  arrange(code)
## # A tibble: 813 x 5
##    code  count latest              earliest            length   
##    <chr> <int> <dttm>              <dttm>              <drtn>   
##  1 AAJ     717 2020-11-16 00:00:00 2018-01-04 00:00:00 1047 days
##  2 AAK    1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
##  3 AAL    1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
##  4 AAS    1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
##  5 AAV    1261 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
##  6 ABE    1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
##  7 ABU    1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
##  8 ACC    1038 2020-11-16 00:00:00 2016-09-29 00:00:00 1509 days
##  9 ACD    1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
## 10 ACK    1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
## # ... with 803 more rows

2.2 Daily Price Change

The df_plot_price dataset provides the average daily price change and its standard deviation over the last year of data. We kept only the funds that have at least 1 year of price data.

# Per-fund daily return statistics for the period starting 2019-11-15.
# The day-over-day change is computed on the full history first so that the
# first row inside the window still has a previous price to compare against.
df_plot_price <- df_all %>%
  group_by(code, fund_type, category, company_name) %>%
  arrange(code, date) %>%
  mutate(daily_price_change = price / lag(price) - 1) %>%
  filter(date >= ymd("20191115")) %>%
  summarise(
    avg_daily_change = mean(daily_price_change, na.rm = TRUE),
    stdev = sd(daily_price_change, na.rm = TRUE),
    earliest = min(date),
    count = n()
  ) %>%
  # Keep only funds whose window starts on or before 2019-11-18.
  filter(earliest <= ymd("20191118")) %>%
  arrange(count)
# Already ordered by count above, so printing it directly shows the same rows.
df_plot_price
## # A tibble: 812 x 8
## # Groups:   code, fund_type, category [812]
##    code  fund_type category company_name avg_daily_change   stdev
##    <chr> <chr>     <chr>    <chr>                   <dbl>   <dbl>
##  1 ATJ   mutual    Hedge U~ AZIMUT PORT~        -0.000400 0.0150 
##  2 AJE   mutual    Hedge U~ AK PORTFOY ~         0.00113  0.00712
##  3 USY   mutual    Hedge U~ UNLU PORTFO~         0.000436 0.00741
##  4 ACN   mutual    Hedge U~ ACTUS PORTF~         0.000737 0.00719
##  5 ACU   mutual    Hedge U~ ACTUS PORTF~         0.00162  0.00786
##  6 ACZ   mutual    Hedge U~ ACTUS PORTF~         0.000854 0.00596
##  7 KTS   mutual    Hedge U~ KT PORTFOY ~         0.000352 0.00129
##  8 IPF   mutual    Hedge U~ PERFORM POR~         0.000476 0.00224
##  9 IPP   mutual    Hedge U~ PERFORM POR~         0.00183  0.0128 
## 10 PPD   mutual    Hedge U~ PERFORM POR~         0.000869 0.00723
## # ... with 802 more rows, and 2 more variables: earliest <dttm>, count <int>

2.3 Annual Price Change

The df_price_change dataset provides annual price change (between 2019-11-15 and 2020-11-16)

# Annual price change per fund: keep only the two end-point dates, compare the
# later price with the earlier one, and retain the row that carries the result
# (funds with only one of the two dates yield NA and are dropped).
df_price_change <- df_all %>%
  filter(date == ymd("2019-11-15") | date == ymd("2020-11-16")) %>%
  group_by(code, fund_type, category, company_name) %>%
  arrange(code, date) %>%
  mutate(annual_change = price / lag(price) - 1) %>%
  filter(!is.na(annual_change)) %>%
  relocate(annual_change)
df_price_change
## # A tibble: 806 x 41
## # Groups:   code, fund_type, category, company_name [806]
##    annual_change OKS   participation contribution date                code 
##            <dbl> <lgl> <lgl>         <lgl>        <dttm>              <chr>
##  1        0.120  TRUE  FALSE         FALSE        2020-11-16 00:00:00 AAJ  
##  2        0.221  FALSE FALSE         FALSE        2020-11-16 00:00:00 AAK  
##  3        0.0953 FALSE FALSE         FALSE        2020-11-16 00:00:00 AAL  
##  4        0.301  FALSE FALSE         FALSE        2020-11-16 00:00:00 AAS  
##  5        0.432  FALSE FALSE         FALSE        2020-11-16 00:00:00 AAV  
##  6        0.209  FALSE FALSE         FALSE        2020-11-16 00:00:00 ABE  
##  7        0.0862 FALSE FALSE         FALSE        2020-11-16 00:00:00 ABU  
##  8        0.339  FALSE FALSE         FALSE        2020-11-16 00:00:00 ACC  
##  9        0.338  FALSE FALSE         FALSE        2020-11-16 00:00:00 ACD  
## 10        0.305  FALSE FALSE         FALSE        2020-11-16 00:00:00 ACK  
## # ... with 796 more rows, and 35 more variables: fund_type <chr>,
## #   category <chr>, name <chr>, price <dbl>, shares <dbl>, people <dbl>,
## #   total_value <dbl>, p_Bank_Bills <dbl>, p_Other <dbl>,
## #   p_FX_Payable_Bills <dbl>, p_Government_Bond <dbl>,
## #   p_Foreign_Currency_Bills <dbl>, p_Eurobonds <dbl>,
## #   p_Commercial_Paper <dbl>, p_Fund_Participation_Certificate <dbl>,
## #   p_Real_Estate_Certificate <dbl>, p_Treasury_Bill <dbl>, p_Stock <dbl>,
## #   p_Government_Bonds_and_Bills_FX <dbl>, p_Participation_Account <dbl>,
## #   p_Government_Lease_Certificates <dbl>, p_Precious_Metals <dbl>,
## #   p_Private_Sector_Lease_Certificates <dbl>, p_Private_Sector_Bond <dbl>,
## #   p_Repo <dbl>, p_Derivatives <dbl>, p_TMM <dbl>, p_Reverse_Repo <dbl>,
## #   p_Asset_Backed_Securities <dbl>, p_Term_Deposit <dbl>,
## #   p_Foreign_Debt_Instruments <dbl>, p_Foreign_Equity <dbl>,
## #   p_Foreign_Securities <dbl>, company_code <chr>, company_name <chr>

2.4 Annual Price Change vs Daily Average Standard Deviation

Join the annual price change data to the daily averages and standard deviations. The resulting df_plot_price dataset provides both for each fund.

# Attach the per-fund daily statistics (avg_daily_change, stdev) to the annual
# price-change table, matching rows on `code`. The `.x`/`.y` clean-up below
# exists because fund_type and category are present on both sides of the join
# (presumably carried along as grouping columns of df_plot_price even though
# only three columns are selected -- confirm). The right-hand copies are
# dropped and the left-hand ones get their original names back.
# Note that this overwrites df_plot_price with the joined table.
df_plot_price=left_join(df_price_change,df_plot_price%>%select(code, avg_daily_change, stdev), by="code")%>%select(-fund_type.y, -category.y)%>%rename(fund_type=fund_type.x,category=category.x)%>%relocate(avg_daily_change,stdev)%>%glimpse()
## Rows: 806
## Columns: 43
## Groups: code, company_name [806]
## $ avg_daily_change                    <dbl> 0.0004518532, 0.0007945988, 0.0...
## $ stdev                               <dbl> 0.0021900905, 0.0037024105, 0.0...
## $ annual_change                       <dbl> 0.119639528, 0.221075413, 0.095...
## $ OKS                                 <lgl> TRUE, FALSE, FALSE, FALSE, FALS...
## $ participation                       <lgl> FALSE, FALSE, FALSE, FALSE, FAL...
## $ contribution                        <lgl> FALSE, FALSE, FALSE, FALSE, FAL...
## $ date                                <dttm> 2020-11-16, 2020-11-16, 2020-1...
## $ code                                <chr> "AAJ", "AAK", "AAL", "AAS", "AA...
## $ fund_type                           <chr> "pension", "mutual", "mutual", ...
## $ category                            <chr> "AES Standard Fund", "Variable ...
## $ name                                <chr> "AVIVASA EMEKLILIK VE HAYAT A.S...
## $ price                               <dbl> 0.014412, 41.390035, 0.621141, ...
## $ shares                              <dbl> 35933708934, 1897355, 316664117...
## $ people                              <dbl> 366643, 432, 3249, 97, 34, 1072...
## $ total_value                         <dbl> 517888264, 78531591, 196693215,...
## $ p_Bank_Bills                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Other                             <dbl> 0.00, 0.00, 0.01, 0.00, 0.00, 0...
## $ p_FX_Payable_Bills                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Government_Bond                   <dbl> 50.21, 10.31, 0.00, 0.00, 0.00,...
## $ p_Foreign_Currency_Bills            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Eurobonds                         <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Commercial_Paper                  <dbl> 15.08, 11.41, 7.63, 0.00, 0.00,...
## $ p_Fund_Participation_Certificate    <dbl> 13.10, 14.22, 0.00, 95.56, 0.00...
## $ p_Real_Estate_Certificate           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Treasury_Bill                     <dbl> 1.41, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Stock                             <dbl> 9.93, 29.94, 0.00, 0.00, 97.75,...
## $ p_Government_Bonds_and_Bills_FX     <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Participation_Account             <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Government_Lease_Certificates     <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Precious_Metals                   <dbl> 1.15, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Private_Sector_Lease_Certificates <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Private_Sector_Bond               <dbl> 0.00, 3.18, 0.00, 0.00, 0.00, 0...
## $ p_Repo                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Derivatives                       <dbl> 0.00, 4.18, 5.36, 10.40, 2.25, ...
## $ p_TMM                               <dbl> 0.00, 0.00, 0.00, -5.96, 0.00, ...
## $ p_Reverse_Repo                      <dbl> 1.94, 26.76, 87.00, 0.00, 0.00,...
## $ p_Asset_Backed_Securities           <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Term_Deposit                      <dbl> 7.18, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Foreign_Debt_Instruments          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Foreign_Equity                    <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 9...
## $ p_Foreign_Securities                <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0...
## $ company_code                        <chr> "AHS", "APY", "APY", "APY", "AP...
## $ company_name                        <chr> "AVIVASA EMEKLILIK VE HAYAT A.S...

3 Analysis with Plots

3.1 Annual Price Change vs Standard Deviation of Daily Change

# Scatter of each fund's annual price change against the standard deviation
# of its daily price changes, one panel per fund category.
# Fix: the x aesthetic now uses annual_change so the data matches the section
# heading, plot title and x-axis label (it previously plotted avg_daily_change).
# NOTE: both axes are log-scaled, so funds with a zero or negative annual
# change cannot be placed on the x axis; ggplot2 is expected to drop those
# points with a warning -- confirm this is acceptable.
ggplot(df_plot_price) +
  geom_point(aes(x = annual_change, y = stdev)) +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal() +
  theme(legend.position = "bottom") +
  facet_wrap(vars(category)) +
  labs(
    title = "Annual Price Change vs Daily Change Std",
    x = "Annual Price Change",
    y = "Daily Change Std"
  )

## Average Daily Price Change vs Standard Deviation of Daily Price Change

# Scatter of each fund's average daily price change against the standard
# deviation of its daily price changes, one panel per fund category.
# Fix: the x aesthetic now uses avg_daily_change so the data matches the
# heading, plot title and x-axis label (it previously plotted annual_change);
# the stray trailing " s" in the title is removed as well.
# NOTE: both axes are log-scaled, so funds with a zero or negative average
# daily change cannot be placed on the x axis; ggplot2 is expected to drop
# those points with a warning -- confirm this is acceptable.
ggplot(df_plot_price) +
  geom_point(aes(x = avg_daily_change, y = stdev)) +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal() +
  theme(legend.position = "bottom") +
  facet_wrap(vars(category)) +
  labs(
    title = "Avg Daily Change vs Daily Change Std",
    x = "Avg Daily Change",
    y = "Daily Change Std"
  )

3.2 Annual Price Change vs Standard Deviation of Daily Change for Contribution

# Annual price change against daily-change standard deviation, restricted to
# rows where the `contribution` flag is TRUE and coloured by category.
df_plot_price %>%
  filter(contribution) %>%
  ggplot() +
  geom_point(aes(x = annual_change, y = stdev, color = category)) +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal() +
  theme(legend.position = "bottom") +
  labs(
    title = "Annual Price Change vs Daily Change Std of Contribution",
    x = "Annual Price Change",
    y = "Daily Change Std"
  )

3.3 Annual Price Change vs Standard Deviation of Daily Change for Categories

# Average the three fund-level measures within each (category, fund type)
# pair, ignoring missing values, and print an overview of the result.
df_plot_categories <- df_plot_price %>%
  group_by(category, fund_type) %>%
  summarise(
    across(
      c(avg_daily_change, stdev, annual_change),
      ~ mean(.x, na.rm = TRUE)
    )
  ) %>%
  glimpse()
## `summarise()` regrouping output by 'category' (override with `.groups` argument)
## Rows: 30
## Columns: 5
## Groups: category [30]
## $ category         <chr> "AES Participation Standard Fund", "AES Standard F...
## $ fund_type        <chr> "pension", "pension", "pension", "mutual", "pensio...
## $ avg_daily_change <dbl> 0.0005727254, 0.0004585093, 0.0005385514, 0.000531...
## $ stdev            <dbl> 0.0015402946, 0.0021006843, 0.0043487023, 0.002251...
## $ annual_change    <dbl> 0.15607567, 0.12183272, 0.14471099, 0.14463202, 0....
# Category-level averages: annual price change against daily-change standard
# deviation, coloured by category, one panel per fund type.
ggplot(df_plot_categories) +
  geom_point(aes(x = annual_change, y = stdev, color = category)) +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal() +
  theme(legend.position = "bottom") +
  facet_wrap(vars(fund_type)) +
  labs(
    title = "Annual Price Change vs Daily Change Std for Categories",
    x = "Annual Price Change",
    y = "Daily Change Std"
  )

3.4 Annual Price Change vs Standard Deviation of Daily Change for Companies

# Average the three fund-level measures within each (company, fund type)
# pair, ignoring missing values.
df_plot_companies <- df_plot_price %>%
  group_by(company_name, fund_type) %>%
  summarise(
    across(
      c(avg_daily_change, stdev, annual_change),
      ~ mean(.x, na.rm = TRUE)
    )
  )
## `summarise()` regrouping output by 'company_name' (override with `.groups` argument)
# Company-level averages: annual price change against daily-change standard
# deviation, coloured by company, one panel per fund type.
ggplot(df_plot_companies) +
  geom_point(aes(x = annual_change, y = stdev, color = company_name)) +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal() +
  theme(legend.position = "bottom") +
  facet_wrap(vars(fund_type)) +
  labs(
    title = "Annual Price Change vs Daily Change Std for Companies",
    x = "Annual Price Change",
    y = "Daily Change Std"
  )

## ggplot(df_plot_categories)+geom_point(aes(x=price_change, ## y=sd_mean))+scale_x_log10()#+scale_y_log10()+theme_minimal()+theme(legend.position="bottom")

3.5 Distribution of Funds Valuation For Contribution

# Snapshot of all funds on 2020-11-16 with the total value expressed in
# millions (rounded to 2 decimals) and as a count of whole 10-million units.
df_today <- df_all %>%
  filter(date == ymd("2020-11-16")) %>%
  mutate(
    total_value_millions = round(total_value / 1e6, 2),
    total_value_bins = floor(total_value / 1e7)
  )
# Histogram of fund size in millions, one row of panels per value of the
# `contribution` flag.
ggplot(df_today) +
  geom_histogram(aes(x = total_value_millions), bins = 50) +
  facet_grid(rows = vars(contribution)) +
  labs(x = "Total Value in Million", y = "Count")

3.6 Distribution of Number of People in Funds For Fund Types

# Snapshot of all funds on 2020-11-16, used to look at investor counts.
df_today_investors <- df_all %>%
  filter(date == ymd("2020-11-16"))
# Histogram of the `people` column, one panel per fund type.
ggplot(df_today_investors) +
  geom_histogram(aes(x = people), bins = 50) +
  facet_wrap(vars(fund_type)) +
  labs(x = "People", y = "Count")

4 Funds Comparison in Last 1 Year

We compared funds and categories based on their price change over the last year. First, we created the datasets for the analysis.

# Rows for the two comparison dates only (2019-11-18 and 2020-11-16), reduced
# to the columns needed for the comparison and ordered so that, per fund, the
# earlier date comes first.
df_2 <- df_all %>%
  filter(date == ymd("2019-11-18") | date == ymd("2020-11-16")) %>%
  select(
    date, fund_type, category, code, company_name, name, total_value, price
  ) %>%
  arrange(code, name, date)
# Per fund, compare the 2020-11-16 row with the row before it (the 2019-11-18
# row when the fund has one) and express the change in price and in total
# value as percentages. Only the 2020-11-16 row is kept; funds without an
# earlier row get NA for the previous values and the changes.
df_3 <- df_2 %>%
  group_by(code, name) %>%
  mutate(
    previous_price = lag(price),
    change_price_percentage = 100 * (price - previous_price) / previous_price,
    previous_total_value = lag(total_value),
    change_total_value_percentage =
      100 * (total_value - previous_total_value) / previous_total_value
  ) %>%
  filter(date == ymd("2020-11-16")) %>%
  select(
    date, fund_type, category, code, name,
    price, previous_price, change_price_percentage,
    total_value, previous_total_value, change_total_value_percentage
  )

4.1 Most Valued 40 Funds

# Rank funds by percentage price change, largest gain first, and keep the
# first 40 rows of that ranking.
order_price_best <- df_3 %>%
  select(
    fund_type, category, code, name,
    previous_price, price, change_price_percentage
  ) %>%
  arrange(desc(change_price_percentage))
best_price_funds <- order_price_best[seq_len(40), ]
best_price_funds
## # A tibble: 40 x 7
## # Groups:   code, name [40]
##    fund_type category  code  name        previous_price   price change_price_pe~
##    <chr>     <chr>     <chr> <chr>                <dbl>   <dbl>            <dbl>
##  1 mutual    Hedge Um~ HDH   HEDEF PORT~       1.12     1.28e+1            1042.
##  2 mutual    Hedge Um~ IBG   AZIMUT PYS~       0.0400   2.17e-1             442.
##  3 mutual    Hedge Um~ HPF   HEDEF PORT~       1.25     6.66e+0             435.
##  4 mutual    Hedge Um~ FYA   ISTANBUL P~       0.000723 2.03e-3             181.
##  5 mutual    Stock Um~ TTE   IS PORTFOY~       0.0505   1.13e-1             124.
##  6 mutual    Hedge Um~ IAR   ISTANBUL P~       1.63     3.62e+0             122.
##  7 pension   Stock Fu~ KEH   KATILIM EM~       0.0180   3.91e-2             117.
##  8 pension   Stock Fu~ AGH   BEREKET EM~       0.0257   5.57e-2             116.
##  9 mutual    Hedge Um~ DPU   DENIZ PORF~       6.30     1.32e+1             110.
## 10 mutual    Stock Um~ TKF   TACIRLER P~       2.73     5.72e+0             110.
## # ... with 30 more rows

The best_category table shows the distribution of fund categories among the 40 funds that gained the most value over the last year

# Number of funds per category among the 40 best performers, most frequent
# category first, rendered as a table.
best_category <- best_price_funds %>%
  group_by(category) %>%
  count() %>%
  arrange(desc(n))
kable(
  best_category,
  col.names = c("Fund Category", "Number of Funds")
)
Fund Category Number of Funds
Hedge Umbrella Fund 15
Stock Umbrella Fund 9
Gold Fund 7
Stock Fund 4
Variable Fund 2
Index Fund 1
Mixed Umbrella Fund 1
Variable Umbrella Fund 1

4.2 Least Valued / Depreciated 40 Funds

# Rank funds by percentage price change, smallest (most negative) first, and
# keep the first 40 rows of that ranking.
order_price_worst <- df_3 %>%
  select(
    fund_type, category, code, name,
    previous_price, price, change_price_percentage
  ) %>%
  arrange(change_price_percentage)
worst_price_funds <- order_price_worst[seq_len(40), ]
worst_price_funds
## # A tibble: 40 x 7
## # Groups:   code, name [40]
##    fund_type category   code  name       previous_price   price change_price_pe~
##    <chr>     <chr>      <chr> <chr>               <dbl>   <dbl>            <dbl>
##  1 mutual    Hedge Umb~ KOP   QINVEST P~         1.50   0.860             -42.7 
##  2 mutual    Fund Of F~ AES   AK PORTFO~         0.0151 0.00888           -41.2 
##  3 mutual    Hedge Umb~ ATJ   AZIMUT PO~         1.19   1.06              -10.5 
##  4 mutual    Hedge Umb~ AVC   ATA PORTF~         0.957  0.887              -7.35
##  5 mutual    Hedge Umb~ IBE   ISTANBUL ~         1.07   1                  -6.86
##  6 mutual    Hedge Umb~ OSH   OSMANLI P~         0.0277 0.0258             -6.58
##  7 mutual    Hedge Umb~ KSY   YAPI KRED~         1.01   0.986              -2.48
##  8 mutual    Stock Umb~ TAU   IS PORTFO~         0.0527 0.0517             -1.94
##  9 mutual    Stock Umb~ ADP   AK PORTFO~         0.104  0.102              -1.33
## 10 mutual    Hedge Umb~ STZ   STRATEJI ~         0.927  0.945               1.89
## # ... with 30 more rows

The worst_category table shows the distribution of fund categories among the 40 funds that gained the least value (or depreciated) over the last year

# Number of funds per category among the 40 weakest performers, most frequent
# category first, rendered as a table.
worst_category <- worst_price_funds %>%
  group_by(category) %>%
  count() %>%
  arrange(desc(n))
kable(
  worst_category,
  col.names = c("Fund Category", "Number of Funds")
)
Fund Category Number of Funds
Hedge Umbrella Fund 11
Debt Securities Umbrella Fund 10
Participation Umbrella Fund 6
Debt Instruments Fund 3
Variable Umbrella Fund 3
Fund Of Funds Umbrella Fund 2
Stock Umbrella Fund 2
Government Lease Certificates Fund 1
Govt. Bonds and Bills Fund 1
Variable Fund 1