Read the clean data set “df_clean.rds” that we produced in the previous step, Preprocessing Step 2: Data Cleaning & Transforming.
# Load the cleaned fund data set and print a column-wise overview of it.
df_all <- readRDS("df_clean.rds")
glimpse(df_all)
## Rows: 826,638
## Columns: 40
## $ OKS <lgl> FALSE, FALSE, FALSE, FALSE, FAL...
## $ participation <lgl> FALSE, TRUE, FALSE, FALSE, FALS...
## $ contribution <lgl> FALSE, FALSE, FALSE, FALSE, FAL...
## $ date <dttm> 2015-12-31, 2015-12-31, 2015-1...
## $ code <chr> "ABE", "AEA", "AEB", "AEC", "AE...
## $ fund_type <chr> "pension", "pension", "pension"...
## $ category <chr> "Variable Fund", "Gold Fund", "...
## $ name <chr> "ANADOLU HAYAT EMEKLILIK A.S.B....
## $ price <dbl> 0.012212, 0.011026, 0.033588, 0...
## $ shares <dbl> 3876175452, 10761804554, 219965...
## $ people <dbl> 25887, 97857, 13269, 101, 5030,...
## $ total_value <dbl> 47336763, 118662196, 73881771, ...
## $ p_Bank_Bills <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Other <dbl> 3.02, 94.31, 1.90, 0.39, 1.79, ...
## $ p_FX_Payable_Bills <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Government_Bond <dbl> 0.66, 0.00, 0.84, 60.90, 66.88,...
## $ p_Foreign_Currency_Bills <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Eurobonds <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Commercial_Paper <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Fund_Participation_Certificate <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Real_Estate_Certificate <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Treasury_Bill <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Stock <dbl> 0.00, 0.88, 82.55, 4.76, 29.47,...
## $ p_Government_Bonds_and_Bills_FX <dbl> 0.00, 0.00, 0.00, 11.09, 0.00, ...
## $ p_Participation_Account <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Government_Lease_Certificates <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Precious_Metals <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Private_Sector_Lease_Certificates <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Private_Sector_Bond <dbl> 0.00, 4.81, 0.00, 22.78, 0.00, ...
## $ p_Repo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Derivatives <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_TMM <dbl> 0.00, 0.00, 6.61, 0.08, 0.00, 7...
## $ p_Reverse_Repo <dbl> 0.40, 0.00, 8.10, 0.00, 1.86, 2...
## $ p_Asset_Backed_Securities <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Term_Deposit <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Foreign_Debt_Instruments <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Foreign_Equity <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Foreign_Securities <dbl> 95.92, 0.00, 0.00, 0.00, 0.00, ...
## $ company_code <chr> "AEM", "AEM", "AHS", "HYS", "KH...
## $ company_name <chr> "ANADOLU HAYAT EMEKLILIK A.S.",...
df_all
covers the entire data set.
Number of unique funds:
# Count the distinct fund codes in the whole data set.
df_all %>%
  summarise(count = n_distinct(code))
## # A tibble: 1 x 1
## count
## <int>
## 1 920
Number of unique funds by type:
# Reduce to one row per (fund_type, code) pair, then count the codes per fund type.
df_all %>%
  distinct(fund_type, code) %>%
  count(fund_type, name = "count")
## # A tibble: 2 x 2
## fund_type count
## <chr> <int>
## 1 mutual 516
## 2 pension 404
Number of unique funds by type and category:
# One row per (fund_type, category, code), counted per type/category pair and
# listed from the most populated category to the least populated one.
df_all %>%
  distinct(fund_type, category, code) %>%
  group_by(fund_type, category) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
## # A tibble: 31 x 3
## # Groups: fund_type [2]
## fund_type category count
## <chr> <chr> <int>
## 1 mutual Hedge Umbrella Fund 231
## 2 pension Variable Fund 166
## 3 mutual Debt Securities Umbrella Fund 70
## 4 mutual Variable Umbrella Fund 67
## 5 mutual Stock Umbrella Fund 62
## 6 mutual Participation Umbrella Fund 30
## 7 pension Stock Fund 28
## 8 mutual Money Market Umbrella Fund 25
## 9 pension Debt Instruments Fund 25
## 10 mutual Fund Of Funds Umbrella Fund 17
## # ... with 21 more rows
Date coverage of the data: the data set spans at most 1834 days and contains 1262 distinct dates. There is no fund data for weekends and official holidays.
# Number of distinct dates, first and last date, and the time span between them.
df_all %>%
  summarise(
    count = n_distinct(date),
    latest = max(date),
    earliest = min(date),
    length = latest - earliest
  )
## # A tibble: 1 x 4
## count latest earliest length
## <int> <dttm> <dttm> <drtn>
## 1 1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
Total number of data points and date coverage for each fund. Some funds have huge gaps in their data: the ratio of data points to the length of the covered period is as low as 25% for some funds, while for the majority of funds it is over 65%. Almost all of the funds with data irregularities are Hedge Umbrella Funds.
# Per fund: number of observations, first/last date, span between them as an
# integer, and the ratio of observations to that span (lowest ratio first).
df_all %>%
  group_by(code, fund_type, category) %>%
  summarise(
    count = n(),
    latest = max(date),
    earliest = min(date),
    length = as.integer(latest - earliest)
  ) %>%
  mutate(coverage = count / length) %>%
  arrange(coverage)
## # A tibble: 920 x 8
## # Groups: code, fund_type [920]
## code fund_type category count latest earliest length
## <chr> <chr> <chr> <int> <dttm> <dttm> <int>
## 1 USY mutual Hedge U~ 462 2020-11-16 00:00:00 2015-12-01 00:00:00 1812
## 2 IPF mutual Hedge U~ 466 2020-11-16 00:00:00 2015-12-01 00:00:00 1812
## 3 IPP mutual Hedge U~ 466 2020-11-16 00:00:00 2015-12-01 00:00:00 1812
## 4 OSH mutual Hedge U~ 466 2020-11-16 00:00:00 2015-12-01 00:00:00 1812
## 5 CTV mutual Hedge U~ 34 2020-11-16 00:00:00 2020-07-09 00:00:00 130
## 6 TDB mutual Hedge U~ 481 2020-11-16 00:00:00 2015-11-30 00:00:00 1813
## 7 IAF mutual Hedge U~ 489 2020-11-16 00:00:00 2015-12-01 00:00:00 1812
## 8 TPP mutual Hedge U~ 461 2020-11-16 00:00:00 2016-05-02 00:00:00 1659
## 9 YKS mutual Hedge U~ 513 2020-11-16 00:00:00 2015-11-17 00:00:00 1826
## 10 YPF mutual Hedge U~ 507 2020-11-16 00:00:00 2016-02-01 00:00:00 1750
## # ... with 910 more rows, and 1 more variable: coverage <dbl>
107 funds have less than 1 year of data.
# Per fund, compute the span between its first and last observation, convert it
# to whole 365-day years (rounded down) and count the funds in each bucket.
df_all %>%
  group_by(code) %>%
  summarise(
    count = n(),
    latest = max(date),
    earliest = min(date),
    length = latest - earliest
  ) %>%
  count(length_years = as.integer(floor(length / 365)), name = "countcuts")
## # A tibble: 6 x 2
## length_years countcuts
## <int> <int>
## 1 0 107
## 2 1 61
## 3 2 171
## 4 3 72
## 5 4 58
## 6 5 451
447 funds have at least 5 calendar years of data coverage (starting from 2015-11-16 or earlier).
# Funds whose first observation is on or before 2015-11-16, newest start first.
df_all %>%
  group_by(code, fund_type) %>%
  summarise(
    count = n(),
    latest = max(date),
    earliest = min(date),
    length = latest - earliest
  ) %>%
  filter(earliest <= ymd("2015-11-16")) %>%
  arrange(desc(earliest))
## # A tibble: 447 x 6
## # Groups: code [447]
## code fund_type count latest earliest length
## <chr> <chr> <int> <dttm> <dttm> <drtn>
## 1 DDS mutual 633 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
## 2 IIP mutual 636 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
## 3 IPC mutual 637 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
## 4 IPK mutual 637 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
## 5 IPO mutual 637 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
## 6 IPR mutual 637 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
## 7 IPU mutual 637 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
## 8 ISS mutual 637 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
## 9 IYR mutual 638 2020-11-16 00:00:00 2015-11-11 00:00:00 1832 days
## 10 AAK mutual 1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
## # ... with 437 more rows
813 funds have at least 1 calendar year of data coverage (starting from 2019-11-15 or earlier).
# Funds whose first observation is on or before 2019-11-15, ordered by code.
df_all %>%
  group_by(code) %>%
  summarise(
    count = n(),
    latest = max(date),
    earliest = min(date),
    length = latest - earliest
  ) %>%
  filter(earliest <= ymd("2019-11-15")) %>%
  arrange(code)
## # A tibble: 813 x 5
## code count latest earliest length
## <chr> <int> <dttm> <dttm> <drtn>
## 1 AAJ 717 2020-11-16 00:00:00 2018-01-04 00:00:00 1047 days
## 2 AAK 1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
## 3 AAL 1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
## 4 AAS 1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
## 5 AAV 1261 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
## 6 ABE 1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
## 7 ABU 1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
## 8 ACC 1038 2020-11-16 00:00:00 2016-09-29 00:00:00 1509 days
## 9 ACD 1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
## 10 ACK 1262 2020-11-16 00:00:00 2015-11-09 00:00:00 1834 days
## # ... with 803 more rows
The df_plot_price
dataset provides the average daily price change and its standard deviation over the last 1 year of data. We kept only the funds that have at least 1 year of price data.
# Daily return statistics per fund over the window starting 2019-11-15.
df_plot_price <- df_all %>%
  group_by(code, fund_type, category, company_name) %>%
  arrange(code, date) %>%
  # The day-over-day change is computed before the date filter so that the
  # first day inside the window still has a previous price to compare against.
  mutate(daily_price_change = price / lag(price, n = 1) - 1) %>%
  filter(date >= ymd("20191115")) %>%
  summarise(
    avg_daily_change = mean(daily_price_change, na.rm = TRUE),
    stdev = sd(daily_price_change, na.rm = TRUE),
    earliest = min(date),
    count = n()
  ) %>%
  # Keep only funds whose first observation in the window is 2019-11-18 or earlier.
  filter(earliest <= ymd("20191118")) %>%
  arrange(count)
df_plot_price %>%
  arrange(count)
## # A tibble: 812 x 8
## # Groups: code, fund_type, category [812]
## code fund_type category company_name avg_daily_change stdev
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 ATJ mutual Hedge U~ AZIMUT PORT~ -0.000400 0.0150
## 2 AJE mutual Hedge U~ AK PORTFOY ~ 0.00113 0.00712
## 3 USY mutual Hedge U~ UNLU PORTFO~ 0.000436 0.00741
## 4 ACN mutual Hedge U~ ACTUS PORTF~ 0.000737 0.00719
## 5 ACU mutual Hedge U~ ACTUS PORTF~ 0.00162 0.00786
## 6 ACZ mutual Hedge U~ ACTUS PORTF~ 0.000854 0.00596
## 7 KTS mutual Hedge U~ KT PORTFOY ~ 0.000352 0.00129
## 8 IPF mutual Hedge U~ PERFORM POR~ 0.000476 0.00224
## 9 IPP mutual Hedge U~ PERFORM POR~ 0.00183 0.0128
## 10 PPD mutual Hedge U~ PERFORM POR~ 0.000869 0.00723
## # ... with 802 more rows, and 2 more variables: earliest <dttm>, count <int>
The df_price_change
dataset provides the annual price change (between 2019-11-15 and 2020-11-16).
# Annual price change per fund: keep only the two end-point dates, compare the
# later price with the earlier one, and retain the row that carries the change.
df_price_change <- df_all %>%
  filter(date == ymd("2019-11-15") | date == ymd("2020-11-16")) %>%
  group_by(code, fund_type, category, company_name) %>%
  arrange(code, date) %>%
  mutate(annual_change = price / lag(price) - 1) %>%
  relocate(annual_change) %>%
  # Rows without an earlier price in the same group have no change value.
  filter(!is.na(annual_change))
df_price_change
## # A tibble: 806 x 41
## # Groups: code, fund_type, category, company_name [806]
## annual_change OKS participation contribution date code
## <dbl> <lgl> <lgl> <lgl> <dttm> <chr>
## 1 0.120 TRUE FALSE FALSE 2020-11-16 00:00:00 AAJ
## 2 0.221 FALSE FALSE FALSE 2020-11-16 00:00:00 AAK
## 3 0.0953 FALSE FALSE FALSE 2020-11-16 00:00:00 AAL
## 4 0.301 FALSE FALSE FALSE 2020-11-16 00:00:00 AAS
## 5 0.432 FALSE FALSE FALSE 2020-11-16 00:00:00 AAV
## 6 0.209 FALSE FALSE FALSE 2020-11-16 00:00:00 ABE
## 7 0.0862 FALSE FALSE FALSE 2020-11-16 00:00:00 ABU
## 8 0.339 FALSE FALSE FALSE 2020-11-16 00:00:00 ACC
## 9 0.338 FALSE FALSE FALSE 2020-11-16 00:00:00 ACD
## 10 0.305 FALSE FALSE FALSE 2020-11-16 00:00:00 ACK
## # ... with 796 more rows, and 35 more variables: fund_type <chr>,
## # category <chr>, name <chr>, price <dbl>, shares <dbl>, people <dbl>,
## # total_value <dbl>, p_Bank_Bills <dbl>, p_Other <dbl>,
## # p_FX_Payable_Bills <dbl>, p_Government_Bond <dbl>,
## # p_Foreign_Currency_Bills <dbl>, p_Eurobonds <dbl>,
## # p_Commercial_Paper <dbl>, p_Fund_Participation_Certificate <dbl>,
## # p_Real_Estate_Certificate <dbl>, p_Treasury_Bill <dbl>, p_Stock <dbl>,
## # p_Government_Bonds_and_Bills_FX <dbl>, p_Participation_Account <dbl>,
## # p_Government_Lease_Certificates <dbl>, p_Precious_Metals <dbl>,
## # p_Private_Sector_Lease_Certificates <dbl>, p_Private_Sector_Bond <dbl>,
## # p_Repo <dbl>, p_Derivatives <dbl>, p_TMM <dbl>, p_Reverse_Repo <dbl>,
## # p_Asset_Backed_Securities <dbl>, p_Term_Deposit <dbl>,
## # p_Foreign_Debt_Instruments <dbl>, p_Foreign_Equity <dbl>,
## # p_Foreign_Securities <dbl>, company_code <chr>, company_name <chr>
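Join the annual price change data to the daily averages and standard deviations. The df_plot_price
dataset now provides, for each fund, the annual price change together with the average daily change and its standard deviation.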
# Attach the daily statistics to the annual price change table, matching on code.
# df_plot_price is still grouped, so select() also keeps its grouping columns;
# the duplicated fund_type/category copies from the right-hand side (".y") are
# dropped and the left-hand ones (".x") get their plain names back.
df_plot_price <- df_price_change %>%
  left_join(
    df_plot_price %>% select(code, avg_daily_change, stdev),
    by = "code"
  ) %>%
  select(-c(fund_type.y, category.y)) %>%
  rename(fund_type = fund_type.x, category = category.x) %>%
  relocate(avg_daily_change, stdev)
glimpse(df_plot_price)
## Rows: 806
## Columns: 43
## Groups: code, company_name [806]
## $ avg_daily_change <dbl> 0.0004518532, 0.0007945988, 0.0...
## $ stdev <dbl> 0.0021900905, 0.0037024105, 0.0...
## $ annual_change <dbl> 0.119639528, 0.221075413, 0.095...
## $ OKS <lgl> TRUE, FALSE, FALSE, FALSE, FALS...
## $ participation <lgl> FALSE, FALSE, FALSE, FALSE, FAL...
## $ contribution <lgl> FALSE, FALSE, FALSE, FALSE, FAL...
## $ date <dttm> 2020-11-16, 2020-11-16, 2020-1...
## $ code <chr> "AAJ", "AAK", "AAL", "AAS", "AA...
## $ fund_type <chr> "pension", "mutual", "mutual", ...
## $ category <chr> "AES Standard Fund", "Variable ...
## $ name <chr> "AVIVASA EMEKLILIK VE HAYAT A.S...
## $ price <dbl> 0.014412, 41.390035, 0.621141, ...
## $ shares <dbl> 35933708934, 1897355, 316664117...
## $ people <dbl> 366643, 432, 3249, 97, 34, 1072...
## $ total_value <dbl> 517888264, 78531591, 196693215,...
## $ p_Bank_Bills <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Other <dbl> 0.00, 0.00, 0.01, 0.00, 0.00, 0...
## $ p_FX_Payable_Bills <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Government_Bond <dbl> 50.21, 10.31, 0.00, 0.00, 0.00,...
## $ p_Foreign_Currency_Bills <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Eurobonds <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Commercial_Paper <dbl> 15.08, 11.41, 7.63, 0.00, 0.00,...
## $ p_Fund_Participation_Certificate <dbl> 13.10, 14.22, 0.00, 95.56, 0.00...
## $ p_Real_Estate_Certificate <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Treasury_Bill <dbl> 1.41, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Stock <dbl> 9.93, 29.94, 0.00, 0.00, 97.75,...
## $ p_Government_Bonds_and_Bills_FX <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Participation_Account <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Government_Lease_Certificates <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Precious_Metals <dbl> 1.15, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Private_Sector_Lease_Certificates <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Private_Sector_Bond <dbl> 0.00, 3.18, 0.00, 0.00, 0.00, 0...
## $ p_Repo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Derivatives <dbl> 0.00, 4.18, 5.36, 10.40, 2.25, ...
## $ p_TMM <dbl> 0.00, 0.00, 0.00, -5.96, 0.00, ...
## $ p_Reverse_Repo <dbl> 1.94, 26.76, 87.00, 0.00, 0.00,...
## $ p_Asset_Backed_Securities <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Term_Deposit <dbl> 7.18, 0.00, 0.00, 0.00, 0.00, 0...
## $ p_Foreign_Debt_Instruments <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ p_Foreign_Equity <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 9...
## $ p_Foreign_Securities <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0...
## $ company_code <chr> "AHS", "APY", "APY", "APY", "AP...
## $ company_name <chr> "AVIVASA EMEKLILIK VE HAYAT A.S...
# Annual price change vs. standard deviation of the daily change, one panel per
# fund category. The x aesthetic now matches the title and axis label.
# NOTE: both axes use a log10 scale, which cannot represent zero or negative
# values, so funds with a non-positive change do not appear in these plots.
ggplot(df_plot_price, aes(x = annual_change, y = stdev)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal() +
  theme(legend.position = "bottom") +
  facet_wrap(vars(category)) +
  labs(title = 'Annual Price Change vs Daily Change Std', x = 'Annual Price Change', y = 'Daily Change Std')
## Average Daily Price Change vs Standard Deviation of Daily Price Change
ggplot(df_plot_price, aes(x = avg_daily_change, y = stdev)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal() +
  theme(legend.position = "bottom") +
  facet_wrap(vars(category)) +
  labs(title = 'Avg Daily Change vs Daily Change Std', x = 'Avg Daily Change', y = 'Daily Change Std')
# Same comparison restricted to rows flagged as contribution, coloured by category.
df_plot_price %>%
  filter(contribution) %>%
  ggplot(aes(x = annual_change, y = stdev, color = category)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal() +
  theme(legend.position = "bottom") +
  labs(title = 'Annual Price Change vs Daily Change Std of Contribution', x = 'Annual Price Change', y = 'Daily Change Std')
# Average the three fund-level metrics within each (category, fund_type) pair.
df_plot_categories <- df_plot_price %>%
  group_by(category, fund_type) %>%
  summarise(
    across(
      c(avg_daily_change, stdev, annual_change),
      ~ mean(.x, na.rm = TRUE)
    )
  )
glimpse(df_plot_categories)
## `summarise()` regrouping output by 'category' (override with `.groups` argument)
## Rows: 30
## Columns: 5
## Groups: category [30]
## $ category <chr> "AES Participation Standard Fund", "AES Standard F...
## $ fund_type <chr> "pension", "pension", "pension", "mutual", "pensio...
## $ avg_daily_change <dbl> 0.0005727254, 0.0004585093, 0.0005385514, 0.000531...
## $ stdev <dbl> 0.0015402946, 0.0021006843, 0.0043487023, 0.002251...
## $ annual_change <dbl> 0.15607567, 0.12183272, 0.14471099, 0.14463202, 0....
# Category-level averages, one panel per fund type, coloured by category.
ggplot(df_plot_categories, aes(x = annual_change, y = stdev, color = category)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal() +
  theme(legend.position = "bottom") +
  facet_wrap(vars(fund_type)) +
  labs(title = 'Annual Price Change vs Daily Change Std for Categories', x = 'Annual Price Change', y = 'Daily Change Std')
# Average the three fund-level metrics within each (company_name, fund_type) pair.
df_plot_companies <- df_plot_price %>%
  group_by(company_name, fund_type) %>%
  summarise(
    across(
      c(avg_daily_change, stdev, annual_change),
      ~ mean(.x, na.rm = TRUE)
    )
  )
## `summarise()` regrouping output by 'company_name' (override with `.groups` argument)
# Company-level averages, one panel per fund type, coloured by company.
ggplot(df_plot_companies, aes(x = annual_change, y = stdev, color = company_name)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal() +
  theme(legend.position = "bottom") +
  facet_wrap(vars(fund_type)) +
  labs(title = 'Annual Price Change vs Daily Change Std for Companies', x = 'Annual Price Change', y = 'Daily Change Std')
## ggplot(df_plot_categories)+geom_point(aes(x=price_change, ## y=sd_mean))+scale_x_log10()#+scale_y_log10()+theme_minimal()+theme(legend.position="bottom")
# Snapshot of all funds on 2020-11-16 with the total value expressed in millions
# (two decimals) and as an index of 10-million-wide bins.
df_today <- df_all %>%
  filter(date == ymd("2020-11-16")) %>%
  mutate(
    total_value_millions = round(total_value / 1e6, 2),
    total_value_bins = floor(total_value / 1e7)
  )
# Histogram of total value, one row of panels per value of `contribution`.
ggplot(df_today, aes(x = total_value_millions)) +
  geom_histogram(bins = 50) +
  facet_grid(rows = vars(contribution)) +
  labs(x = 'Total Value in Million', y = 'Count')
# Snapshot of all funds on 2020-11-16, used for the investor-count histogram.
df_today_investors <- df_all %>%
  filter(date == ymd("2020-11-16"))
# Histogram of the `people` column, one panel per fund type.
ggplot(df_today_investors, aes(x = people)) +
  geom_histogram(bins = 50) +
  facet_wrap(vars(fund_type)) +
  labs(x = 'People', y = 'Count')
We compared funds and categories based on their price change over the last year. First, we created the datasets for the analyses.
# Two-date extract (2019-11-18 and 2020-11-16) with only the columns needed for
# the year-over-year comparison, ordered so each fund's rows are chronological.
# NOTE(review): the baseline here is 2019-11-18, while df_price_change uses
# 2019-11-15 - confirm which start date is intended.
df_2 <- df_all %>%
  select(date, fund_type, category, code, company_name, name, total_value, price) %>%
  filter(date == ymd("2019-11-18") | date == ymd("2020-11-16")) %>%
  arrange(code, name, date)
# Year-over-year percentage change in price and total value per fund.
# lag() picks up the previous row within each (code, name) group, i.e. the
# earlier of the two dates kept in df_2; only the 2020-11-16 row is retained.
df_3 <- df_2 %>%
  group_by(code, name) %>%
  mutate(
    previous_price = as.numeric(lag(price, n = 1)),
    change_price_percentage = 100 * (price - previous_price) / previous_price,
    previous_total_value = as.numeric(lag(total_value, n = 1)),
    change_total_value_percentage =
      100 * (total_value - previous_total_value) / previous_total_value
  ) %>%
  filter(date == ymd("2020-11-16")) %>%
  select(
    date, fund_type, category, code, name,
    price, previous_price, change_price_percentage,
    total_value, previous_total_value, change_total_value_percentage
  )
# Rank funds by their one-year price change, largest increase first.
# arrange() places rows with a missing change at the end.
order_price_best <- df_3 %>%
  arrange(desc(change_price_percentage)) %>%
  select(fund_type, category, code, name, previous_price, price, change_price_percentage)
# Keep the top 40. head() returns at most 40 rows, so a table with fewer than
# 40 funds is returned as is instead of being indexed past its last row.
best_price_funds <- head(order_price_best, 40)
best_price_funds
## # A tibble: 40 x 7
## # Groups: code, name [40]
## fund_type category code name previous_price price change_price_pe~
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 mutual Hedge Um~ HDH HEDEF PORT~ 1.12 1.28e+1 1042.
## 2 mutual Hedge Um~ IBG AZIMUT PYS~ 0.0400 2.17e-1 442.
## 3 mutual Hedge Um~ HPF HEDEF PORT~ 1.25 6.66e+0 435.
## 4 mutual Hedge Um~ FYA ISTANBUL P~ 0.000723 2.03e-3 181.
## 5 mutual Stock Um~ TTE IS PORTFOY~ 0.0505 1.13e-1 124.
## 6 mutual Hedge Um~ IAR ISTANBUL P~ 1.63 3.62e+0 122.
## 7 pension Stock Fu~ KEH KATILIM EM~ 0.0180 3.91e-2 117.
## 8 pension Stock Fu~ AGH BEREKET EM~ 0.0257 5.57e-2 116.
## 9 mutual Hedge Um~ DPU DENIZ PORF~ 6.30 1.32e+1 110.
## 10 mutual Stock Um~ TKF TACIRLER P~ 2.73 5.72e+0 110.
## # ... with 30 more rows
The best_category
shows the distribution of fund categories among the 40 funds whose price appreciated the most over the last year.
# Number of top-40 funds per category, most frequent category first.
# Regrouping by category replaces the (code, name) grouping carried over
# from best_price_funds.
best_category <- best_price_funds %>%
  group_by(category) %>%
  count(sort = TRUE)
kable(best_category, col.names = c("Fund Category", "Number of Funds"))
Fund Category | Number of Funds |
---|---|
Hedge Umbrella Fund | 15 |
Stock Umbrella Fund | 9 |
Gold Fund | 7 |
Stock Fund | 4 |
Variable Fund | 2 |
Index Fund | 1 |
Mixed Umbrella Fund | 1 |
Variable Umbrella Fund | 1 |
# Rank funds by their one-year price change, smallest change first.
# arrange() places rows with a missing change at the end.
order_price_worst <- df_3 %>%
  arrange(change_price_percentage) %>%
  select(fund_type, category, code, name, previous_price, price, change_price_percentage)
# Keep the bottom 40. head() returns at most 40 rows, so a table with fewer
# than 40 funds is returned as is instead of being indexed past its last row.
worst_price_funds <- head(order_price_worst, 40)
worst_price_funds
## # A tibble: 40 x 7
## # Groups: code, name [40]
## fund_type category code name previous_price price change_price_pe~
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 mutual Hedge Umb~ KOP QINVEST P~ 1.50 0.860 -42.7
## 2 mutual Fund Of F~ AES AK PORTFO~ 0.0151 0.00888 -41.2
## 3 mutual Hedge Umb~ ATJ AZIMUT PO~ 1.19 1.06 -10.5
## 4 mutual Hedge Umb~ AVC ATA PORTF~ 0.957 0.887 -7.35
## 5 mutual Hedge Umb~ IBE ISTANBUL ~ 1.07 1 -6.86
## 6 mutual Hedge Umb~ OSH OSMANLI P~ 0.0277 0.0258 -6.58
## 7 mutual Hedge Umb~ KSY YAPI KRED~ 1.01 0.986 -2.48
## 8 mutual Stock Umb~ TAU IS PORTFO~ 0.0527 0.0517 -1.94
## 9 mutual Stock Umb~ ADP AK PORTFO~ 0.104 0.102 -1.33
## 10 mutual Hedge Umb~ STZ STRATEJI ~ 0.927 0.945 1.89
## # ... with 30 more rows
The worst_category
shows the distribution of fund categories among the 40 funds whose price appreciated the least (or depreciated) over the last year.
# Number of bottom-40 funds per category, most frequent category first.
# Regrouping by category replaces the (code, name) grouping carried over
# from worst_price_funds.
worst_category <- worst_price_funds %>%
  group_by(category) %>%
  count(sort = TRUE)
kable(worst_category, col.names = c("Fund Category", "Number of Funds"))
Fund Category | Number of Funds |
---|---|
Hedge Umbrella Fund | 11 |
Debt Securities Umbrella Fund | 10 |
Participation Umbrella Fund | 6 |
Debt Instruments Fund | 3 |
Variable Umbrella Fund | 3 |
Fund Of Funds Umbrella Fund | 2 |
Stock Umbrella Fund | 2 |
Government Lease Certificates Fund | 1 |
Govt. Bonds and Bills Fund | 1 |
Variable Fund | 1 |