2  inclass1

Author

Gözde Uğur

Published

January 2, 2024

2.0.1 Amazon Products Dataset 2023

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# Load the Amazon Products 2023 dataset with base read.csv().
# NOTE(review): the path below is absolute and machine-specific; the script
# only runs where this exact file exists.
myData <- read.csv("/Users/gozde.ugur/Downloads/archive (3)/amazon_products.csv")

# Best sellers: sort by units bought in the last month (descending) and keep
# the first five rows.
en_cok_satanlar <- myData %>%
  arrange(desc(boughtInLastMonth)) %>%
  head(5)

# Show only selected columns
top_5 <- en_cok_satanlar[, c("title", "stars", "price", "boughtInLastMonth")]
print(top_5)
                                                                                                                                                                            title
1                                                                                                        Bounty Quick Size Paper Towels, White, 8 Family Rolls = 20 Regular Rolls
2                                            Amazon Brand - Presto! Flex-a-Size Paper Towels, 158 Sheet Huge Roll, 12 Rolls (2 Packs of 6), Equivalent to 38 Regular Rolls, White
3                                                                                                             Stardrops - The Pink Stuff - The Miracle All Purpose Cleaning Paste
4                                                                              Amazon Basics 2-Ply Paper Towels, Flex-Sheets, 150 Sheets per Roll, 12 Rolls (2 Packs of 6), White
5 Hismile v34 Colour Corrector, Tooth Stain Removal, Teeth Whitening Booster, Purple Toothpaste, Colour Correcting, Hismile V34, Hismile Colour Corrector, Tooth Colour Corrector
  stars price boughtInLastMonth
1   4.8 24.42            100000
2   4.7 28.28            100000
3   4.4  4.99            100000
4   4.2 22.86            100000
5   3.4 20.69            100000
# Group by category and total the units bought in the last month.
category_sales <- ungroup(
  summarise(
    group_by(myData, category_id),
    category_sale = sum(boughtInLastMonth)
  )
)

print(category_sales)
# A tibble: 248 × 2
   category_id category_sale
         <int>         <int>
 1           1       1099750
 2           2         67400
 3           3        262100
 4           4        145050
 5           5        500500
 6           6       1509950
 7           7         46650
 8           8        128550
 9           9        197550
10          10       2092300
# ℹ 238 more rows
# Mean and median price per category.
category_mean_prices <- ungroup(
  summarise(
    group_by(myData, category_id),
    mean_price = mean(price),
    median_price = median(price)
  )
)

print(category_mean_prices)
# A tibble: 248 × 3
   category_id mean_price median_price
         <int>      <dbl>        <dbl>
 1           1       15.6         9.99
 2           2       28.4        11.0 
 3           3       16.8        12.8 
 4           4       78.2        22.0 
 5           5       15.4         9.99
 6           6       15.0         9.99
 7           7       19.2        14.3 
 8           8       18.7        14.9 
 9           9       20.6        15.0 
10          10       17.5        13.0 
# ℹ 238 more rows
# Compact overview of the data: row/column counts plus each column's type
# and first few values.
glimpse(myData)
Rows: 1,426,337
Columns: 11
$ asin              <chr> "B014TMV5YE", "B07GDLCQXV", "B07XSCCZYG", "B08MVFKGJ…
$ title             <chr> "Sion Softside Expandable Roller Luggage, Black, Che…
$ imgUrl            <chr> "https://m.media-amazon.com/images/I/815dLQKYIYL._AC…
$ productURL        <chr> "https://www.amazon.com/dp/B014TMV5YE", "https://www…
$ stars             <dbl> 4.5, 4.5, 4.6, 4.6, 4.5, 4.5, 4.5, 4.5, 4.5, 4.4, 4.…
$ reviews           <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ price             <dbl> 139.99, 169.99, 365.49, 291.59, 174.99, 144.49, 169.…
$ listPrice         <dbl> 0.00, 209.99, 429.99, 354.37, 309.99, 0.00, 0.00, 0.…
$ category_id       <int> 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 10…
$ isBestSeller      <chr> "False", "False", "False", "False", "False", "False"…
$ boughtInLastMonth <int> 2000, 1000, 300, 400, 400, 500, 400, 100, 500, 200, …