R 探索性数据分析

> library("tidyverse")
> library("viridis")
> library("forcats")
> library("nycflights13")
> # Compare the distributions of the x, y and z columns of diamonds,
> # drawing one density curve (plus rug marks) per column.
> diamonds %>%
+   mutate(id = row_number()) %>%
+   select(x, y, z, id) %>%
+   # pivot_longer() is the current replacement for the superseded gather();
+   # every column except id is stacked into variable/value pairs.
+   pivot_longer(cols = -id, names_to = "variable", values_to = "value") %>%
+   ggplot(mapping = aes(x = value)) +
+   geom_density() +
+   geom_rug() +
+   facet_grid(variable ~ .) # one row of panels per variable
> # Histogram of prices below 2500 with 100-wide bins centred on 0, 100, ...
> diamonds %>%
+   filter(price < 2500) %>%
+   ggplot(mapping = aes(x = price)) +
+   geom_histogram(binwidth = 100, center = 0)
> # Count diamonds priced below 2500 in 100-wide price intervals.
> diamonds %>%
+   filter(price < 2500) %>%
+   count(cut_width(price, 100))
# A tibble: 23 x 2
   `cut_width(price, 100)`     n
   <fct>                   <int>
 1 [250,350]                  17
 2 (350,450]                 876
 3 (450,550]                1930
 4 (550,650]                2717
 5 (650,750]                2888
 6 (750,850]                2696
 7 (850,950]                2366
 8 (950,1.05e+03]           2000
 9 (1.05e+03,1.15e+03]      1510
10 (1.15e+03,1.25e+03]      1124
# ... with 13 more rows
> # Histogram of all prices with narrow 10-wide bins.
> # diamonds is passed directly: filter() without a condition keeps every row.
> ggplot(diamonds, aes(x = price)) +
+   geom_histogram(binwidth = 10, center = 0)
> # Add `ending`: the remainder of price after dividing by 1000.
> mutate(diamonds, ending = price %% 1000)
# A tibble: 53,940 x 11
   carat cut       color clarity depth table price     x     y     z ending
   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>  <dbl>
 1 0.23  Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43    326
 2 0.21  Premium   E     SI1      59.8    61   326  3.89  3.84  2.31    326
 3 0.23  Good      E     VS1      56.9    65   327  4.05  4.07  2.31    327
 4 0.290 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63    334
 5 0.31  Good      J     SI2      63.3    58   335  4.34  4.35  2.75    335
 6 0.24  Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48    336
 7 0.24  Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47    336
 8 0.26  Very Good H     SI1      61.9    55   337  4.07  4.11  2.53    337
 9 0.22  Fair      E     VS2      65.1    61   337  3.87  3.78  2.49    337
10 0.23  Very Good H     VS1      59.4    61   338  4     4.05  2.39    338
# ... with 53,930 more rows
> # Price histogram zoomed with coord_cartesian(): only the visible window
> # changes, so no rows are reported as removed.
> ggplot(diamonds, mapping = aes(x = price)) +
+   geom_histogram() +
+   coord_cartesian(xlim = c(100, 5000), ylim = c(0, 3000))
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
> # Price histogram restricted with xlim()/ylim() instead: values outside the
> # limits are removed, as the warnings printed after this call report.
> ggplot(diamonds, mapping = aes(x = price)) +
+   geom_histogram() +
+   xlim(100, 5000) +
+   ylim(0, 3000)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning messages:
1: Removed 14714 rows containing non-finite values (stat_bin). 
2: Removed 5 rows containing missing values (geom_bar). 
> # Histogram of y (0.5-wide bins) with the count axis zoomed to 0-50.
> ggplot(diamonds, mapping = aes(x = y)) +
+   geom_histogram(binwidth = 0.5) +
+   coord_cartesian(ylim = c(0, 50))

你可能感兴趣的:(R 探索性数据分析)