# Set the global knitr chunk option `echo` to TRUE so that the R source of
# every chunk is shown in the rendered document together with its output.
knitr::opts_chunk$set(echo = TRUE)

R Markdown

Bu R Markdown dokümanı, R ile İstatistik Programlama dersinin dönem sonu ödevini sunmak için hazırlanmıştır.

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages -------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'readr' was built under R version 3.5.3
## Warning: package 'purrr' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts ----------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
# Read the data set from the CSV file in the working directory.
# stringsAsFactors is set explicitly: the analysis below relies on the text
# columns (Date, type, region) being factors, which is only the default
# before R 4.0.0. Spelling it out keeps the results the same on newer R.
myData <- read.csv("avocado.csv", stringsAsFactors = TRUE)
# Display the first 10 rows of the data.
head(myData, n = 10)
##    X       Date AveragePrice Total.Volume   X4046     X4225  X4770 Total.Bags
## 1  0 2015-12-27         1.33     64236.62 1036.74  54454.85  48.16    8696.87
## 2  1 2015-12-20         1.35     54876.98  674.28  44638.81  58.33    9505.56
## 3  2 2015-12-13         0.93    118220.22  794.70 109149.67 130.50    8145.35
## 4  3 2015-12-06         1.08     78992.15 1132.00  71976.41  72.58    5811.16
## 5  4 2015-11-29         1.28     51039.60  941.48  43838.39  75.78    6183.95
## 6  5 2015-11-22         1.26     55979.78 1184.27  48067.99  43.61    6683.91
## 7  6 2015-11-15         0.99     83453.76 1368.92  73672.72  93.26    8318.86
## 8  7 2015-11-08         0.98    109428.33  703.75 101815.36  80.00    6829.22
## 9  8 2015-11-01         1.02     99811.42 1022.15  87315.57  85.34   11388.36
## 10 9 2015-10-25         1.07     74338.76  842.40  64757.44 113.00    8625.92
##    Small.Bags Large.Bags XLarge.Bags         type year region
## 1     8603.62      93.25           0 conventional 2015 Albany
## 2     9408.07      97.49           0 conventional 2015 Albany
## 3     8042.21     103.14           0 conventional 2015 Albany
## 4     5677.40     133.76           0 conventional 2015 Albany
## 5     5986.26     197.69           0 conventional 2015 Albany
## 6     6556.47     127.44           0 conventional 2015 Albany
## 7     8196.81     122.05           0 conventional 2015 Albany
## 8     6266.85     562.37           0 conventional 2015 Albany
## 9    11104.53     283.83           0 conventional 2015 Albany
## 10    8061.47     564.45           0 conventional 2015 Albany
# Inspect the structure of the data (dimensions, column names and types).
str(myData)
## 'data.frame':    18249 obs. of  14 variables:
##  $ X           : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ Date        : Factor w/ 169 levels "2015-01-04","2015-01-11",..: 52 51 50 49 48 47 46 45 44 43 ...
##  $ AveragePrice: num  1.33 1.35 0.93 1.08 1.28 1.26 0.99 0.98 1.02 1.07 ...
##  $ Total.Volume: num  64237 54877 118220 78992 51040 ...
##  $ X4046       : num  1037 674 795 1132 941 ...
##  $ X4225       : num  54455 44639 109150 71976 43838 ...
##  $ X4770       : num  48.2 58.3 130.5 72.6 75.8 ...
##  $ Total.Bags  : num  8697 9506 8145 5811 6184 ...
##  $ Small.Bags  : num  8604 9408 8042 5677 5986 ...
##  $ Large.Bags  : num  93.2 97.5 103.1 133.8 197.7 ...
##  $ XLarge.Bags : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ type        : Factor w/ 2 levels "conventional",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ year        : int  2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
##  $ region      : Factor w/ 54 levels "Albany","Atlanta",..: 1 1 1 1 1 1 1 1 1 1 ...
# Convert the `year` column to a factor so that it is treated as a
# categorical variable rather than as a number.
myData[["year"]] <- as.factor(myData[["year"]])
# Inspect the descriptive statistics of every column in the data.
summary(object = myData)
##        X                 Date        AveragePrice    Total.Volume     
##  Min.   : 0.00   2015-01-04:  108   Min.   :0.440   Min.   :      85  
##  1st Qu.:10.00   2015-01-11:  108   1st Qu.:1.100   1st Qu.:   10839  
##  Median :24.00   2015-01-18:  108   Median :1.370   Median :  107377  
##  Mean   :24.23   2015-01-25:  108   Mean   :1.406   Mean   :  850644  
##  3rd Qu.:38.00   2015-02-01:  108   3rd Qu.:1.660   3rd Qu.:  432962  
##  Max.   :52.00   2015-02-08:  108   Max.   :3.250   Max.   :62505647  
##                  (Other)   :17601                                     
##      X4046              X4225              X4770           Total.Bags      
##  Min.   :       0   Min.   :       0   Min.   :      0   Min.   :       0  
##  1st Qu.:     854   1st Qu.:    3009   1st Qu.:      0   1st Qu.:    5089  
##  Median :    8645   Median :   29061   Median :    185   Median :   39744  
##  Mean   :  293008   Mean   :  295155   Mean   :  22840   Mean   :  239639  
##  3rd Qu.:  111020   3rd Qu.:  150207   3rd Qu.:   6243   3rd Qu.:  110783  
##  Max.   :22743616   Max.   :20470573   Max.   :2546439   Max.   :19373134  
##                                                                            
##    Small.Bags         Large.Bags       XLarge.Bags                 type     
##  Min.   :       0   Min.   :      0   Min.   :     0.0   conventional:9126  
##  1st Qu.:    2849   1st Qu.:    127   1st Qu.:     0.0   organic     :9123  
##  Median :   26363   Median :   2648   Median :     0.0                      
##  Mean   :  182195   Mean   :  54338   Mean   :  3106.4                      
##  3rd Qu.:   83338   3rd Qu.:  22029   3rd Qu.:   132.5                      
##  Max.   :13384587   Max.   :5719097   Max.   :551693.7                      
##                                                                             
##    year                      region     
##  2015:5615   Albany             :  338  
##  2016:5616   Atlanta            :  338  
##  2017:5722   BaltimoreWashington:  338  
##  2018:1296   Boise              :  338  
##              Boston             :  338  
##              BuffaloRochester   :  338  
##              (Other)            :16221
# Table of the mean sales volume (Total.Volume) for each combination of
# region and year.
regional_volume_byYear <- aggregate(
  x = myData$Total.Volume,
  by = list(myData$region, myData$year),
  FUN = mean
)
colnames(regional_volume_byYear) <- c("region", "year", "mean")
# Print the table ordered from the highest mean volume to the lowest.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
regional_volume_byYear[
  order(regional_volume_byYear$mean, decreasing = TRUE),
]
##                  region year        mean
## 214             TotalUS 2018 21818010.59
## 160             TotalUS 2017 17591448.71
## 106             TotalUS 2016 17491914.83
## 52              TotalUS 2015 15935146.36
## 215                West 2018  3853211.88
## 208        SouthCentral 2018  3804321.23
## 169          California 2018  3489221.21
## 107                West 2016  3313243.61
## 161                West 2017  3255230.34
## 61           California 2016  3136426.76
## 154        SouthCentral 2017  3070024.68
## 115          California 2017  2996646.49
## 53                 West 2015  2929522.30
## 100        SouthCentral 2016  2924025.96
## 7            California 2015  2898148.40
## 46         SouthCentral 2015  2792832.63
## 192           Northeast 2018  2763480.70
## 209           Southeast 2018  2511852.94
## 178          GreatLakes 2018  2215028.25
## 138           Northeast 2017  2119314.16
## 84            Northeast 2016  2105106.59
## 188            Midsouth 2018  2013058.00
## 30            Northeast 2015  1955567.17
## 101           Southeast 2016  1905014.21
## 155           Southeast 2017  1856011.81
## 124          GreatLakes 2017  1786357.23
## 70           GreatLakes 2016  1709038.75
## 16           GreatLakes 2015  1628730.53
## 185          LosAngeles 2018  1585515.73
## 77           LosAngeles 2016  1568675.99
## 134            Midsouth 2017  1561825.95
## 47            Southeast 2015  1539377.01
## 131          LosAngeles 2017  1495105.42
## 80             Midsouth 2016  1483274.02
## 23           LosAngeles 2015  1425198.97
## 26             Midsouth 2015  1348287.66
## 198              Plains 2018  1167836.37
## 191             NewYork 2018   988718.97
## 144              Plains 2017   946038.03
## 90               Plains 2016   915784.94
## 36               Plains 2015   842680.68
## 181             Houston 2018   832604.88
## 174       DallasFtWorth 2018   757156.06
## 196       PhoenixTucson 2018   750533.56
## 137             NewYork 2017   706138.43
## 83              NewYork 2016   687947.72
## 29              NewYork 2015   678919.58
## 127             Houston 2017   651821.63
## 120       DallasFtWorth 2017   624914.93
## 66        DallasFtWorth 2016   612454.33
## 12        DallasFtWorth 2015   579916.43
## 142       PhoenixTucson 2017   577080.51
## 88        PhoenixTucson 2016   570035.45
## 73              Houston 2016   564469.10
## 34        PhoenixTucson 2015   549771.98
## 205        SanFrancisco 2018   540969.14
## 19              Houston 2015   532571.84
## 165 BaltimoreWashington 2018   506620.96
## 175              Denver 2018   493556.39
## 216    WestTexNewMexico 2018   491700.87
## 171             Chicago 2018   483926.32
## 162    WestTexNewMexico 2017   455669.13
## 67               Denver 2016   435651.56
## 108    WestTexNewMexico 2016   421225.30
## 187   MiamiFtLauderdale 2018   405892.53
## 54     WestTexNewMexico 2015   403145.61
## 121              Denver 2017   401500.73
## 97         SanFrancisco 2016   399323.34
## 9               Chicago 2015   398987.66
## 151        SanFrancisco 2017   395014.18
## 57  BaltimoreWashington 2016   393209.64
## 3   BaltimoreWashington 2015   390822.88
## 111 BaltimoreWashington 2017   386939.95
## 117             Chicago 2017   386610.89
## 63              Chicago 2016   380890.73
## 43         SanFrancisco 2015   379286.56
## 13               Denver 2015   376830.23
## 167              Boston 2018   359875.25
## 91             Portland 2016   358764.54
## 98              Seattle 2016   357580.95
## 199            Portland 2018   349907.94
## 145            Portland 2017   348108.06
## 164             Atlanta 2018   342975.94
## 206             Seattle 2018   330878.93
## 152             Seattle 2017   330448.73
## 79    MiamiFtLauderdale 2016   306587.12
## 59               Boston 2016   293954.95
## 133   MiamiFtLauderdale 2017   291322.93
## 113              Boston 2017   288779.93
## 203          Sacramento 2018   286156.24
## 96             SanDiego 2016   282096.64
## 44              Seattle 2015   279395.19
## 204            SanDiego 2018   278077.10
## 213               Tampa 2018   276752.29
## 193  NorthernNewEngland 2018   276507.02
## 195        Philadelphia 2018   273634.87
## 56              Atlanta 2016   272373.83
## 110             Atlanta 2017   271840.75
## 37             Portland 2015   268687.07
## 5                Boston 2015   263990.30
## 150            SanDiego 2017   256549.93
## 42             SanDiego 2015   255631.98
## 176             Detroit 2018   250956.56
## 194             Orlando 2018   246455.79
## 207       SouthCarolina 2018   243170.06
## 25    MiamiFtLauderdale 2015   241985.69
## 2               Atlanta 2015   223381.71
## 95           Sacramento 2016   222336.83
## 149          Sacramento 2017   218796.50
## 139  NorthernNewEngland 2017   213776.33
## 85   NorthernNewEngland 2016   212902.00
## 87         Philadelphia 2016   212577.85
## 41           Sacramento 2015   211351.33
## 141        Philadelphia 2017   210106.08
## 105               Tampa 2016   208468.74
## 33         Philadelphia 2015   200886.73
## 200   RaleighGreensboro 2018   199537.30
## 159               Tampa 2017   193869.22
## 122             Detroit 2017   193513.44
## 31   NorthernNewEngland 2015   193217.56
## 153       SouthCarolina 2017   188386.48
## 184            LasVegas 2018   187234.65
## 180 HartfordSpringfield 2018   186989.19
## 86              Orlando 2016   184757.57
## 68              Detroit 2016   181802.49
## 99        SouthCarolina 2016   179835.17
## 140             Orlando 2017   177466.06
## 14              Detroit 2015   172880.57
## 190    NewOrleansMobile 2018   169331.69
## 201     RichmondNorfolk 2018   166450.96
## 51                Tampa 2015   164726.90
## 130            LasVegas 2017   164470.12
## 179  HarrisburgScranton 2018   162575.49
## 76             LasVegas 2016   162567.44
## 45        SouthCarolina 2015   156210.26
## 146   RaleighGreensboro 2017   151213.66
## 72  HartfordSpringfield 2016   150941.83
## 172    CincinnatiDayton 2018   149928.64
## 22             LasVegas 2015   149446.43
## 170           Charlotte 2018   149412.07
## 189           Nashville 2018   146348.06
## 18  HartfordSpringfield 2015   145790.80
## 126 HartfordSpringfield 2017   144552.87
## 118    CincinnatiDayton 2017   142893.19
## 32              Orlando 2015   141534.39
## 82     NewOrleansMobile 2016   140772.75
## 92    RaleighGreensboro 2016   140075.52
## 136    NewOrleansMobile 2017   135155.68
## 64     CincinnatiDayton 2016   134816.02
## 147     RichmondNorfolk 2017   128744.57
## 71   HarrisburgScranton 2016   126200.23
## 93      RichmondNorfolk 2016   124404.09
## 125  HarrisburgScranton 2017   124003.00
## 38    RaleighGreensboro 2015   123243.60
## 135           Nashville 2017   122062.69
## 28     NewOrleansMobile 2015   121772.19
## 182        Indianapolis 2018   121194.78
## 173            Columbus 2018   118495.83
## 10     CincinnatiDayton 2015   113040.15
## 183        Jacksonville 2018   112850.18
## 116           Charlotte 2017   112222.55
## 39      RichmondNorfolk 2015   112029.60
## 17   HarrisburgScranton 2015   111902.93
## 177         GrandRapids 2018   107060.97
## 211             StLouis 2018   105735.83
## 202             Roanoke 2018   103782.08
## 103             StLouis 2016   102350.73
## 62            Charlotte 2016   101795.39
## 81            Nashville 2016   100953.60
## 119            Columbus 2017    96605.78
## 49              StLouis 2015    95217.07
## 128        Indianapolis 2017    94350.95
## 15          GrandRapids 2015    93662.91
## 75         Jacksonville 2016    93383.04
## 8             Charlotte 2015    91224.46
## 168    BuffaloRochester 2018    88492.42
## 74         Indianapolis 2016    88119.75
## 69          GrandRapids 2016    87950.69
## 65             Columbus 2016    87110.26
## 129        Jacksonville 2017    86729.13
## 157             StLouis 2017    84793.61
## 27            Nashville 2015    83287.66
## 123         GrandRapids 2017    82589.22
## 197          Pittsburgh 2018    79229.99
## 20         Indianapolis 2015    78740.98
## 148             Roanoke 2017    76279.36
## 11             Columbus 2015    75478.67
## 94              Roanoke 2016    73911.70
## 114    BuffaloRochester 2017    73021.75
## 60     BuffaloRochester 2016    69279.92
## 21         Jacksonville 2015    69004.58
## 40              Roanoke 2015    65180.89
## 143          Pittsburgh 2017    64609.70
## 163              Albany 2018    64249.42
## 186          Louisville 2018    61315.04
## 6      BuffaloRochester 2015    56665.72
## 89           Pittsburgh 2016    53040.36
## 210             Spokane 2018    51078.89
## 55               Albany 2016    50618.61
## 166               Boise 2018    50614.98
## 132          Louisville 2017    49623.67
## 109              Albany 2017    49354.55
## 78           Louisville 2016    48810.85
## 156             Spokane 2017    48599.32
## 102             Spokane 2016    48136.22
## 112               Boise 2017    44910.96
## 58                Boise 2016    44745.28
## 35           Pittsburgh 2015    43653.84
## 212            Syracuse 2018    43624.49
## 24           Louisville 2015    41240.42
## 48              Spokane 2015    40208.53
## 1                Albany 2015    38749.00
## 4                 Boise 2015    36388.05
## 158            Syracuse 2017    35207.34
## 104            Syracuse 2016    32974.70
## 50             Syracuse 2015    26291.67
# Box plots of the regional mean sales volumes, one box per year,
# with the boxes coloured by year.
p <- ggplot(
  data = regional_volume_byYear,
  mapping = aes(x = year, y = mean, color = year)
) +
  geom_boxplot()
p

# The "TotalUS" rows hold the total sales volume over all regions of the US.
# Remove those rows from the data set and draw the box plots again.
q <- ggplot(
  data = subset(regional_volume_byYear, region != "TotalUS"),
  mapping = aes(x = year, y = mean, color = year)
) +
  geom_boxplot()
q