# .csv File ----
# Attach HSAUR3, the package that ships the Forbes2000 data set.
library(HSAUR3)
## Loading required package: tools
# Method 1: load the data set directly from the HSAUR3 package.
data("Forbes2000", package = "HSAUR3")
# Method 2: read a CSV export instead; the first column supplies the row names.
# csvForbes2000 <- read.table("Forbes2000.csv", header = TRUE, sep = ",", row.names = 1)
# .xlsx File ----
# Attach the xlsx package, which provides read.xlsx() and read.xlsx2().
library(xlsx)
# Method 1: read the first worksheet with read.xlsx().
# df <- read.xlsx("<name and extension of your file>", sheetIndex = 1)
# Method 2: read.xlsx2(), here starting at row 2 and keeping only column 2.
# df <- read.xlsx2("<name and extension of your file>", sheetIndex = 1, startRow = 2, colIndex = 2)
# Fun fact: according to the xlsx package documentation, read.xlsx2() is about
# an order of magnitude faster than read.xlsx() on sheets with 100,000 cells or more.
# Method 3: query the workbook through ODBC with the RODBC package.
# R> library("RODBC")
# R> cnct <- odbcConnectExcel("Forbes2000.xls")
# R> sqlQuery(cnct, "select * from \"Forbes2000\\$\"")
.rda Files
Search for .rda files and load them:
#list.files(pattern = "\\.rda")
#load("Forbes2000.rda")
#write.table(Forbes2000, file = "Forbes2000.csv", sep = ",", col.names = NA)
# Or write.csv;
# Or save(Forbes2000, file="Forbes2000.rda")
Load the Forbes2000 dataset.
# get help - ?function_name (e.g. ?nrow)
# Basic metadata: object class, dimensions, and column names.
class(Forbes2000)
## [1] "data.frame"
dim(Forbes2000)
## [1] 2000 8
nrow(Forbes2000)
## [1] 2000
ncol(Forbes2000)
## [1] 8
names(Forbes2000)
## [1] "rank" "name" "country" "category" "sales"
## [6] "profits" "assets" "marketvalue"
# Single columns, pulled out as plain vectors with [[ ]].
class(Forbes2000[["rank"]])
## [1] "integer"
length(Forbes2000[["rank"]])
## [1] 2000
Forbes2000[["name"]][1]
## [1] "Citigroup"
Factor and numeric variables:
# Factor column: its class, the number of levels, and the level labels.
class(Forbes2000[["category"]])
## [1] "factor"
nlevels(Forbes2000[["category"]])
## [1] 27
levels(Forbes2000[["category"]])
## [1] "Aerospace & defense" "Banking"
## [3] "Business services & supplies" "Capital goods"
## [5] "Chemicals" "Conglomerates"
## [7] "Construction" "Consumer durables"
## [9] "Diversified financials" "Drugs & biotechnology"
## [11] "Food drink & tobacco" "Food markets"
## [13] "Health care equipment & services" "Hotels restaurants & leisure"
## [15] "Household & personal products" "Insurance"
## [17] "Materials" "Media"
## [19] "Oil & gas operations" "Retailing"
## [21] "Semiconductors" "Software & services"
## [23] "Technology hardware & equipment" "Telecommunications services"
## [25] "Trading companies" "Transportation"
## [27] "Utilities"
# Frequency table: number of companies in each category level.
table(Forbes2000[["category"]])
##
## Aerospace & defense Banking
## 19 313
## Business services & supplies Capital goods
## 70 53
## Chemicals Conglomerates
## 50 31
## Construction Consumer durables
## 79 74
## Diversified financials Drugs & biotechnology
## 158 45
## Food drink & tobacco Food markets
## 83 33
## Health care equipment & services Hotels restaurants & leisure
## 65 37
## Household & personal products Insurance
## 44 112
## Materials Media
## 97 61
## Oil & gas operations Retailing
## 90 88
## Semiconductors Software & services
## 26 31
## Technology hardware & equipment Telecommunications services
## 59 67
## Trading companies Transportation
## 25 80
## Utilities
## 110
# Numeric column: class, median, range, and the six-number summary of sales.
class(Forbes2000[["sales"]])
## [1] "numeric"
median(Forbes2000[["sales"]])
## [1] 4.365
range(Forbes2000[["sales"]])
## [1] 0.01 256.33
summary(Forbes2000[["sales"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.010 2.018 4.365 9.697 9.547 256.330