ISLR实验:R语言简介
目录
本文源自《统计学习导论:基于R语言应用》(ISLR) 中《2.3 实验:R语言简介》章节
注:本文在 Jupyter Notebook 中运行,为了正常显示,绝大部分语句都添加了 print()
函数。如果在命令行交互模式下执行,则无需 print()
函数。
基本命令
c()
函数创建向量,<-
进行赋值
# Build a numeric vector with c() and bind it to x using the `<-` operator.
x <- c(1, 3, 2, 5)
# Explicit print() so the value is shown in the notebook output.
print(x)
[1] 1 3 2 5
也可以使用 =
进行赋值,但不推荐
# `=` also performs assignment at top level; this overwrites the previous x.
# The accompanying text recommends `<-` over `=`.
x = c(1, 6, 2)
print(x)
[1] 1 6 2
# Second vector with the same number of elements as x; used in the length and addition examples.
y <- c(1, 4, 3)
使用 length()
函数获取向量长度
# length() reports how many elements x holds.
print(length(x))
[1] 3
# Same check for y; both vectors must have equal length for the addition that follows.
print(length(y))
[1] 3
向量加法
# `+` adds the two vectors element by element.
print(x + y)
[1] 2 10 5
ls()
函数查看所有的对象列表。
rm()
函数可以删除不需要的对象。
# ls() returns the names of the objects currently defined in the workspace.
print(ls())
[1] "x" "y"
# Delete x and y by name.
rm(x, y)
# Listing again shows that nothing is left (an empty character vector).
print(ls())
character(0)
删除所有对象
# Hand every name returned by ls() to rm() to clear the whole workspace in one call.
rm(list=ls())
matrix()
函数创建矩阵
# Create a 2 x 2 matrix, spelling out every argument name (data, nrow, ncol).
# No byrow argument is given, so the four values are laid out column by column.
x <- matrix(data = c(1, 2, 3, 4), nrow = 2, ncol = 2)
print(x)
[,1] [,2]
[1,] 1 3
[2,] 2 4
可以省略参数名称
# Same 2 x 2 matrix, this time relying on positional matching:
# the three arguments are taken as data, nrow and ncol in that order.
x <- matrix(c(1, 2, 3, 4), 2, 2)
print(x)
[,1] [,2]
[1,] 1 3
[2,] 2 4
创建矩阵时默认按列填充(先填满第一列,再填第二列),使用 byrow=TRUE
可改为按行填充
# byrow = TRUE fills the matrix row by row, so the first row becomes 1 2.
print(matrix(c(1, 2, 3, 4), 2, 2, byrow = TRUE))
[,1] [,2]
[1,] 1 2
[2,] 3 4
# sqrt() is applied to every element of the matrix x.
print(sqrt(x))
[,1] [,2]
[1,] 1.000000 1.732051
[2,] 1.414214 2.000000
# `^` squares each element individually; this is not a matrix product.
print(x^2)
[,1] [,2]
[1,] 1 9
[2,] 4 16
rnorm()
函数生成服从正态分布的随机数向量
cor()
计算变量之间的相关系数
# Draw 50 standard-normal values, then build y as x plus independent noise
# with mean 50 and a small standard deviation (0.1).
x <- rnorm(50)
y <- x + rnorm(50, mean = 50, sd = 0.1)
# The added noise is small compared with the spread of x, so the correlation is close to 1.
print(cor(x, y))
[1] 0.9960602
set.seed()
设置随机数种子
# Fix the random-number seed so the 50 draws printed below are reproducible.
set.seed(1303)
print(rnorm(50))
[1] -1.1439763145 1.3421293656 2.1853904757 0.5363925179 0.0631929665
[6] 0.5022344825 -0.0004167247 0.5658198405 -0.5725226890 -1.1102250073
[11] -0.0486871234 -0.6956562176 0.8289174803 0.2066528551 -0.2356745091
[16] -0.5563104914 -0.3647543571 0.8623550343 -0.6307715354 0.3136021252
[21] -0.9314953177 0.8238676185 0.5233707021 0.7069214120 0.4202043256
[26] -0.2690521547 -1.5103172999 -0.6902124766 -0.1434719524 -1.0135274099
[31] 1.5732737361 0.0127465055 0.8726470499 0.4220661905 -0.0188157917
[36] 2.6157489689 -0.6931401748 -0.2663217810 -0.7206364412 1.3677342065
[41] 0.2640073322 0.6321868074 -1.3306509858 0.0268888182 1.0406363208
[46] 1.3120237985 -0.0300020767 -0.2500257125 0.0234144857 1.6598706557
mean()
计算均值
var()
计算方差
sd()
计算标准差
# Re-seed the generator, draw 100 standard-normal values and report their sample mean.
set.seed(3)
y <- rnorm(100)
print(mean(y))
[1] 0.01103557
# Sample variance of y.
print(var(y))
[1] 0.7328675
# Square root of the variance; the printed value equals the one from sd(y).
print(sqrt(var(y)))
[1] 0.8560768
# sd() gives the standard deviation directly; it matches sqrt(var(y)).
print(sd(y))
[1] 0.8560768
图形
plot()
函数
# Two independent samples of 100 standard-normal values, drawn as a scatterplot.
x <- rnorm(100)
y <- rnorm(100)
plot(x, y)
添加坐标轴标签和标题
# Redraw the scatterplot with axis labels (xlab, ylab) and a title (main).
# The last argument is not followed by a comma: a trailing comma would add
# an empty argument to the call.
plot(
  x, y,
  xlab = "this is the x-axis",
  ylab = "this is the y-axis",
  main = "Plot of X vs Y"
)
使用 pdf()
或 jpeg()
等函数保存图片,绘图完成后需要调用 dev.off() 关闭图形设备
# Open a PDF graphics device: the plot that follows is written to Figure.pdf.
pdf("Figure.pdf")
plot(x, y, col="green")
# Close the device so the file is finalised.
dev.off()
seq()
用于生成序列
# seq(from, to) produces the whole numbers from 1 to 10.
x <- seq(1, 10)
print(x)
[1] 1 2 3 4 5 6 7 8 9 10
# The colon operator is shorthand for the same sequence.
x <- 1:10
print(x)
[1] 1 2 3 4 5 6 7 8 9 10
# 50 equally spaced points from -pi to pi. The argument is spelled out as
# length.out rather than relying on partial matching of `length`.
x <- seq(-pi, pi, length.out = 50)
contour()
函数用于绘制等值线图
# Use the same grid on both axes.
y <- x
# Evaluate cos(y) / (1 + x^2) for every (x, y) pair; outer() returns the values as a matrix.
f <- outer(x, y, function(x, y) cos(y) / (1 + x^2))
# Contour plot of f over the x-y grid.
contour(x, y, f)
# Overlay a denser set of contour lines (nlevels = 45) on the existing plot.
# add = TRUE draws on top of the current plot instead of starting a new one.
# TRUE is written out in full because T is an ordinary variable that can be reassigned.
# The stray trailing comma after the last argument has been removed.
contour(
  x, y, f,
  nlevels = 45,
  add = TRUE
)
# fa is the antisymmetric part of f: half the difference between f and its transpose.
fa <- (f - t(f)) / 2
# Contour plot of fa with 15 requested levels.
contour(x, y, fa, nlevels = 15)
image()
函数绘制热图(heatmap)
# Draw fa over the x-y grid as a colour-coded image.
image(x, y, fa)
persp()
函数绘制三维图
# Perspective (3-D surface) plot of fa, first with the default viewpoint.
persp(x, y, fa)
# theta and phi set the viewing direction; the calls below redraw the same
# surface from several different angles.
persp(x, y, fa, theta=30)
persp(x, y, fa, theta=30, phi=20)
persp(x, y, fa, theta=30, phi=70)
persp(x, y, fa, theta=30, phi=40)
索引数据
# 4 x 4 matrix holding 1..16, filled column by column; used in the indexing examples.
A <- matrix(1:16, 4, 4)
print(A)
[,1] [,2] [,3] [,4]
[1,] 1 5 9 13
[2,] 2 6 10 14
[3,] 3 7 11 15
[4,] 4 8 12 16
# Single element: row 2, column 3.
print(A[2, 3])
[1] 10
# Index vectors select several rows and columns at once: rows 1 and 3, columns 2 and 4.
print(A[c(1, 3), c(2, 4)])
[,1] [,2]
[1,] 5 13
[2,] 7 15
# Ranges work as indices too: rows 1-3, columns 2-4.
print(A[1:3, 2:4])
[,1] [,2] [,3]
[1,] 5 9 13
[2,] 6 10 14
[3,] 7 11 15
# Leaving the column index empty keeps every column: rows 1-2 in full.
print(A[1:2,])
[,1] [,2] [,3] [,4]
[1,] 1 5 9 13
[2,] 2 6 10 14
# Leaving the row index empty keeps every row: columns 1-2 in full.
print(A[, 1:2])
[,1] [,2]
[1,] 1 5
[2,] 2 6
[3,] 3 7
[4,] 4 8
# Selecting a single row returns a plain vector, as the printed output shows.
print(A[1,])
[1] 1 5 9 13
# A negative index drops the listed rows: everything except rows 1 and 3.
print(A[-c(1,3),])
[,1] [,2] [,3] [,4]
[1,] 2 6 10 14
[2,] 4 8 12 16
负数索引用于去掉对应的行或列
# Drop rows 1 and 3 and columns 1, 3 and 4; only column 2 of rows 2 and 4 remains,
# and the result is printed as a plain vector.
print(A[-c(1, 3), -c(1, 3, 4)])
[1] 6 8
dim()
函数返回行数和列数
# dim() returns the number of rows followed by the number of columns.
print(dim(A))
[1] 4 4
载入数据
使用 Auto.data 文件
# First attempt: read Auto.data with read.table()'s default settings.
Auto <- read.table("Auto.data")
# fix() opens the data frame in a spreadsheet-like editor for inspection.
# NOTE(review): fix() needs an interactive session - confirm it works in the notebook environment.
fix(Auto)
通过 header 参数将首行作为列名,并通过 na.strings 参数将问号识别为 NA
# Re-read the file, treating the first line as column names (header = TRUE)
# and converting "?" entries to NA (na.strings = "?").
# TRUE is written out in full because T is an ordinary variable that can be reassigned.
Auto <- read.table(
  "Auto.data",
  header = TRUE,
  na.strings = "?"
)
# Inspect the result in the data editor.
# NOTE(review): fix() needs an interactive session - confirm it works in the notebook environment.
fix(Auto)
# Load the CSV version of the data set with the same header and NA handling.
# TRUE is written out in full because T is an ordinary variable that can be reassigned.
Auto <- read.csv(
  "Auto.csv",
  header = TRUE,
  na.strings = "?"
)
# Number of rows and columns that were read.
print(dim(Auto))
[1] 397 9
# Select the first four rows of Auto (all columns).
Auto[1:4,]
删掉有 NA 的行
# Drop every row that contains at least one NA, then re-check the dimensions.
Auto <- na.omit(Auto)
print(dim(Auto))
[1] 392 9
# names() lists the column names of the data frame.
print(names(Auto))
[1] "mpg" "cylinders" "displacement" "horsepower" "weight"
[6] "acceleration" "year" "origin" "name"
其他的图形和数值汇总
使用 plot()
绘制定量变量的散点图
# Scatterplot of mpg against cylinders; `$` extracts each column from Auto.
plot(Auto$cylinders, Auto$mpg)
使用 attach()
函数将数据集加入搜索路径后,可以直接通过列名访问数据集中的变量
# attach() puts the columns of Auto on the search path, so they can be
# referenced by bare name without the `Auto$` prefix.
attach(Auto)
plot(cylinders, mpg)
as.factor()
将定量变量转换成定性变量
# Convert cylinders to a factor. This creates a separate `cylinders` variable;
# Auto$cylinders itself is unchanged (the summary(Auto) output still reports it as numeric).
cylinders <- as.factor(cylinders)
# With a factor on the x-axis, plot() draws one box per level instead of a scatterplot.
plot(cylinders, mpg)
plot(cylinders, mpg, col = "red")
# The logical flags below are written as TRUE rather than T, because T is an
# ordinary variable that can be reassigned.
plot(
  cylinders, mpg,
  col = "red",
  varwidth = TRUE
)
plot(
  cylinders, mpg,
  col = "red",
  varwidth = TRUE,
  horizontal = TRUE
)
plot(
  cylinders, mpg,
  col = "red",
  varwidth = TRUE,
  xlab = "cylinders",
  ylab = "MPG"
)
hist()
函数绘制直方图
# Histogram of mpg with default settings.
hist(mpg)
# col=2 picks colour number 2 of the active palette for the bars.
hist(mpg, col=2)
# breaks=15 requests roughly 15 bins.
hist(mpg, col=2, breaks=15)
pairs()
函数建立数据集中每一对变量的散点图矩阵
# The call below is left commented out because it raises an error.
# NOTE(review): presumably because Auto contains the character column `name`
# (see the summary(Auto) output) - confirm.
# pairs(Auto)
# Scatterplot matrix restricted to five columns via a one-sided formula;
# Auto is passed as the data argument.
pairs(~ mpg + displacement + horsepower + weight + acceleration, Auto)
identify()
函数用于交互式地标注图中的点:在图上点击某个点,即可显示其对应的标签(此处为 name)
# Draw the scatterplot first; identify() works on the plot that is currently displayed.
plot(horsepower, mpg)
# Label the points the user clicks on with the corresponding value of `name`.
# NOTE(review): requires an interactive graphics device - confirm it works in the notebook environment.
identify(horsepower, mpg, name)
summary()
显示数据汇总信息
# Per-column summary of the whole data frame.
summary(Auto)
mpg cylinders displacement horsepower weight
Min. : 9.00 Min. :3.000 Min. : 68.0 Min. : 46.0 Min. :1613
1st Qu.:17.00 1st Qu.:4.000 1st Qu.:105.0 1st Qu.: 75.0 1st Qu.:2225
Median :22.75 Median :4.000 Median :151.0 Median : 93.5 Median :2804
Mean :23.45 Mean :5.472 Mean :194.4 Mean :104.5 Mean :2978
3rd Qu.:29.00 3rd Qu.:8.000 3rd Qu.:275.8 3rd Qu.:126.0 3rd Qu.:3615
Max. :46.60 Max. :8.000 Max. :455.0 Max. :230.0 Max. :5140
acceleration year origin name
Min. : 8.00 Min. :70.00 Min. :1.000 Length:392
1st Qu.:13.78 1st Qu.:73.00 1st Qu.:1.000 Class :character
Median :15.50 Median :76.00 Median :1.000 Mode :character
Mean :15.54 Mean :75.98 Mean :1.577
3rd Qu.:17.02 3rd Qu.:79.00 3rd Qu.:2.000
Max. :24.80 Max. :82.00 Max. :3.000
# summary() also accepts a single variable.
summary(mpg)
Min. 1st Qu. Median Mean 3rd Qu. Max.
9.00 17.00 22.75 23.45 29.00 46.60