学习R语言：输入与输出

November 07, 2020 (最后修改: November 12, 2020)

本文内容来自《R 语言编程艺术》(The Art of R Programming)，有部分修改

连接键盘与显示器

`scan()`

scan() 函数可以从文件中读取一个向量，也可以从键盘输入一个向量

四个文件：

z1.txt

123
4 5
6

z2.txt

123
4.2 5
6

z3.txt

abc
de f
g

z4.txt

abc
123 6
y

scan("../data/z1.txt")

Read 4 items
[1] 123   4   5   6

scan("../data/z2.txt")

Read 4 items
[1] 123.0   4.2   5.0   6.0

读取 z3.txt 会出错，因为 scan() 默认使用 double 模式读取。

scan("../data/z3.txt")

Error in scan("../data/z3.txt") : scan() expected 'a real', got 'abc'

使用 what 参数修改读取模式

scan("../data/z3.txt", what="")

Read 4 items
[1] "abc" "de"  "f"   "g"

scan("../data/z4.txt", what="")

Read 4 items
[1] "abc" "123" "6"   "y"

scan() 默认情况下以空白字符 (whitespace) 作为分隔。可以使用 sep 参数手动设置

x1 <- scan(
  "../data/z3.txt", 
  what=""
)
x1

Read 4 items
[1] "abc" "de"  "f"   "g"

x2 <- scan(
  "../data/z3.txt", 
  what="", 
  sep="\n"
)
x2

Read 3 items
[1] "abc"  "de f" "g"

print(x1[2])
print(x2[2])

[1] "de"
[1] "de f"

scan() 支持从键盘读数据

v <- scan("")

1: 12 5 13
4: 3 4 5
7: 8
8: 
Read 7 items

[1] 12  5 13  3  4  5  8

使用 quiet=TRUE 关闭读取项目数的报告

x3 <- scan(
  "../data/z1.txt"
)

Read 4 items

x3 <- scan(
  "../data/z1.txt",
  quiet=TRUE
)

`readline()`

readline() 从键盘读取单行数据

w <- readline()

abc de f

[1] "abc de f"

可以增加一个可选的字符串作为提示

inits <- readline("type your initials: ")

type your initials: NM

inits

[1] "NM"

输出到显示器

print() 函数

x <- 1:3
print(x^2)

[1] 1 4 9

cat() 函数

print("abc")

[1] "abc"

cat("abc")

abc

注意：使用 cat 需要手动添加换行符 "\n"

cat() 各个参数以空格分隔

x

[1] 1 2 3

cat(x, "abc", "de\n")

1 2 3 abc de

sep 参数设置分隔符

cat(x, "abc", "de\n", sep="")

123abcde

cat(x, "abc", "de\n", sep="\n")

1
2
3
abc
de

也可以设置为字符串向量

x <- c(5, 12, 13, 8, 88)
cat(x, sep=c(".", ".", ".", "\n", "\n"))

5.12.13.8
88

读写文件

读取数据框或矩阵

使用 read.table() 函数读取数据框

z.txt 文件

name age
John 25
Mary 28
Jim 19

z <- read.table("../data/z.txt", header=TRUE)
z

  name age
1 John  25
2 Mary  28
3  Jim  19

读取矩阵

x.txt 文件

1 0 1
1 1 1
1 1 0
1 1 0
0 0 1

x <- matrix(
  scan("../data/x.txt"), 
  nrow=5,
  byrow=TRUE
)
x

Read 15 items
     [,1] [,2] [,3]
[1,]    1    0    1
[2,]    1    1    1
[3,]    1    1    0
[4,]    1    1    0
[5,]    0    0    1

另一种方法先读取为数据框，再转为矩阵

# Read a text file into a matrix.
#
# The file is first parsed by read.table() with its default settings, and the
# resulting data frame is then converted with as.matrix().
#
# Args:
#   filename: path of the file to read (passed straight to read.table()).
#
# Returns:
#   The matrix produced by as.matrix() on the parsed data frame.
read.matrix <- function(filename) {
  parsed <- read.table(filename)
  as.matrix(parsed)
}

read.matrix("../data/x.txt")

     V1 V2 V3
[1,]  1  0  1
[2,]  1  1  1
[3,]  1  1  0
[4,]  1  1  0
[5,]  0  0  1

读取文本文件

readLines() 函数读取文本文件

z5.txt

John 25
Mary 28
Jim 19

z5 <- readLines("../data/z5.txt")
z5

[1] "John 25" "Mary 28" "Jim 19"

连接

连接 (connection) 是 R 中用于多种 I/O 操作的基本机制。

可以通过 file()，url() 等函数创建。

逐行读取前面的 z5.txt 文件

c <- file("../data/z5.txt", "r")
c

A connection with                            
description "../data/z5.txt"
class       "file"          
mode        "r"             
text        "text"          
opened      "opened"        
can read    "yes"           
can write   "no"

readLines(c, n=1)

[1] "John 25"

readLines(c, n=1)

[1] "Mary 28"

readLines(c, n=1)

[1] "Jim 19"

readLines(c, n=1)

character(0)

遇到文件结束符 (EOF) 时，readLines() 返回长度为 0 的字符向量 character(0)，可以用 length() 判断是否读到文件末尾

c <- file("../data/z5.txt", "r")
while(TRUE) {
  rl <- readLines(c, n=1)
  if (length(rl) == 0) {
    print("reached the end")
    break
  } else print(rl)
}

[1] "John 25"
[1] "Mary 28"
[1] "Jim 19"
[1] "reached the end"

seek() 重定位读取位置

c <- file("../data/z5.txt", "r")
readLines(c, n=2)

[1] "John 25" "Mary 28"

seek(con=c, where=0)

[1] 26

返回值为定位前的读取位置

readLines(c, n=1)

[1] "John 25"

使用 close() 关闭连接

close(c)

扩展案例：读取PUMS普查数据

# Read a PUMS file into a data frame with one row per person record.
#
# The file is consumed group by group: a header line, followed by as many
# person lines as the header line announces in columns 106-107.
#
# Args:
#   pf:   path of the PUMS file; opened as a read-only connection.
#   flds: named list; each element is c(start, end), the column range of an
#         integer field to pull out of every person line (see makerow()).
#
# Returns:
#   A data frame with a `serno` column (the integer in columns 2-8 of the
#   header line that precedes the person line) plus one column per name in
#   `flds`. An empty data.frame() when the file contains no person lines.
#
# Errors:
#   Stops if the file ends before all announced person lines were read.
extractpums <- function(pf,flds) {
   con <- file(pf,"r")
   # Close the connection on every exit path, including errors.
   on.exit(close(con), add = TRUE)
   # Rows are collected in a list and bound once at the end; growing a data
   # frame with rbind() per record copies the whole frame each time.
   rows <- list()
   repeat {
      hrec <- readLines(con,1)
      if (length(hrec) == 0) break   # end of file
      serno <- intextract(hrec,c(2,8))
      npr <- intextract(hrec,c(106,107))   # number of person lines that follow
      if (npr > 0)
         for (i in seq_len(npr)) {
            prec <- readLines(con,1)
            if (length(prec) == 0)
               stop("unexpected end of file while reading person records",
                    call. = FALSE)
            rows[[length(rows) + 1L]] <- makerow(serno,prec,flds)
         }
   }
   if (length(rows) == 0) return(data.frame())
   # optional = TRUE keeps the field names exactly as given in `flds`.
   do.call(rbind, lapply(rows, as.data.frame, optional = TRUE))
}

# Build one output row (as a named list) from a single person line.
#
# Args:
#   srn: value stored under the name "serno".
#   pr:  the person line, a character string.
#   fl:  named list of c(start, end) column ranges; each name becomes an
#        entry of the row, filled with intextract(pr, range).
#
# Returns:
#   A list whose first entry is `serno`, followed by one entry per name in `fl`.
makerow <- function(srn,pr,fl) {
   row <- list()
   row[["serno"]] <- srn
   fields <- names(fl)
   for (k in seq_along(fields)) {
      key <- fields[k]
      row[[key]] <- intextract(pr,fl[[key]])
   }
   row
}

# Extract the characters of `s` between two column positions and convert
# them to integer.
#
# Args:
#   s:   character string to read from.
#   rng: vector whose first two elements are the start and end positions
#        handed to substr().
#
# Returns:
#   as.integer() of the extracted substring.
intextract <- function(s,rng) {
   first <- rng[1]
   last <- rng[2]
   as.integer(substr(s,first,last))
}

测试

pumsdf <- extractpums(
  "../data/pums.short.txt",
  list(
    Gender=c(23, 23),
    Age=c(25, 26)
  )
)

head(pumsdf)

  serno Gender Age
1    29      1  83
2    29      2  81
3  5121      1  45
4  5121      2  42
5  5121      1   7
6  5943      1  35

通过URL访问远程文件

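read.table() 和 scan() 等一些 I/O 函数可以使用 URL 作为参数

注意：该数据文件没有表头行，而下面的 read.csv() 默认 header=TRUE，所以第一行数据被当成了列名（输出中的 X11、X0、X71 等）；实际使用时应加上 header=FALSE。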

uci <- "http://archive.ics.uci.edu/ml/machine-learning-databases/"
uci <- paste(uci, "echocardiogram/echocardiogram.data", sep="")
ecc <- read.csv(uci)

head(ecc)

  X11 X0 X71 X0.1 X0.260     X9 X4.600  X14    X1  X1.1 name X1.2 X0.2
1  19  0  72    0  0.380      6  4.100   14 1.700 0.588 name    1    0
2  16  0  55    0  0.260      4  3.420   14     1     1 name    1    0
3  57  0  60    0  0.253 12.062  4.603   16 1.450 0.788 name    1    0
4  19  1  57    0  0.160     22  5.750   18 2.250 0.571 name    1    0
5  26  0  68    0  0.260      5  4.310   12     1 0.857 name    1    0
6  13  0  62    0  0.230     31  5.430 22.5 1.875 0.857 name    1    0

写文件

write.table() 将数据框写入文件

kids <- c("Jack", "Jill")
ages <- c(12, 10)
d <- data.frame(
  kids, ages, 
  stringsAsFactors=FALSE
)
d

  kids ages
1 Jack   12
2 Jill   10

write.table(d, "kds.txt")

kds.txt 文件内容：

"kids" "ages"
"1" "Jack" 12
"2" "Jill" 10

注意：最后一行为空行

cat() 可以用于写入文件。

cat("abc\n", file="u.txt")

append=TRUE 支持追加

cat("de\n", file="u.txt", append=TRUE)

u.txt

abc
de

写入多个字段

cat(file="v.txt", 1, 2, "xyz\n")

v.txt

1 2 xyz

writeLines() 写入文件。使用连接，需要手动关闭文件。

c <- file("w.txt", "w")
writeLines(c("abc", "de", "f"), c)
close(c)

w.txt

abc
de
f

获取文件和目录信息

file.info("w.txt")

      size isdir mode               mtime               ctime               atime exe
w.txt   12 FALSE  666 2020-11-07 13:04:46 2020-11-07 13:04:46 2020-11-07 13:23:21  no

dir(".")

[1] "io.nb.html"              "io.Rmd"                  "kds.txt"                
[4] "pandoce78036d12c46.html" "u.txt"                   "v.txt"                  
[7] "w.txt"

file.exists("wdp.txt")

[1] FALSE

getwd()

[1] "D:/windroc/project/study/r/tarp/chap10"

setwd("../")

扩展案例：多个文件内容的和

filea：5,12,13
fileb：3,4,5
filec：24,25,7

设置 dir() 函数的 recursive 参数会列出所有子目录中的文件

# Sum the numbers stored in every file below a directory.
#
# Files are listed recursively with dir(); each one is read with scan() as
# comma-separated values and its sum is added to a running total.
#
# Args:
#   drtr: path of the directory to walk.
#
# Returns:
#   The total over all files; 0 when no files are found.
sumtree <- function(drtr) {
  total <- 0
  for (rel in dir(drtr, recursive=TRUE)) {
    path <- file.path(drtr, rel)
    # Skip anything file.info() reports as a directory.
    if (file.info(path)$isdir) next
    values <- scan(path, quiet=TRUE, sep=",")
    total <- total + sum(values)
  }
  total
}