颜林林
2015年6月7日 @ 北京大学
library(ggplot2)
g <- ggplot(CODE, aes(x = slide, y = lines)) + # 用“+”指定绘图参数
geom_point(size = 3, color = "red") +
xlab("页码") + ylab("代码行数") + labs(title = "本幻灯片的R代码")
g # 变量显示即绘图,结果输出到图形窗口,而非终端
foo <- function(x) { Sys.sleep(1); return(x) } # 原始函数,无缓存
library(memoise)
Foo <- memoise(foo) # 构建一个有缓存的新函数
for (x in rep(1:4, 3)) {
system.time(foo(x)) # 无缓存
system.time(Foo(x)) # 有缓存
}
多层函数调用
# 计算代码最多的三张幻灯片的总代码行数
sum(head(sort(CODE$lines, decreasing = TRUE), 3))
## [1] 46
t1 <- sort(CODE$lines, decreasing = TRUE)
t2 <- head(t1, 3)
sum(t2)
管道写法
library(magrittr) # 或 library(dplyr)
CODE$lines %>%
sort(decreasing = TRUE) %>%
head(3) %>%
sum
另外两种管道写法
library(pipeR)
Pipe(CODE$lines) $
sort(decreasing = TRUE) $
head(3) $
sum
pipeline({
CODE$lines
sort(decreasing = TRUE)
head(3)
sum
})
对象封装
运算符重载
函数与闭包 (Closure)
自定义运算符
惰性求值 (Lazy evaluation)
元编程 (Meta programming)
a <- CODE$lines # 创建一个简单对象
class(a) <- "my_plot" # 定义S3类对象(S3类其实只是属性)
a # 显示变量内容,相当于调用了 print(a)
## [1] 5 7 10 11 3 4 4 3 8 12 15 5 3 2 3 3 8 1
## [19] 3 4 8 6 12 5 6 10 5 19 7 1
## attr(,"class")
## [1] "my_plot"
print # 查看print()的实现
## function (x, ...)
## UseMethod("print")
## <bytecode: 0x21baee8>
## <environment: namespace:base>
# 根据a的class属性,依次进行如下尝试:
print.my_plot(a)
print.default(a)
print.my_plot <- function(x) {
plot(x, col = "red") # 画图或其它操作
}
a # 再次显示变量,就变成了图形输出
`+`(2, 3) # 2 + 3
`<-`(x, 1:5) # x <- 1:5
`[`(x, 3) # x[3]
`+.my_plot` <- function(obj, opt) {
attr(obj, names(opt)) <- opt
return(obj)
}
print.my_plot <- function(x) {
plot(x, type = attr(x, "type"), col = attr(x, "col"))
}
a + c(type = "b") + c(col = "blue") # 如此,就可以通过"+"追加绘图参数
foo <- function(x) { Sys.sleep(1); return(x) } # 原始函数,无缓存
cache <- list() # 缓存变量
Foo <- function(x) {
key <- as.character(x) # 用参数做主键
if (!is.null(cache[[key]])) { # 检查是否已计算过
return(cache[[key]]) # 返回缓存值
} else {
res <- foo(x) # 调用原始函数进行计算
cache[[key]] <<- res # 保存至缓存
return(res)
}
}
library(memoise)
Foo <- memoise(foo) # 创建带缓存的函数
my_memoise <- function(f) {
cache <- list() # 缓存变量
return(function(x) {
key <- as.character(x) # 用参数做主键
if (!is.null(cache[[key]])) { # 检查是否已计算过
return(cache[[key]]) # 返回缓存值
} else {
res <- f(x) # 调用原始函数进行计算
cache[[key]] <<- res # 保存至缓存
return(res)
}
})
}
Foo <- my_memoise(foo)
for (x in rep(1:4, 3)) {
system.time(foo(x)) # 无缓存
system.time(Foo(x)) # 有缓存
}
Foo
## function(x) {
## key <- as.character(x) # 用参数做主键
## if (!is.null(cache[[key]])) { # 检查是否已计算过
## return(cache[[key]]) # 返回缓存值
## } else {
## res <- f(x) # 调用原始函数进行计算
## cache[[key]] <<- res # 保存至缓存
## return(res)
## }
## }
## <environment: 0x3cb01b0>
environment(Foo)
## <environment: 0x3cb01b0>
ls(envir = environment(Foo))
## [1] "cache" "f"
foo
## function(x) { Sys.sleep(1); return(x) }
environment(foo)
## <environment: R_GlobalEnv>
`%+%` <- function(a, b) paste(a, b)
"hello" %+% "magic"
## [1] "hello magic"
`%+%`("hello", "magic")
## [1] "hello magic"
改变函数调用的书写方式及顺序
+
a
b
,或写成函数形式:+(a, b)
a
+
b
a
b
+
`%|%` <- function(x, fun) fun(x)
# plot(sort(CODE$lines))
CODE$lines %|% sort %|% plot
# plot(sort(CODE$lines))
CODE$lines %|% sort %|% plot # 不带其它参数
# sum(head(sort(CODE$lines, decreasing = TRUE), 3))
CODE$lines %>%
sort(decreasing = TRUE) %>% # 带有其它参数
head(3) %>%
sum
sort(decreasing = TRUE) # 并不是可以正确运行的完整语句
## Error in sort.default(decreasing = TRUE): argument "x" is missing, with no default
下面的语句中,两个加法运算都会被执行吗?
ifelse(TRUE, 1 + 2, 3 + 4)
揭示真相
suppressWarnings(rm(a, b))
ifelse(TRUE, a, b)
## Error in ifelse(TRUE, a, b): object 'a' not found
ifelse(FALSE, a, b)
## Error in ifelse(FALSE, a, b): object 'b' not found
再来一个自定义函数
foo <- function(arg) {
cat("I don't care the 'arg'\n")
}
foo(a_variable-that.does$not%%exist)
## I don't care the 'arg'
对于语句:
CODE$lines %>% sort(decreasing = TRUE)
相当于:
`%>%` <- function(x, exp) {
# 刚进入此函数时:
# x 值为 CODE$lines
# y 值为 sort(decreasing = TRUE)
# 都还没有进行任何计算。此时有机会重新调整代码,使其能够被正确执行
...
}
语法解析:substitute(), parse(), deparse()
表达式构造:quote(), as.call()
表达式求值:eval(), source()
函数信息:match.call(), match.fun()
`%|%` <- function(x, exp) {
f <- as.list(substitute(exp)) # list(head, 3)
f <- c(f[1], substitute(x), f[-1]) # list(head, quote(1:5), 3)
eval(as.call(f), envir = parent.frame()) # head(1:5, 3)
}
CODE$lines %|% head(3)
## [1] 5 7 10
library(pipeR)
Pipe(CODE$lines) $
sort(decreasing = TRUE) $
head(3) $
sum
library(pipeR)
pipeline({
CODE$lines
sort(decreasing = TRUE)
head(3)
sum
})
as.list(substitute({
cmd1
cmd2(arg)
cmd3
}))
## [[1]]
## `{`
##
## [[2]]
## cmd1
##
## [[3]]
## cmd2(arg)
##
## [[4]]
## cmd3
foo <- function() {
cat("Hi, I am disappearing...\n")
}
bomb(foo) # 预埋焚毁装置
foo() # 第一次调用
## Hi, I am disappearing...
foo() # 第二次调用
## Error in eval(expr, envir, enclos): could not find function "foo"
bomb <- function(func) {
a <- deparse(func)
a <- c(head(a, -1),
quote(rm(list = as.character(match.call()[[1]]),
envir = sys.frame(-1))),
tail(a, 1))
assign(as.character(substitute(func)),
eval(parse(text = a)),
envir = environment(func))
}
foo <- function() {
cat("Hi, I am disappearing...\n")
}
bomb(foo)
foo
## function ()
## {
## cat("Hi, I am disappearing...\n")
## rm(list = as.character(match.call()[[1]]), envir = sys.frame(-1))
## }
## <environment: 0x3c80bd8>
Bomb <- function(func) {
a <- deparse(func)
a <- c(head(a, -1),
quote(rm(list = as.character(match.call()[[1]]),
envir = sys.frame(-1))),
tail(a, 1))
assign(as.character(substitute(func)),
eval(parse(text = a)),
envir = environment(func))
eval(parse(text = paste0("class(",
as.character(substitute(func)),
") <- 'bombed'")),
envir = environment(func))
}
print.bombed <- function(f) {
a <- deparse(unclass(f))
a <- c(head(a, -2), tail(a, 1))
cat(a, sep = "\n")
}
foo <- function() {
cat("Hi, I am disappearing...\n")
}
Bomb(foo)
foo
## function ()
## {
## cat("Hi, I am disappearing...\n")
## }
foo()
## Hi, I am disappearing...
foo()
## Error in eval(expr, envir, enclos): could not find function "foo"
解构:ggplot画图、缓存化、管道
学习:对象封装、运算符重载、闭包、惰性求值、元编程
未涉及:更底层的R实现、R与其它语言的结合
方法:“重新发明轮子”
sessionInfo()
## R version 3.2.0 (2015-04-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Gentoo/Linux
##
## locale:
## [1] LC_CTYPE=en_US.utf8 LC_NUMERIC=C
## [3] LC_TIME=en_US.utf8 LC_COLLATE=en_US.utf8
## [5] LC_MONETARY=en_US.utf8 LC_MESSAGES=en_US.utf8
## [7] LC_PAPER=en_US.utf8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.utf8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] methods stats graphics grDevices utils
## [6] datasets base
##
## other attached packages:
## [1] pipeR_0.6 magrittr_1.5 memoise_0.2.1 ggplot2_1.0.1
## [5] dplyr_0.4.1 knitr_1.10
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.11.5 MASS_7.3-40 munsell_0.4.2
## [4] colorspace_1.2-6 stringr_1.0.0 plyr_1.8.2
## [7] tools_3.2.0 parallel_3.2.0 grid_3.2.0
## [10] gtable_0.1.2 DBI_0.3.1 htmltools_0.2.6
## [13] yaml_2.1.13 lazyeval_0.1.10 assertthat_0.1
## [16] digest_0.6.8 reshape2_1.4.1 formatR_1.2
## [19] evaluate_0.7 rmarkdown_0.5.1 labeling_0.3
## [22] stringi_0.4-1 scales_0.2.4 proto_0.3-10