-
Notifications
You must be signed in to change notification settings - Fork 31
Expand file tree
/
Copy pathEx1.Rmd
More file actions
145 lines (131 loc) · 4.45 KB
/
Ex1.Rmd
File metadata and controls
145 lines (131 loc) · 4.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
---
title: "Debugging Example"
author: "Win-Vector LLC"
date: "April 9, 2016"
output: html_document
---
* Video lecture: https://youtu.be/-P9UzQuJSH8
* This document and all example code: http://winvector.github.io/Debugging/
* Blog announcement (good place for questions and comments): http://www.win-vector.com/blog/2016/04/free-data-science-video-lecture-debugging-in-r/
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r defs, echo=FALSE}
library('ggplot2')
library('ROCR')
# pretend experiment, just an example
pretendBigExpensiveExperiment <- function() {
d <- data.frame(x=rnorm(5))
d$y <- (rnorm(nrow(d)) + d$x)>=0
d
}
# ROCR plot taken from https://github.com/WinVector/WVPlots
# #' Plot receiver operating characteristic plot.
#'
#' @param frame data frame to get values from
#' @param xvar name of the independent (input or model) column in frame
#' @param truthVar name of the dependent (output or result to be modeled) column in frame
#' @param title title to place on plot
#' @param ... no unnamed argument, added to force named binding of later arguments.
#' @examples
#'
#' set.seed(34903490)
#' x = rnorm(50)
#' y = 0.5*x^2 + 2*x + rnorm(length(x))
#' frm = data.frame(x=x,y=y,yC=y>=as.numeric(quantile(y,probs=0.8)))
#' frm$absY <- abs(frm$y)
#' frm$posY = frm$y > 0
#' frm$costX = 1
#' WVPlots::ROCPlot(frm, "x", "yC", title="Example ROC plot")
#'
#' @export
ROCPlot <- function(frame, xvar, truthVar,title,...) {
# checkArgs(frame=frame,xvar=xvar,yvar=truthVar,title=title,...)
outcol <- frame[[truthVar]]
predcol <- frame[[xvar]]
pred <- ROCR::prediction(predcol,outcol)
perf <- ROCR::performance(pred,'tpr','fpr')
auc <- as.numeric(ROCR::performance(pred,'auc')@y.values)
pf <- data.frame(
[email protected][[1]],
[email protected][[1]])
palletName = "Dark2"
plot= ggplot2::ggplot() +
ggplot2::geom_ribbon(data=pf,
ggplot2::aes(x=FalsePositiveRate,ymax=TruePositiveRate,ymin=0),
alpha=0.3) +
ggplot2::geom_point(data=pf,
ggplot2::aes(x=FalsePositiveRate,y=TruePositiveRate)) +
ggplot2::geom_line(data=pf,
ggplot2::aes(x=FalsePositiveRate,y=TruePositiveRate)) +
ggplot2::geom_line(ggplot2::aes(x=c(0,1),y=c(0,1))) +
ggplot2::coord_fixed() +
ggplot2::scale_fill_brewer(palette=palletName) +
ggplot2::scale_color_brewer(palette=palletName) +
ggplot2::ggtitle(paste(title,'\n',
truthVar, '~', xvar, '\n',
'AUC:',format(auc,digits=2)))
plot
}
```
```{r wrapperfn}
# debug runner from https://github.com/WinVector/WVPlots/blob/master/R/DebugFn.R
#' Capture arguments of exception throwing plot for later debugging.
#'
#' Run fn and print result, save arguments on failure. Use on systems like ggplot()
#' where some calculation is delayed until print().
#'
#' @param saveFile path to save RDS to.
#' @param fn function to call
#' @param ... arguments for fn
#' @return fn(...) normally, but if f(...) throws an exception save to saveFile RDS of list r such that do.call(r$fn,r$args) repeats the call to fn with args.
#'
#' @examples
#'
#' d <- data.frame(x=1:5)
#' DebugPrintFn('problem.RDS','PlotDistCountNormal',d,xvar='x','example')
#' tryCatch(
#' DebugPrintFn('problem.RDS','PlotDistCountNormal',
#' d,xvar='xmisspelled','example'),
#' error = function(e) { print(e) })
#'
#' @export
DebugPrintFn <- function(saveFile,fn,...) {
args <- list(...)
tryCatch({
res = do.call(fn,args)
print(res)
res
},
error = function(e) {
saveRDS(object=list(fn=fn,args=args),file=saveFile)
stop(paste0("Wrote '",saveFile,"' on catching '",as.character(e),"'"))
})
}
```
```{r example1}
# Run once, seems okay
set.seed(25352)
d <- pretendBigExpensiveExperiment()
print(ROCPlot(d,'x','y','example'))
```
```{r largerrun,error=TRUE,fig.keep='last'}
# Run a lot of important experiment
for(runNum in seq_len(100)) {
d <- pretendBigExpensiveExperiment()
print(ROCPlot(d,'x','y',paste('example',runNum)))
}
```
```{r rerunsaveexample,error=TRUE,fig.keep='last'}
# Run a lot of important experiment
# not the exact same set of experiments as we didn't reset pseudo-random seed!
for(runNum in seq_len(100)) {
d <- pretendBigExpensiveExperiment()
DebugPrintFn('problem.RDS','ROCPlot',d,'x','y',paste('example',runNum))
}
```
```{r examinetherpblem,error=TRUE}
problem <- readRDS('problem.RDS')
print(problem)
do.call(problem$fn,problem$args)
```