Material based on:
In this lab we will use the R–packages gRbase, gRain and gRim.
#gRbase dependencies
source("http://bioconductor.org/biocLite.R")
biocLite(c("graph", "RBGL", "Rgraphviz"))
install.packages('gRbase')
install.packages('gRain')
install.packages('gRim')
The task: Build Bayesian network for diagnosing coronary artery disease (CAD) from these data:
library(graph); library(RBGL); library(Rgraphviz)
library(gRbase); library(gRain); library(gRim)
data(cad1)
head(cad1)
summary(cad1)
For prediction model validation use cad2
data. Notice: the information is incomplete (NA’s).
data(cad2)
head(cad2)
summary(cad2)
Use the function ug()
from gRbase to create an undirected graph
library(gRbase)
g1 <- ug(~a:b:e + a:c:e + b:d:e + c:d:e + c:g + d:f)
class(g1)
## [1] "graphNEL"
## attr(,"package")
## [1] "graph"
as(g1, "matrix")
## a b e c d g f
## a 0 1 1 1 0 0 0
## b 1 0 1 0 1 0 0
## e 1 1 0 1 1 0 0
## c 1 0 1 0 1 1 0
## d 0 1 1 1 0 0 1
## g 0 0 0 1 0 0 0
## f 0 0 0 0 1 0 0
Graphs can be displayed with the plot()
method
library(Rgraphviz)
plot(g1)
Graphs can also be defined using adjacency matrices:
m <- matrix(c(0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0,
1, 1, 0, 1, 1, 1, 1, 1, 0), nrow = 5)
rownames(m) <- colnames(m) <- c("a", "b", "c", "d", "e")
m
as(m, "graphNEL")
Graphs can be altered using addEdge()
and removeEdge()
functions
g1a <- addEdge("a", "d", g1)
g1b <- removeEdge("c", "d", g1)
par(mfrow = c(1, 3))
plot(g1, main = "g1")
plot(g1a, main = "g1a")
plot(g1b, main = "g1b")
Use subGraph
function to extract subgraph of g1
containing “b”, “c”, “d”, “e” edges. The output should look like:
library(RBGL)
g1 <- ug(~a:b:e + a:c:e + b:d:e + c:d:e + c:g + d:f)
is.complete(g1)
is.complete(g1,set=c("a","b","e"))
str(maxClique(g1))
library(RBGL)
g2 <- ug(~a:b:e + a:c:e + b:d:e + c:d:e)
plot(g2)
separates("a", "d", c("b", "c", "e"), g2)
## [1] TRUE
g3 <- ug(~ A:B + B:C:D + C:E + D:E)
plot(g3)
g3 <- ug(~ A:B + B:C:D + C:E + D:E)
plot(g3)
separates(c("D","E"), "A", "B", g3)
## [1] TRUE
A DAG is created by the dag()
function in gRbase
. The graph can be specified by a list of formulas or by a list of vectors. The following statements are equivalent:
dag0 <- dag(~a, ~b * a, ~c * a * b, ~d * c * e, ~e * a)
dag0v2 <- dag(~a + b:a + c:a:b + d:c:e + e:a)
dag0v3 <- dag("a", c("b", "a"), c("c", "a", "b"), c("d", "c", "e"), c("e", "a"))
dag0
## A graphNEL graph with directed edges
## Number of Nodes = 5
## Number of Edges = 6
dag0v2
## A graphNEL graph with directed edges
## Number of Nodes = 5
## Number of Edges = 6
dag0v3
## A graphNEL graph with directed edges
## Number of Nodes = 5
## Number of Edges = 6
Note that dbc and d:b:c means that ”d” has parents ”b” and ”c”.
plot(dag0)
par(mfrow=c(1,3))
plot(dag0)
plot(dag0v2)
plot(dag0v3)
Similarly, directed graphs can also be created from matrices:
(m <- matrix(c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 1, 1, 0), nrow = 5))
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0 1 0 0 0
## [2,] 0 0 1 1 0
## [3,] 0 0 0 0 1
## [4,] 0 0 0 0 1
## [5,] 0 0 0 0 0
rownames(m) <- colnames(m) <- letters[1:5]
dg <- as(m, "graphNEL")
plot(dg)
dag0 <- dag(~a + b:a + c:a:b + d:c:e + e:a)
parents("d", dag0)
## [1] "c" "e"
children("c", dag0)
## [1] "d"
Reading conditional independencies is different:
par(mfrow=c(3,1),mar=c(3,1,2,1))
plot(dag(~a+b:a+c:b),"circo")
plot(dag(~c+b:c+a:b),"circo")
plot(dag(~b+a:b+c:b),"circo")
plot(dag(~a+c+b:a:c),"circo")
An important operation on DAGs is to (i) add edges between the parents of each node, and then (ii) replace all directed edges with undirected ones, thus returning an undirected graph. This is known as moralization.
dag0m <- moralize(dag0)
par(mfrow=c(1,2))
plot(dag0)
plot(dag0m)
The output should look like:
## List of 1
## $ maxCliques:List of 6
## ..$ : chr [1:3] "either" "tub" "lung"
## ..$ : chr [1:3] "either" "bronc" "dysp"
## ..$ : chr [1:2] "either" "xray"
## ..$ : chr [1:2] "smoke" "lung"
## ..$ : chr [1:2] "smoke" "bronc"
## ..$ : chr [1:2] "asia" "tub"
## Parents of 'either': tub lung
## Children of 'either': xray dysp
## Parents of 'smoke':
## Children of 'smoke': lung bronc
This is example based on Lauritzen and Spiegehalter (1988) where:
ancestralSet(c("a", "c", "e"), dag0)
## [1] "a" "b" "c" "e"
plot(ancestralGraph(c("a", "c", "e"), dag0))
Giving the DAG from Exercise2:
smoke
and either
smoke
and either
tub
andsmoke
xray
The output should look like:
## 'Ancestral set of 'smoke' and 'either': asia tub smoke lung either
## 'Ancestral set of 'tub' and 'smoke': asia tub smoke
## 'Ancestral set of 'xray: asia tub smoke lung either xray
To check if A ⊥⊥ B|S form the ancestral graph of A ∪ B ∪ S. Moralize this ancestral graph. If A and B are separated by S in this moral graph then A ⊥⊥ B|S.
In our case it can be used as follows:
plot(dag0)
par(mfrow=c(1,2))
plot(ancestralGraph(c("a", "c", "e"), dag0))
plot(moralize(ancestralGraph(c("a", "c", "e"), dag0)))
Why this works: Because we can integrate over the variables not in the ancestral set of A ∪ B ∪ S. Then we use the factorization structure in p(An(A ∪ B ∪ S)).
This is the main point when we come to linking BNs to statistical models and data!!!
This is the topic of next lab.