In this analysis I am going to build a Bayesian Network in R based on medical data to identify the likelihood of six possible diseases based on clinical presentations in infants.
The idea and data for this analysis came from the CHILD network created by David J. Spiegelhalter, A. Philip Dawid, Steffen L. Lauritzen, Robert G. Cowell. Here is a citation to an article on their work and the structure of a Bayesian Network they created:
David J. Spiegelhalter, A. Philip Dawid, Steffen L. Lauritzen, Robert G. Cowell “Bayesian Analysis in Expert Systems,” Statistical Science, Statist. Sci. 8(3), 219–247, (August, 1993)

I am going to use the Spiegelhalter et al. model as guidance to build the structure of a network. I will then parameterize with medical data, visualize the model, and perform queries. I will also visualize evidence based queries.
Load necessary packages.
library(bnlearn)
if (!require(“BiocManager”, quietly = TRUE))
install.packages(“BiocManager”)
BiocManager::install(“Rgraphviz”)
if (!require(“BiocManager”, quietly = TRUE))
install.packages(“BiocManager”)
BiocManager::install(“graph”)
library(Rgraphviz)
library(gRbase)
library(graph)
Construct the BN structure in R.
# create an empty graph
dag <- empty.graph(nodes = c(“BirthAsphyxia”, “Disease”, “Age”, “LVH”, “DuctFlow”, “CardiacMixing”, “LungParench”, “LungFlow”, “Sick”, “HypDistrib”, “HypoxiaInO2”, “CO2”,
“ChestXray”, “Grunting”, “LVHreport”, “LowerBodyO2”, “RUQO2”, “CO2Report”, “XrayReport”, “GruntingReport”))
quartz()
plot(dag)
dag <- set.arc(dag, from = “BirthAsphyxia”, to = “Disease”) #birth asphyxia -> disease
dag <- set.arc(dag, from = “Disease”, to = “Age”) #disease -> age at presentation
dag <- set.arc(dag, from = “Disease”, to = “LVH”) #disease -> LVH
dag <- set.arc(dag, from = “Disease”, to = “DuctFlow”) #disease -> duct flow
dag <- set.arc(dag, from = “Disease”, to = “CardiacMixing”) #disease -> cardiac mixing
dag <- set.arc(dag, from = “Disease”, to = “LungParench”) #disease -> lung parenchema
dag <- set.arc(dag, from = “Disease”, to = “LungFlow”) #disease -> lung flow
dag <- set.arc(dag, from = “Disease”, to = “Sick”) #disease -> sick
dag <- set.arc(dag, from = “LVH”, to = “LVHreport”) #LVH-> LVH report
dag <- set.arc(dag, from = “DuctFlow”, to = “HypDistrib”) #duct flow -> hypoxia distribution
dag <- set.arc(dag, from = “CardiacMixing”, to = “HypDistrib”) #cardiac mixing -> hypoxia distribution
dag <- set.arc(dag, from = “CardiacMixing”, to = “HypoxiaInO2”) #cardiac mixing -> hypoxia in O2
dag <- set.arc(dag, from = “LungParench”, to = “HypoxiaInO2”) #lung parenchema -> hypoxia in O2
dag <- set.arc(dag, from = “LungParench”, to = “CO2”) #lung parenchema -> CO2
dag <- set.arc(dag, from = “LungParench”, to = “ChestXray”) #lung parenchema -> chest x-ray
dag <- set.arc(dag, from = “LungParench”, to = “Grunting”) #lung parenchema -> grunting
dag <- set.arc(dag, from = “LungFlow”, to = “ChestXray”) #lung flow -> chest x-ray
dag <- set.arc(dag, from = “Sick”, to = “Grunting”) #sick -> grunting
dag <- set.arc(dag, from = “HypDistrib”, to = “LowerBodyO2”) #hypoxia distribution -> lower body O2
dag <- set.arc(dag, from = “HypoxiaInO2”, to = “LowerBodyO2”) #hypoxia in O2 -> lower body O2
dag <- set.arc(dag, from = “HypoxiaInO2”, to = “RUQO2”) #hypoxia in O2 -> right up quad O2
dag <- set.arc(dag, from = “CO2”, to = “CO2Report”) #CO2 -> CO2 report
dag <- set.arc(dag, from = “ChestXray”, to = “XrayReport”) #chest x-ray -> chest x-ray report
dag <- set.arc(dag, from = “Grunting”, to = “GruntingReport”) #grunting -> grunting report
quartz()
plot(dag)
The joint distribution:
modelstring(dag)
“[n1][n2|n1][n3|n2][n4|n2][n5|n2][n6|n2][n7|n2][n8|n2][n9|n2][n10|n5:n6][n11|n6:n7][n12|n7][n13|n7:n8][n14|n7:n9][n15|n4][n16|n10:n11][n17|n11][n18|n12][n19|n13][n20|n14]”
“[BirthAsphyxia][Disease|BirthAsphyxia][Age|Disease][LVH|Disease][DuctFlow|Disease][CardiacMixing|Disease][LungParench|Disease][LungFlow|Disease][Sick|Disease][HypDistrib|DuctFlow:CardiacMixing][HypoxiaInO2|CardiacMixing:LungParench][CO2|LungParench][ChestXray|LungParench:LungFlow][Grunting|LungParench:Sick][LVHreport|LVH][LowerBodyO2|HypDistrib:HypoxiaInO2][RUQO2|HypoxiaInO2][CO2Report|CO2][XrayReport|ChestXray][GruntingReport|Grunting]”
nodes(dag)
arcs(dag)
Parametrize the network.
child<- read.csv(“child_network.csv”, header=TRUE, colClasses=”factor”)
dim(child)
head(child)
bn.mle <- bn.fit(dag, data = child, method = “mle”) #maximum likelihood estimation
bn.mle
quartz()
graphviz.chart(bn.mle, grid = TRUE, main = “BN”)

Explore the conditional probability tables;
for example, what is the conditional probability table for n1?
bn.mle$BirthAsphyxia

What is the conditional probability table for n16?
bn.mle$LowerBodyO2

Perform queries on the data.
Suppose “lower body O2 < 5” and “X-ray report = Plethoric”, what can we deduce about the disease?
cpquery(bn.mle, event = (Disease == “Fallot”), evidence = ((LowerBodyO2 == “<5” & XrayReport == “Plethoric”))) #0.09681228
cpquery(bn.mle, event = (Disease == “Lung”), evidence = ((LowerBodyO2 == “<5” & XrayReport == “Plethoric”))) #0.02466368
cpquery(bn.mle, event = (Disease == “PAIVS”), evidence = ((LowerBodyO2 == “<5” & XrayReport == “Plethoric”))) #0.08390805
cpquery(bn.mle, event = (Disease == “PFC”), evidence = ((LowerBodyO2 == “<5” & XrayReport == “Plethoric”))) #0.01668521
cpquery(bn.mle, event = (Disease == “TAPVD”), evidence = ((LowerBodyO2 == “<5” & XrayReport == “Plethoric”))) #0.05727377
cpquery(bn.mle, event = (Disease == “TGA”), evidence = ((LowerBodyO2 == “<5” & XrayReport == “Plethoric”))) #0.7070707
library(gRain)
junction <- compile(as.grain(bn.mle))
jedu <- setEvidence(junction, nodes = c(“LowerBodyO2”, “XrayReport”), states = c(“<5”, “Plethoric”)) #set the evidence
quartz()
graphviz.chart(bn.mle, grid = TRUE, main = “Original BN”)
graphviz.chart(as.bn.fit(jedu, including.evidence = TRUE), grid = TRUE,
bar.col = c(BirthAsphyxia = “black”, Disease = “red”, Age = “black”, LVH = “black”,
DuctFlow = “black”, CardiacMixing = “black”, LungParench=”black”,
LungFlow=”black”, Sick=”black”, HypDistrib=”black”, HypoxiaInO2=”black”,
CO2=”black”, ChestXray=”black”, Grunting=”black”, LVHreport=”black”,
LowerBodyO2=”red”, RUQO2=”black”, CO2Report=”black”, XrayReport=”red”,
GruntingReport=”black”),
strip.bg = c(BirthAsphyxia = “transparent”, Disease = “red”, Age = “transparent”, LVH = “transparent”,
DuctFlow = “transparent”, CardiacMixing = “transparent”, LungParench=”transparent”,
LungFlow=”transparent”, Sick=”transparent”, HypDistrib=”transparent”, HypoxiaInO2=”transparent”,
CO2=”transparent”, ChestXray=”transparent”, Grunting=”transparent”, LVHreport=”transparent”,
LowerBodyO2=”red”, RUQO2=”transparent”, CO2Report=”transparent”, XrayReport=”red”,
GruntingReport=”transparent”), main = “BN with Evidence”)

We can deduce the disease is likely TGA; there is a 70% probability.
Suppose “lower body O2 < 5” and “X-ray report = Oligaemic” but the child is “not grunting”, what can we deduce about the disease?
cpquery(bn.mle, event = (Disease == “Fallot”), evidence = ((LowerBodyO2 == “<5”) & (XrayReport == “Oligaemic”) & (Grunting == “no”))) #0.4626866
cpquery(bn.mle, event = (Disease == “Lung”), evidence = ((LowerBodyO2 == “<5”) & (XrayReport == “Oligaemic”) & (Grunting == “no”))) #00.01447527
cpquery(bn.mle, event = (Disease == “PAIVS”), evidence = ((LowerBodyO2 == “<5”) & (XrayReport == “Oligaemic”) & (Grunting == “no”))) #0.3218263
cpquery(bn.mle, event = (Disease == “PFC”), evidence = ((LowerBodyO2 == “<5”) & (XrayReport == “Oligaemic”) & (Grunting == “no”))) # 0.04091456
cpquery(bn.mle, event = (Disease == “TAPVD”), evidence = ((LowerBodyO2 == “<5”) & (XrayReport == “Oligaemic”) & (Grunting == “no”))) #0.009888752
cpquery(bn.mle, event = (Disease == “TGA”), evidence = ((LowerBodyO2 == “<5”) & (XrayReport == “Oligaemic”) & (Grunting == “no”))) #0.08656036
jedu <- setEvidence(junction, nodes = c(“LowerBodyO2”, “XrayReport”, “Grunting”), states = c(“<5”, “Oligaemic”, “no”)) #set the evidence
quartz()
graphviz.chart(bn.mle, grid = TRUE, main = “Original BN”)
graphviz.chart(as.bn.fit(jedu, including.evidence = TRUE), grid = TRUE,
bar.col = c(BirthAsphyxia = “black”, Disease = “red”, Age = “black”, LVH = “black”,
DuctFlow = “black”, CardiacMixing = “black”, LungParench=”black”,
LungFlow=”black”, Sick=”black”, HypDistrib=”black”, HypoxiaInO2=”black”,
CO2=”black”, ChestXray=”black”, Grunting=”red”, LVHreport=”black”,
LowerBodyO2=”red”, RUQO2=”black”, CO2Report=”black”, XrayReport=”red”,
GruntingReport=”black”),
strip.bg = c(BirthAsphyxia = “transparent”, Disease = “red”, Age = “transparent”, LVH = “transparent”,
DuctFlow = “transparent”, CardiacMixing = “transparent”, LungParench=”transparent”,
LungFlow=”transparent”, Sick=”transparent”, HypDistrib=”transparent”, HypoxiaInO2=”transparent”,
CO2=”transparent”, ChestXray=”transparent”, Grunting=”red”, LVHreport=”transparent”,
LowerBodyO2=”red”, RUQO2=”transparent”, CO2Report=”transparent”, XrayReport=”red”,
GruntingReport=”transparent”), main = “BN with Evidence”)

We can deduce that the disease is most likely Fallot with a 46% probability; alternatively, there is a 32% probability it is PAIVS.
Baby Bea is “grunting” with “mild cardiac mixing”. Baby Hank is “not grunting” with “complete cardiac mixing”. What can we learn about these babies?
#Bea
cpquery(bn.mle, event = (Disease == “Fallot”), evidence = ((Grunting == “yes”) & (CardiacMixing == “Mild”))) #0.1809291
cpquery(bn.mle, event = (Disease == “Lung”), evidence = ((Grunting == “yes”) & (CardiacMixing == “Mild”))) #0.4311224
cpquery(bn.mle, event = (Disease == “PAIVS”), evidence = ((Grunting == “yes”) & (CardiacMixing == “Mild”))) #0.01578947
cpquery(bn.mle, event = (Disease == “PFC”), evidence = ((Grunting == “yes”) & (CardiacMixing == “Mild”))) #0.1182796
cpquery(bn.mle, event = (Disease == “TAPVD”), evidence = ((Grunting == “yes”) & (CardiacMixing == “Mild”))) #0.0234375
cpquery(bn.mle, event = (Disease == “TGA”), evidence = ((Grunting == “yes”) & (CardiacMixing == “Mild”))) #0.1741573
jedu <- setEvidence(junction, nodes = c(“Grunting”, “CariacMixing”), states = c(“yes”, “Mild”)) #set the evidence
quartz()
graphviz.chart(bn.mle, grid = TRUE, main = “Original BN”)
graphviz.chart(as.bn.fit(jedu, including.evidence = TRUE), grid = TRUE,
bar.col = c(BirthAsphyxia = “black”, Disease = “red”, Age = “black”, LVH = “black”,
DuctFlow = “black”, CardiacMixing = “red”, LungParench=”black”,
LungFlow=”black”, Sick=”black”, HypDistrib=”black”, HypoxiaInO2=”black”,
CO2=”black”, ChestXray=”black”, Grunting=”red”, LVHreport=”black”,
LowerBodyO2=”black”, RUQO2=”black”, CO2Report=”black”, XrayReport=”black”,
GruntingReport=”black”),
strip.bg = c(BirthAsphyxia = “transparent”, Disease = “red”, Age = “transparent”, LVH = “transparent”,
DuctFlow = “transparent”, CardiacMixing = “red”, LungParench=”transparent”,
LungFlow=”transparent”, Sick=”transparent”, HypDistrib=”transparent”, HypoxiaInO2=”transparent”,
CO2=”transparent”, ChestXray=”transparent”, Grunting=”red”, LVHreport=”transparent”,
LowerBodyO2=”transparent”, RUQO2=”transparent”, CO2Report=”transparent”, XrayReport=”transparent”,
GruntingReport=”transparent”), main = “BN with Evidence”)

The most likely disease that Bea has is lung disease with a 43% probability.
#Hank
cpquery(bn.mle, event = (Disease == “Fallot”), evidence = ((Grunting == “no”) & (CardiacMixing == “Complete”))) #0.4435653
cpquery(bn.mle, event = (Disease == “Lung”), evidence = ((Grunting == “no”) & (CardiacMixing == “Complete”))) #0.001911132
cpquery(bn.mle, event = (Disease == “PAIVS”), evidence = ((Grunting == “no”) & (CardiacMixing == “Complete”))) #0.4176822
cpquery(bn.mle, event = (Disease == “PFC”), evidence = ((Grunting == “no”) & (CardiacMixing == “Complete”))) #0.01378327
cpquery(bn.mle, event = (Disease == “TAPVD”), evidence = ((Grunting == “no”) & (CardiacMixing == “Complete”))) #0.06555819
cpquery(bn.mle, event = (Disease == “TGA”), evidence = ((Grunting == “no”) & (CardiacMixing == “Complete”))) #0.05988727
jedu <- setEvidence(junction, nodes = c(“Grunting”, “CariacMixing”), states = c(“no”, “Complete”)) #set the evidence
quartz()
graphviz.chart(bn.mle, grid = TRUE, main = “Original BN”)
graphviz.chart(as.bn.fit(jedu, including.evidence = TRUE), grid = TRUE,
bar.col = c(BirthAsphyxia = “black”, Disease = “red”, Age = “black”, LVH = “black”,
DuctFlow = “black”, CardiacMixing = “red”, LungParench=”black”,
LungFlow=”black”, Sick=”black”, HypDistrib=”black”, HypoxiaInO2=”black”,
CO2=”black”, ChestXray=”black”, Grunting=”red”, LVHreport=”black”,
LowerBodyO2=”black”, RUQO2=”black”, CO2Report=”black”, XrayReport=”black”,
GruntingReport=”black”),
strip.bg = c(BirthAsphyxia = “transparent”, Disease = “red”, Age = “transparent”, LVH = “transparent”,
DuctFlow = “transparent”, CardiacMixing = “red”, LungParench=”transparent”,
LungFlow=”transparent”, Sick=”transparent”, HypDistrib=”transparent”, HypoxiaInO2=”transparent”,
CO2=”transparent”, ChestXray=”transparent”, Grunting=”red”, LVHreport=”transparent”,
LowerBodyO2=”transparent”, RUQO2=”transparent”, CO2Report=”transparent”, XrayReport=”transparent”,
GruntingReport=”transparent”), main = “BN with Evidence”)

The most likely disease that Hank has is Fallot with a 44% probability.
In conclusion, this was an example of building a Bayesian Network in R. I started with building the structure of the network, and then showed how to parameterize using data. Visualizations and queries were then performed.
By Aspen Gulley on .

Leave a Reply