-
Notifications
You must be signed in to change notification settings - Fork 0
/
polya_urn.R
102 lines (94 loc) · 5.25 KB
/
polya_urn.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Based on the Polya urn model, this program will create
# 1. a plot showing how the observed ratio of black balls in one Polya urn changes over
# 1000 draws(steps) with given initial numbers of black balls(a) and red balls(b).
#
# 2. a plot that compares the observed ratios of black balls in 10000 urns after
# 1000 draws(steps) with given initial conditions (a, b), compared to the density function of
# the Beta(a,b) distribution. (a: the initial number of black balls; b: the initial number of red balls)
# First, download required packages and declare variables to store data from the simulations
library(ggplot2)
draws <- 1000 # number of steps to sample in one trial
ratio_single <- matrix(0, draws, 2) #create a matrix to record the ratio of black balls in the urn over each draw
# The polya_urn_single function will create a plot of the changing ratio of black balls in one Polya urn over
# 1000 draws(steps) with given initial numbers of black balls(a) and red balls(b).
# We can see that after a certain number of draws, the ratio converges to a limit.
# The limit itself is random although the parameters stay unchanged.
# In the polya_urn_multi function, we show that after a large number of trials, the proportion of black balls in the urn
# converges in distribution to the Beta distribution with parameters(a, b). Hence, we can
# say the limit generated by one Polya urn is a simulation/sample from the corresponding Beta(a,b).
polya_urn_single <- function(a, b) {
black_curr <- a #current number of black balls
red_curr <- b #current number of red balls
for (n in 1:draws){
# the prob of drawing a black ball is (current # of black balls)/(initial # of black+red balls + current # of steps)
# the prob of drawing a red ball is (current # of red balls)/(initial # of black+red balls + current # of steps)
# Given those probabilities, sample a ball from the urn,
# here we use 1 to represent a balck ball and 0 to represent a red ball
drawn<-sample(c(1,0), size=1, prob=c(black_curr, red_curr)/(a + b + n))
if (drawn == 1) {
black_curr <- black_curr + 1
} else {
red_curr <- red_curr + 1
}
ratio_single[n, 1] <- n
ratio_single[n, 2] <- black_curr/(a + b + n)
}
plot(ratio_single, main = "Convergence of the ratio of black balls",
xlab = "number of draws in one urn", ylab = "", type = "l")
}
polya_urn_single(1, 1)
# Given initial numbers of black and red balls, the polya_urn_multi function will show a histogram of observed
# ratios of black balls after 1000 draws in 10000 urns. It also compares the overall shape of bins to the density function of
# the corresponding Beta(a,b) distribution. (a: the initial number of black balls; b: the initial number of red balls)
# The output plot visualizes how the ratio converges in distribution to the corresponding Beta distribution with same parameters
# after a huge number of trials
trials <- 10000 # number of trials(more trials make the graph clearer)
ratio_multi <- matrix(0, trials,1) #create a vector to record the ratio of black balls in each trial
# Parameters of the function
# a: initial number of black balls
# b: initial number of red balls
polya_urn_multi <- function(a, b) {
for (i in 1:trials){
black_curr <- a #current number of black balls
red_curr <- b #current number of red balls
for (n in 1:draws){
# the prob of drawing a black ball is (current # of black balls)/(initial # of black+red balls + current # of steps)
# the prob of drawing a red ball is (current # of red balls)/(initial # of black+red balls + current # of steps)
# Given those probabilities, sample a ball from the urn,
# here we use 1 to represent a balck ball and 0 to represent a red ball
drawn<-sample(c(1,0), size=1, prob=c(black_curr, red_curr)/(a + b + n))
if (drawn == 1) {
black_curr <- black_curr + 1
} else {
red_curr <- red_curr + 1
}
}
# in the ith trial, add the ratio of black balls in the urn after n steps
ratio[i]<-black_curr/(a +b + n)
}
# the proportion of black balls in the urn converges in distribution
# to the Beta distribution with parameters the same as initial # of red balls
# and initial # of black balls when n grows.
# As shown by the graph
sort_ratio <- sort(ratio)
beta_pdf <- dbeta(sort_ratio, a, b)
df <- data.frame(ratio,beta_pdf)
beta_para <- paste("Beta(",a,",",b,")")
num_bins <- 20
urn_plot <- ggplot(df, aes(ratio)) +
geom_histogram(aes(x = ratio), fill="lightblue", bins = num_bins, boundary=0, color = "white") +
scale_x_continuous(breaks = seq(0, 1, by=0.2), limits=c(0,1)) +
labs(title = beta_para,
x = "",
y = "") +
theme_bw(base_size = 16) +
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
axis.text.y = element_blank(),
plot.margin = unit(c(1, 1, 1, 2), "cm")) +
geom_line(aes(x = sort_ratio, y= (trials/num_bins) * beta_pdf), colour="red")
print(urn_plot)
}
# Simulate the Polya's urn with _ black ball and _ red ball at the beginning(you can fill the parameters
# with different sets of numbers) Repeat the experiment for 10000 times.
# Since the the number of iterations is huge, the function takes a while to give the output.
polya_urn_multi(1,8)