-
Notifications
You must be signed in to change notification settings - Fork 247
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
path compression variants for union-find IntDisjointSet #913
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,13 +60,64 @@ | |
return p | ||
end | ||
|
||
# iterative path compression: makes every node on the path point directly to the root | ||
"""
    find_root_iterative!(parents::Vector{T}, x::Integer) where {T<:Integer} -> T

Return the root of the tree containing `x`, applying full path compression.

Two passes are made over the path from `x` to its root: the first locates the root,
the second re-points every node visited on the way directly at that root. `parents`
is mutated in place; an index `i` is a root when `parents[i] == i`. Indexing is not
bounds-checked, so `x` must be a valid index of `parents`.

The result always has the element type `T` of `parents`, whatever integer type `x`
has. Throws `InexactError` if `x` is not representable as a `T`.
"""
@inline function find_root_iterative!(parents::Vector{T}, x::Integer) where {T<:Integer}
    # Convert once so every loop variable keeps the single concrete type `T`.
    start = convert(T, x)
    # Pass 1: climb to the root.
    root = start
    @inbounds while parents[root] != root
        root = parents[root]
    end
    # Pass 2: walk the same path again and attach each node to the root.
    node = start
    # TODO(review): address the test coverage warning raised on this loop.
    @inbounds while parents[node] != root
        next = parents[node]   # remember the old parent before overwriting it
        parents[node] = root
        node = next
    end
    return root
end
|
||
# path-halving and path-splitting are one-pass forms of path compression with inverse-Ackermann complexity | ||
# e.g., see p.19 of https://www.cs.princeton.edu/courses/archive/spr11/cos423/Lectures/PathCompressionAnalysisII.pdf | ||
|
||
# path-halving: every other node on the path is re-pointed to its grandparent; the traversal jumps two nodes at a time | ||
"""
    find_root_halving!(parents::Vector{T}, x::Integer) where {T<:Integer} -> T

Return the root of the tree containing `x`, compressing the path by halving.

In a single pass, the current node is re-pointed to its grandparent and the
traversal then jumps to that grandparent, so every other node on the path is updated.
`parents` is mutated in place; an index `i` is a root when `parents[i] == i`.
Indexing is not bounds-checked, so `x` must be a valid index of `parents`.

The result always has the element type `T` of `parents`. Throws `InexactError` if `x`
is not representable as a `T`.
"""
@inline function find_root_halving!(parents::Vector{T}, x::Integer) where {T<:Integer}
    # Convert once so `node` keeps the single concrete type `T` throughout the loop.
    node = convert(T, x)
    @inbounds while parents[node] != node
        grandparent = parents[parents[node]]
        parents[node] = grandparent   # skip over the parent
        node = grandparent            # continue from the grandparent
    end
    return node
end
|
||
# path-splitting: every node on the path is re-pointed to its grandparent; the traversal advances one node at a time | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what's the exact difference between path compression using path halving and path splitting? it's not very clear. can you illustrate with an example? |
||
"""
    find_root_splitting!(parents::Vector{T}, x::Integer) where {T<:Integer} -> T

Return the root of the tree containing `x`, compressing the path by splitting.

In a single pass, each node on the path is re-pointed to its grandparent and the
traversal then moves on to the node's former parent, so every node on the path is
updated. `parents` is mutated in place; an index `i` is a root when
`parents[i] == i`. Indexing is not bounds-checked, so `x` must be a valid index of
`parents`.

The result always has the element type `T` of `parents`. Throws `InexactError` if `x`
is not representable as a `T`.
"""
@inline function find_root_splitting!(parents::Vector{T}, x::Integer) where {T<:Integer}
    # Use a separate, `T`-typed cursor instead of reassigning the argument `x`.
    node = convert(T, x)
    @inbounds while parents[node] != node
        parent = parents[node]            # remember where to go next
        parents[node] = parents[parent]   # re-point to the grandparent
        node = parent
    end
    return node
end
|
||
|
||
"""
    PCRecursive

Path-compression strategy tag for `find_root!`; selects `find_root_impl!`.
"""
struct PCRecursive end
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. make this an enum. |
||
"""
    PCIterative

Path-compression strategy tag for `find_root!`; selects `find_root_iterative!`.
"""
struct PCIterative end

"""
    PCHalving

Path-compression strategy tag for `find_root!`; selects `find_root_halving!`.
"""
struct PCHalving end

"""
    PCSplitting

Path-compression strategy tag for `find_root!`; selects `find_root_splitting!`.
"""
struct PCSplitting end
|
||
""" | ||
find_root!(s::IntDisjointSet{T}, x::T) | ||
|
||
Find the root element of the subset that contains a member `x`. | ||
Path compression happens here. | ||
""" | ||
@inline find_root!(s::IntDisjointSet{T}, x::T) where {T<:Integer} =
    find_root_impl!(s.parents, x) # default: same routine as the PCRecursive method
# Strategy-tagged methods: the third argument's type picks the compression routine that
# is run on `s.parents`. The two-argument method is defined exactly once, above.
@inline find_root!(s::IntDisjointSet{T}, x::T, ::PCRecursive) where {T<:Integer} =
    find_root_impl!(s.parents, x)
@inline find_root!(s::IntDisjointSet{T}, x::T, ::PCIterative) where {T<:Integer} =
    find_root_iterative!(s.parents, x)
@inline find_root!(s::IntDisjointSet{T}, x::T, ::PCHalving) where {T<:Integer} =
    find_root_halving!(s.parents, x)
@inline find_root!(s::IntDisjointSet{T}, x::T, ::PCSplitting) where {T<:Integer} =
    find_root_splitting!(s.parents, x)
|
||
""" | ||
in_same_set(s::IntDisjointSet{T}, x::T, y::T) | ||
|
@@ -191,6 +242,10 @@ | |
Find the root element of the subset in `s` which has the element `x` as a member. | ||
""" | ||
find_root!(s::DisjointSet{T}, x::T) where {T} =
    s.revmap[find_root!(s.internal, s.intmap[x])]
# Strategy-selecting variant: map `x` to its integer id through `s.intmap`, find the root
# id in `s.internal` using the given path-compression tag, then map it back through
# `s.revmap`. One method covers the four tag types.
function find_root!(
    s::DisjointSet{T},
    x::T,
    strategy::Union{PCIterative,PCRecursive,PCHalving,PCSplitting},
) where {T}
    root_id = find_root!(s.internal, s.intmap[x], strategy)
    return s.revmap[root_id]
end
|
||
""" | ||
in_same_set(s::DisjointSet{T}, x::T, y::T) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
# Benchmark on disjoint set forests | ||
|
||
using DataStructures | ||
using DataStructures, BenchmarkTools | ||
|
||
# do 10^6 random unions over 10^6 element set | ||
|
||
|
@@ -29,3 +29,43 @@ x = rand(1:n, T) | |
y = rand(1:n, T) | ||
|
||
@time batch_union!(s, x, y) | ||
|
||
#= | ||
benchmark `find` operation | ||
=# | ||
|
||
"""
    create_disjoint_set_struct(n::Int)

Build an `IntDisjointSet` over `n` elements arranged as one chain: element 1 is its own
parent and every other element `i` has parent `i - 1`. All ranks are zero. The value
`n` is passed through as the third constructor argument.

For `n == 0` both `parents` and `ranks` are empty.
"""
function create_disjoint_set_struct(n::Int)
    # Element 1 points to itself; element i > 1 points to its predecessor. The
    # comprehension yields an empty vector for n == 0, keeping it the same length as ranks.
    parents = [max(i - 1, 1) for i in 1:n]
    ranks = zeros(Int, n)
    return IntDisjointSet(parents, ranks, n)
end
|
||
# benchmarking function | ||
"""
    benchmark_find_root(n::Int)

Time one `find_root!` call on element `n` of a freshly built `n`-element chain (see
`create_disjoint_set_struct`) for each path-compression strategy, printing a heading
before each measurement.

The recursive strategy is skipped for `n >= 10^5`, where the message printed says it
may overflow the stack.
"""
function benchmark_find_root(n::Int)
    # `find_root!` rewrites the parents it walks, so only the first call on a given set
    # sees the uncompressed chain. Each sample therefore rebuilds the set in `setup` and
    # runs with `evals=1`, so every measurement starts from the same state.
    # NOTE(review): a reviewer asked for evals=100 and for median and max times for all
    # methods; evals must stay 1 while the call mutates `s`, so report those statistics
    # from the collected samples instead.
    println("Benchmarking recursive path compression implementation (find_root_impl!):")
    if n >= 10^5
        println("Recursive path compression may encounter stack-overflow; skipping")
    else
        @btime find_root!(s, $n, PCRecursive()) setup=(s = create_disjoint_set_struct($n)) evals=1
    end

    println("Benchmarking iterative path compression implementation (find_root_iterative!):")
    @btime find_root!(s, $n, PCIterative()) setup=(s = create_disjoint_set_struct($n)) evals=1

    println("Benchmarking path-halving implementation (find_root_halving!):")
    @btime find_root!(s, $n, PCHalving()) setup=(s = create_disjoint_set_struct($n)) evals=1

    println("Benchmarking path-splitting implementation (find_root_splitting!):")
    @btime find_root!(s, $n, PCSplitting()) setup=(s = create_disjoint_set_struct($n)) evals=1
    return nothing
end
|
||
# run benchmark tests | ||
# chain lengths from 10^3 up to 10^7, in increasing order
for chain_length in (1_000, 10_000, 100_000, 1_000_000, 10_000_000)
    benchmark_find_root(chain_length)
end
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
after making it an enum, export the enum