From 2d017b2d9419a81fc72a0a1ad7acd2f1b4ffb78b Mon Sep 17 00:00:00 2001 From: Jonathan Schuster Date: Tue, 20 Dec 2016 11:12:30 -0500 Subject: [PATCH 01/12] Implement find_first/last, position_first/last --- src/par_iter/find_first_last.rs | 182 ++++++++++++++++++++++++++++++++ src/par_iter/mod.rs | 55 ++++++++++ src/par_iter/test.rs | 25 +++++ 3 files changed, 262 insertions(+) create mode 100644 src/par_iter/find_first_last.rs diff --git a/src/par_iter/find_first_last.rs b/src/par_iter/find_first_last.rs new file mode 100644 index 000000000..0817e57e2 --- /dev/null +++ b/src/par_iter/find_first_last.rs @@ -0,0 +1,182 @@ +use std::cell::Cell; +use std::sync::atomic::{AtomicUsize, Ordering}; +use super::*; +use super::len::*; + +// The consumer for find_first/find_last has fake indexes representing the lower +// and upper bounds of the "range" of data it consumes. This range does not +// correspond to indexes from the consumed iterator but rather indicate the +// consumer's position relative to other consumers. The purpose is to allow a +// consumer to know it should stop consuming items when another consumer finds a +// better match. + +// An indexed consumer could specialize to use the real indexes instead, but we +// don't implement that for now. The only downside of the current approach is +// that in some cases, iterators very close to each other will have the same +// range and therefore not be able to stop processing if one of them finds a +// better match than the others. + +#[derive(Copy, Clone)] +enum MatchPosition { + Leftmost, + Rightmost, +} + +pub fn find_first(pi: PAR_ITER, find_op: FIND_OP) -> Option + where PAR_ITER: ParallelIterator, + FIND_OP: Fn(&PAR_ITER::Item) -> bool + Sync +{ + let best_found = AtomicUsize::new(usize::max_value()); + let consumer = FindConsumer::new(&find_op, MatchPosition::Leftmost, &best_found); + pi.drive_unindexed(consumer) +} + +pub fn find_last(pi: PAR_ITER, find_op: FIND_OP) -> Option + where PAR_ITER: ParallelIterator, + FIND_OP: Fn(&PAR_ITER::Item) -> bool + Sync +{ + let best_found = AtomicUsize::new(0); + let consumer = FindConsumer::new(&find_op, MatchPosition::Rightmost, &best_found); + pi.drive_unindexed(consumer) +} + +struct FindConsumer<'f, FIND_OP: 'f> { + find_op: &'f FIND_OP, + lower_bound: Cell, + upper_bound: usize, + match_position: MatchPosition, + best_found: &'f AtomicUsize, +} + +impl<'f, FIND_OP> FindConsumer<'f, FIND_OP> { + fn new(find_op: &'f FIND_OP, + match_position: MatchPosition, + best_found: &'f AtomicUsize) -> Self { + FindConsumer { + find_op: find_op, + lower_bound: Cell::new(0), + upper_bound: usize::max_value(), + match_position: match_position, + best_found: best_found, + } + } +} + +impl<'f, ITEM, FIND_OP> Consumer for FindConsumer<'f, FIND_OP> + where ITEM: Send, + FIND_OP: Fn(&ITEM) -> bool + Sync +{ + type Folder = FindFolder<'f, ITEM, FIND_OP>; + type Reducer = FindReducer; + type Result = Option; + + fn cost(&mut self, cost: f64) -> f64 { + cost * FUNC_ADJUSTMENT + } + + fn split_at(self, _index: usize) -> (Self, Self, Self::Reducer) { + let dir = self.match_position; + (self.split_off(), + self, + FindReducer { match_position: dir }) + } + + fn into_folder(self) -> Self::Folder { + FindFolder { + find_op: self.find_op, + boundary: match self.match_position { + MatchPosition::Leftmost => self.lower_bound.get(), + MatchPosition::Rightmost => self.upper_bound + }, + match_position: self.match_position, + best_found: self.best_found, + item: None, + } + } + + fn full(&self) -> bool { + let best = self.best_found.load(Ordering::Relaxed); + match self.match_position { + // can stop consuming if the best found index so far is *strictly* + // better than anything this consumer will find + MatchPosition::Leftmost => best < self.lower_bound.get(), + MatchPosition::Rightmost => best > self.upper_bound + } + } +} + +impl<'f, ITEM, FIND_OP> UnindexedConsumer for FindConsumer<'f, FIND_OP> + where ITEM: Send, + FIND_OP: Fn(&ITEM) -> bool + Sync +{ + fn split_off(&self) -> Self { + // Upper bound for one consumer will be lower bound for the other. This + // overlap is okay, because only one of the bounds will be used for + // comparing against best_found; the other is kept only to be able to + // divide the range in half + let old_lower_bound = self.lower_bound.get(); + let median = old_lower_bound + ((self.upper_bound - old_lower_bound) / 2); + self.lower_bound.set(median); + + FindConsumer { + find_op: self.find_op, + lower_bound: Cell::new(old_lower_bound), + upper_bound: median, + match_position: self.match_position, + best_found: self.best_found, + } + } + + fn to_reducer(&self) -> Self::Reducer { + FindReducer { match_position: self.match_position } + } +} + +struct FindFolder<'f, ITEM, FIND_OP: 'f> { + find_op: &'f FIND_OP, + boundary: usize, + match_position: MatchPosition, + best_found: &'f AtomicUsize, + item: Option, +} + +impl<'f, FIND_OP: 'f + Fn(&ITEM) -> bool, ITEM> Folder for FindFolder<'f, ITEM, FIND_OP> { + type Result = Option; + + fn consume(mut self, item: ITEM) -> Self { + if (self.find_op)(&item) { + // This may sometimes set best_found to a worse index than it was + // before, depending on timing. This means more consumers will + // continue to run than necessary, but the reducer will still ensure + // the correct value is returned. + self.best_found.swap(self.boundary, Ordering::Relaxed); + self.item = Some(item); + } + self + } + + fn complete(self) -> Self::Result { + self.item + } + + fn full(&self) -> bool { + let best_found = self.best_found.load(Ordering::Relaxed); + match self.match_position { + MatchPosition::Leftmost => best_found < self.boundary, + MatchPosition::Rightmost => best_found > self.boundary, + } + } +} + +struct FindReducer { + match_position: MatchPosition +} + +impl Reducer> for FindReducer { + fn reduce(self, left: Option, right: Option) -> Option { + match self.match_position { + MatchPosition::Leftmost => left.or(right), + MatchPosition::Rightmost => right.or(left) + } + } +} diff --git a/src/par_iter/mod.rs b/src/par_iter/mod.rs index 4adc8e7ae..4aa793012 100644 --- a/src/par_iter/mod.rs +++ b/src/par_iter/mod.rs @@ -30,6 +30,7 @@ use self::weight::Weight; use self::zip::ZipIter; pub mod find; +pub mod find_first_last; pub mod chain; pub mod collect; pub mod enumerate; @@ -575,6 +576,28 @@ pub trait ParallelIterator: Sized { find::find(self, predicate) } + /// Searches for the **first** item in the parallel iterator that + /// matches the given predicate and returns it. + /// + /// Once a match is found, all attempts to the right of the match + /// will be stopped, while attempts to the left must continue in case + /// an earlier match is found. + fn find_first(self, predicate: FIND_OP) -> Option + where FIND_OP: Fn(&Self::Item) -> bool + Sync { + find_first_last::find_first(self, predicate) + } + + /// Searches for the **last** item in the parallel iterator that + /// matches the given predicate and returns it. + /// + /// Once a match is found, all attempts to the left of the match + /// will be stopped, while attempts to the right must continue in case + /// a later match is found. + fn find_last(self, predicate: FIND_OP) -> Option + where FIND_OP: Fn(&Self::Item) -> bool + Sync { + find_first_last::find_last(self, predicate) + } + #[doc(hidden)] #[deprecated(note = "parallel `find` does not search in order -- use `find_any`")] fn find(self, predicate: FIND_OP) -> Option @@ -824,6 +847,38 @@ pub trait IndexedParallelIterator: ExactParallelIterator { .map(|(i, _)| i) } + /// Searches for the **first** item in the parallel iterator that + /// matches the given predicate, and returns its index. + /// + /// Like `ParallelIterator::find_first`, once a match is found, + /// all attempts to the right of the match will be stopped, while + /// attempts to the left must continue in case an earlier match + /// is found. + fn position_first(self, predicate: POSITION_OP) -> Option + where POSITION_OP: Fn(Self::Item) -> bool + Sync + { + self.map(predicate) + .enumerate() + .find_first(|&(_, p)| p) + .map(|(i, _)| i) + } + + /// Searches for the **last** item in the parallel iterator that + /// matches the given predicate, and returns its index. + /// + /// Like `ParallelIterator::find_last`, once a match is found, + /// all attempts to the left of the match will be stopped, while + /// attempts to the right must continue in case a later match + /// is found. + fn position_last(self, predicate: POSITION_OP) -> Option + where POSITION_OP: Fn(Self::Item) -> bool + Sync + { + self.map(predicate) + .enumerate() + .find_last(|&(_, p)| p) + .map(|(i, _)| i) + } + #[doc(hidden)] #[deprecated(note = "parallel `position` does not search in order -- use `position_any`")] fn position(self, predicate: POSITION_OP) -> Option diff --git a/src/par_iter/test.rs b/src/par_iter/test.rs index 61ef91cf2..cd9517b16 100644 --- a/src/par_iter/test.rs +++ b/src/par_iter/test.rs @@ -997,6 +997,31 @@ pub fn find_any() { assert!(a.par_iter().all(|&x| x >= 0)); } +#[test] +pub fn find_first_or_last() { + let a: Vec = (0..1024).collect(); + + assert_eq!(a.par_iter().find_first(|&&x| x % 42 == 41), Some(&41_i32)); + assert_eq!(a.par_iter().find_first(|&&x| x % 19 == 1 && x % 53 == 0), + Some(&742_i32)); + assert_eq!(a.par_iter().find_first(|&&x| x < 0), None); + + assert_eq!(a.par_iter().position_first(|&x| x % 42 == 41), Some(41_usize)); + assert_eq!(a.par_iter().position_first(|&x| x % 19 == 1 && x % 53 == 0), + Some(742_usize)); + assert_eq!(a.par_iter().position_first(|&x| x < 0), None); + + assert_eq!(a.par_iter().find_last(|&&x| x % 42 == 41), Some(&1007_i32)); + assert_eq!(a.par_iter().find_last(|&&x| x % 19 == 1 && x % 53 == 0), + Some(&742_i32)); + assert_eq!(a.par_iter().find_last(|&&x| x < 0), None); + + assert_eq!(a.par_iter().position_last(|&x| x % 42 == 41), Some(1007_usize)); + assert_eq!(a.par_iter().position_last(|&x| x % 19 == 1 && x % 53 == 0), + Some(742_usize)); + assert_eq!(a.par_iter().position_last(|&x| x < 0), None); +} + #[test] pub fn check_find_not_present() { let counter = AtomicUsize::new(0); From febc7ed7a688305cde4ac532760a3f27eeb84b69 Mon Sep 17 00:00:00 2001 From: Jonathan Schuster Date: Tue, 20 Dec 2016 17:01:18 -0500 Subject: [PATCH 02/12] Add extra import (related to #173) --- src/par_iter/find_first_last.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/par_iter/find_first_last.rs b/src/par_iter/find_first_last.rs index 0817e57e2..752afa7a9 100644 --- a/src/par_iter/find_first_last.rs +++ b/src/par_iter/find_first_last.rs @@ -1,5 +1,6 @@ use std::cell::Cell; use std::sync::atomic::{AtomicUsize, Ordering}; +use super::internal::*; use super::*; use super::len::*; From 48157c4f5846229ad263612b4d4d3953f6401775 Mon Sep 17 00:00:00 2001 From: Jonathan Schuster Date: Thu, 22 Dec 2016 10:48:47 -0500 Subject: [PATCH 03/12] Refactor full calculations for find_first/last --- src/par_iter/find_first_last.rs | 40 ++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/src/par_iter/find_first_last.rs b/src/par_iter/find_first_last.rs index 752afa7a9..02304e287 100644 --- a/src/par_iter/find_first_last.rs +++ b/src/par_iter/find_first_last.rs @@ -23,6 +23,14 @@ enum MatchPosition { Rightmost, } +// Returns true if pos1 is a better match than pos2 according to MatchPosition +fn better_position(pos1: usize, pos2: usize, mp: MatchPosition) -> bool { + match mp { + MatchPosition::Leftmost => pos1 < pos2, + MatchPosition::Rightmost => pos1 > pos2, + } +} + pub fn find_first(pi: PAR_ITER, find_op: FIND_OP) -> Option where PAR_ITER: ParallelIterator, FIND_OP: Fn(&PAR_ITER::Item) -> bool + Sync @@ -61,6 +69,13 @@ impl<'f, FIND_OP> FindConsumer<'f, FIND_OP> { best_found: best_found, } } + + fn current_index(&self) -> usize { + match self.match_position { + MatchPosition::Leftmost => self.lower_bound.get(), + MatchPosition::Rightmost => self.upper_bound + } + } } impl<'f, ITEM, FIND_OP> Consumer for FindConsumer<'f, FIND_OP> @@ -85,10 +100,7 @@ impl<'f, ITEM, FIND_OP> Consumer for FindConsumer<'f, FIND_OP> fn into_folder(self) -> Self::Folder { FindFolder { find_op: self.find_op, - boundary: match self.match_position { - MatchPosition::Leftmost => self.lower_bound.get(), - MatchPosition::Rightmost => self.upper_bound - }, + boundary: self.current_index(), match_position: self.match_position, best_found: self.best_found, item: None, @@ -96,13 +108,11 @@ impl<'f, ITEM, FIND_OP> Consumer for FindConsumer<'f, FIND_OP> } fn full(&self) -> bool { - let best = self.best_found.load(Ordering::Relaxed); - match self.match_position { - // can stop consuming if the best found index so far is *strictly* - // better than anything this consumer will find - MatchPosition::Leftmost => best < self.lower_bound.get(), - MatchPosition::Rightmost => best > self.upper_bound - } + // can stop consuming if the best found index so far is *strictly* + // better than anything this consumer will find + better_position(self.best_found.load(Ordering::Relaxed), + self.current_index(), + self.match_position) } } @@ -161,11 +171,9 @@ impl<'f, FIND_OP: 'f + Fn(&ITEM) -> bool, ITEM> Folder for FindFolder<'f, } fn full(&self) -> bool { - let best_found = self.best_found.load(Ordering::Relaxed); - match self.match_position { - MatchPosition::Leftmost => best_found < self.boundary, - MatchPosition::Rightmost => best_found > self.boundary, - } + better_position(self.best_found.load(Ordering::Relaxed), + self.boundary, + self.match_position) } } From d586aaaa95a0a213715b6ecb42695c4d2417be49 Mon Sep 17 00:00:00 2001 From: Jonathan Schuster Date: Thu, 22 Dec 2016 10:49:42 -0500 Subject: [PATCH 04/12] Loop to set best_found for find_first/last --- src/par_iter/find_first_last.rs | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/par_iter/find_first_last.rs b/src/par_iter/find_first_last.rs index 02304e287..6ff6fb232 100644 --- a/src/par_iter/find_first_last.rs +++ b/src/par_iter/find_first_last.rs @@ -156,12 +156,24 @@ impl<'f, FIND_OP: 'f + Fn(&ITEM) -> bool, ITEM> Folder for FindFolder<'f, fn consume(mut self, item: ITEM) -> Self { if (self.find_op)(&item) { - // This may sometimes set best_found to a worse index than it was - // before, depending on timing. This means more consumers will - // continue to run than necessary, but the reducer will still ensure - // the correct value is returned. - self.best_found.swap(self.boundary, Ordering::Relaxed); - self.item = Some(item); + // Continuously try to set best_found until we succeed or we + // discover a better match was already found. + let mut current = self.best_found.load(Ordering::Relaxed); + let mut exchange_result = Result::Err(0); + while better_position(self.boundary, current, self.match_position) && + exchange_result.is_err() { + exchange_result = self.best_found.compare_exchange_weak(current, + self.boundary, + Ordering::Relaxed, + Ordering::Relaxed); + if exchange_result.is_err() { + current = self.best_found.load(Ordering::Relaxed); + } + } + + if exchange_result.is_ok() { + self.item = Some(item); + } } self } From fd191d89330b44d86bef1a8f9b1a20f8eb1c1f62 Mon Sep 17 00:00:00 2001 From: Jonathan Schuster Date: Thu, 22 Dec 2016 12:20:25 -0500 Subject: [PATCH 05/12] Update docs and comments for find_first/last --- src/par_iter/find_first_last.rs | 34 ++++++++++++++++++++------------- src/par_iter/mod.rs | 4 ++-- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/par_iter/find_first_last.rs b/src/par_iter/find_first_last.rs index 6ff6fb232..7c0c8cdd6 100644 --- a/src/par_iter/find_first_last.rs +++ b/src/par_iter/find_first_last.rs @@ -4,18 +4,22 @@ use super::internal::*; use super::*; use super::len::*; -// The consumer for find_first/find_last has fake indexes representing the lower -// and upper bounds of the "range" of data it consumes. This range does not -// correspond to indexes from the consumed iterator but rather indicate the -// consumer's position relative to other consumers. The purpose is to allow a -// consumer to know it should stop consuming items when another consumer finds a -// better match. - -// An indexed consumer could specialize to use the real indexes instead, but we -// don't implement that for now. The only downside of the current approach is -// that in some cases, iterators very close to each other will have the same -// range and therefore not be able to stop processing if one of them finds a -// better match than the others. +// The key optimization for find_first is that a consumer can stop its search if +// some consumer to its left already found a match (and similarly for consumers +// to the right for find_last). To make this work, all consumers need some +// notion of their position in the data relative to other consumers, including +// unindexed consumers that have no built-in notion of position. +// +// To solve this, we assign each consumer a lower and upper bound for an +// imaginary "range" of data that it consumes. The initial consumer starts with +// the range 0..usize::max_value(). The split divides this range in half so that +// one resulting consumer has the range 0..(usize::max_value() / 2), and the +// other has (usize::max_value() / 2)..usize::max_value(). Every subsequent +// split divides the range in half again until it cannot be split anymore +// (i.e. its length is 1), in which case the split returns two consumers with +// the same range. In that case both consumers will continue to consume all +// their data regardless of whether a better match is found, but the reducer +// will still return the correct answer. #[derive(Copy, Clone)] enum MatchPosition { @@ -124,7 +128,11 @@ impl<'f, ITEM, FIND_OP> UnindexedConsumer for FindConsumer<'f, FIND_OP> // Upper bound for one consumer will be lower bound for the other. This // overlap is okay, because only one of the bounds will be used for // comparing against best_found; the other is kept only to be able to - // divide the range in half + // divide the range in half. + // + // This code assumes that the caller of split_off will use the result as + // the *left* side of this iterator, and the remainder of self as the + // *right* side. let old_lower_bound = self.lower_bound.get(); let median = old_lower_bound + ((self.upper_bound - old_lower_bound) / 2); self.lower_bound.set(median); diff --git a/src/par_iter/mod.rs b/src/par_iter/mod.rs index 4aa793012..7a5d874f3 100644 --- a/src/par_iter/mod.rs +++ b/src/par_iter/mod.rs @@ -599,7 +599,7 @@ pub trait ParallelIterator: Sized { } #[doc(hidden)] - #[deprecated(note = "parallel `find` does not search in order -- use `find_any`")] + #[deprecated(note = "parallel `find` does not search in order -- use `find_any`, `find_first`, or `find_last`")] fn find(self, predicate: FIND_OP) -> Option where FIND_OP: Fn(&Self::Item) -> bool + Sync { @@ -880,7 +880,7 @@ pub trait IndexedParallelIterator: ExactParallelIterator { } #[doc(hidden)] - #[deprecated(note = "parallel `position` does not search in order -- use `position_any`")] + #[deprecated(note = "parallel `position` does not search in order -- use `position_any`, `position_first`, or `position_last`")] fn position(self, predicate: POSITION_OP) -> Option where POSITION_OP: Fn(Self::Item) -> bool + Sync { From ec9499e6be92f7fea38bb3bfb3d35970db82dc95 Mon Sep 17 00:00:00 2001 From: Jonathan Schuster Date: Thu, 22 Dec 2016 15:30:06 -0500 Subject: [PATCH 06/12] Avoid clobbering earlier results in find_first --- src/par_iter/find_first_last.rs | 53 +++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 5 deletions(-) diff --git a/src/par_iter/find_first_last.rs b/src/par_iter/find_first_last.rs index 7c0c8cdd6..afe668187 100644 --- a/src/par_iter/find_first_last.rs +++ b/src/par_iter/find_first_last.rs @@ -163,12 +163,17 @@ impl<'f, FIND_OP: 'f + Fn(&ITEM) -> bool, ITEM> Folder for FindFolder<'f, type Result = Option; fn consume(mut self, item: ITEM) -> Self { - if (self.find_op)(&item) { + let found_best_in_range = match self.match_position { + MatchPosition::Leftmost => self.item.is_some(), + MatchPosition::Rightmost => false, + }; + + if !found_best_in_range && (self.find_op)(&item) { // Continuously try to set best_found until we succeed or we // discover a better match was already found. let mut current = self.best_found.load(Ordering::Relaxed); let mut exchange_result = Result::Err(0); - while better_position(self.boundary, current, self.match_position) && + while !better_position(current, self.boundary, self.match_position) && exchange_result.is_err() { exchange_result = self.best_found.compare_exchange_weak(current, self.boundary, @@ -191,12 +196,50 @@ impl<'f, FIND_OP: 'f + Fn(&ITEM) -> bool, ITEM> Folder for FindFolder<'f, } fn full(&self) -> bool { - better_position(self.best_found.load(Ordering::Relaxed), - self.boundary, - self.match_position) + let found_best_in_range = match self.match_position { + MatchPosition::Leftmost => self.item.is_some(), + MatchPosition::Rightmost => false, + }; + + found_best_in_range || + better_position(self.best_found.load(Ordering::Relaxed), + self.boundary, + self.match_position) } } +// These tests requires that a folder be assigned to an iterator with more than +// one element. We can't necessarily determine when that will happen for a given +// input to find_first/find_last, so we test the folder directly here instead. +#[test] +pub fn find_first_folder_does_not_clobber_first_found() { + let best_found = AtomicUsize::new(usize::max_value()); + let f = FindFolder { + find_op: &(|&x: &i32| -> bool { true }), + boundary: 0, + match_position: MatchPosition::Leftmost, + best_found: &best_found, + item: None, + }; + let f = f.consume(0_i32).consume(1_i32).consume(2_i32); + assert!(f.full()); + assert_eq!(f.complete(), Some(0_i32)); +} + +#[test] +pub fn find_last_folder_yields_last_match() { + let best_found = AtomicUsize::new(0); + let f = FindFolder { + find_op: &(|&x: &i32| -> bool { true }), + boundary: 0, + match_position: MatchPosition::Rightmost, + best_found: &best_found, + item: None, + }; + let f = f.consume(0_i32).consume(1_i32).consume(2_i32); + assert_eq!(f.complete(), Some(2_i32)); +} + struct FindReducer { match_position: MatchPosition } From aca9f9f81106a19fcbe29aef81d8d02f714bef8b Mon Sep 17 00:00:00 2001 From: Jonathan Schuster Date: Fri, 23 Dec 2016 11:30:32 -0500 Subject: [PATCH 07/12] Test for index resolution exhaustion in find_first/last --- src/par_iter/find_first_last.rs | 50 +++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/src/par_iter/find_first_last.rs b/src/par_iter/find_first_last.rs index afe668187..7954fb203 100644 --- a/src/par_iter/find_first_last.rs +++ b/src/par_iter/find_first_last.rs @@ -120,6 +120,52 @@ impl<'f, ITEM, FIND_OP> Consumer for FindConsumer<'f, FIND_OP> } } +#[test] +fn same_range_consumers_return_correct_answer() { + let find_op = |x: &i32| x % 2 == 0; + + let first_found = AtomicUsize::new(usize::max_value()); + let first_consumer = FindConsumer::new(&find_op, + MatchPosition::Leftmost, + &first_found); + + // split until we have an indivisible range + let bits_in_usize = usize::min_value().count_zeros(); + for i in 0..bits_in_usize { + first_consumer.split_off(); + } + + let first_reducer = first_consumer.to_reducer(); + // the left and right folders should now have the same range, having + // exhausted the resolution of usize + let left_first_folder = first_consumer.split_off().into_folder(); + let right_first_folder = first_consumer.into_folder(); + + let right_first_folder = right_first_folder.consume(2).consume(3); + let left_first_folder = left_first_folder.consume(0).consume(1); + assert_eq!(first_reducer.reduce(left_first_folder.complete(), + right_first_folder.complete()), + Some(0)); + + // same test, but for find_last + let last_found = AtomicUsize::new(0); + let last_consumer = FindConsumer::new(&find_op, + MatchPosition::Rightmost, + &last_found); + for i in 0..bits_in_usize { + last_consumer.split_off(); + } + + let last_reducer = last_consumer.to_reducer(); + let left_last_folder = last_consumer.split_off().into_folder(); + let right_last_folder = last_consumer.into_folder(); + let right_last_folder = right_last_folder.consume(2).consume(3); + let left_last_folder = left_last_folder.consume(0).consume(1); + assert_eq!(last_reducer.reduce(left_last_folder.complete(), + right_last_folder.complete()), + Some(2)); +} + impl<'f, ITEM, FIND_OP> UnindexedConsumer for FindConsumer<'f, FIND_OP> where ITEM: Send, FIND_OP: Fn(&ITEM) -> bool + Sync @@ -212,7 +258,7 @@ impl<'f, FIND_OP: 'f + Fn(&ITEM) -> bool, ITEM> Folder for FindFolder<'f, // one element. We can't necessarily determine when that will happen for a given // input to find_first/find_last, so we test the folder directly here instead. #[test] -pub fn find_first_folder_does_not_clobber_first_found() { +fn find_first_folder_does_not_clobber_first_found() { let best_found = AtomicUsize::new(usize::max_value()); let f = FindFolder { find_op: &(|&x: &i32| -> bool { true }), @@ -227,7 +273,7 @@ pub fn find_first_folder_does_not_clobber_first_found() { } #[test] -pub fn find_last_folder_yields_last_match() { +fn find_last_folder_yields_last_match() { let best_found = AtomicUsize::new(0); let f = FindFolder { find_op: &(|&x: &i32| -> bool { true }), From 17d2652b116235a124edc9445d129e776862da76 Mon Sep 17 00:00:00 2001 From: Jonathan Schuster Date: Fri, 23 Dec 2016 13:16:11 -0500 Subject: [PATCH 08/12] Improve the same-range test for find_first/last --- src/par_iter/find_first_last.rs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/par_iter/find_first_last.rs b/src/par_iter/find_first_last.rs index 7954fb203..db212d3a9 100644 --- a/src/par_iter/find_first_last.rs +++ b/src/par_iter/find_first_last.rs @@ -141,8 +141,12 @@ fn same_range_consumers_return_correct_answer() { let left_first_folder = first_consumer.split_off().into_folder(); let right_first_folder = first_consumer.into_folder(); - let right_first_folder = right_first_folder.consume(2).consume(3); let left_first_folder = left_first_folder.consume(0).consume(1); + assert_eq!(left_first_folder.boundary, right_first_folder.boundary); + // expect not full even though a better match has been found because the + // ranges are the same + assert!(!right_first_folder.full()); + let right_first_folder = right_first_folder.consume(2).consume(3); assert_eq!(first_reducer.reduce(left_first_folder.complete(), right_first_folder.complete()), Some(0)); @@ -157,9 +161,17 @@ fn same_range_consumers_return_correct_answer() { } let last_reducer = last_consumer.to_reducer(); + // due to the exact calculation in split_off, the very last consumer has a + // range of width 2, so we use the second-to-last consumer instead to get + // the same boundary on both folders + let last_consumer = last_consumer.split_off(); let left_last_folder = last_consumer.split_off().into_folder(); let right_last_folder = last_consumer.into_folder(); let right_last_folder = right_last_folder.consume(2).consume(3); + assert_eq!(left_last_folder.boundary, right_last_folder.boundary); + // expect not full even though a better match has been found because the + // ranges are the same + assert!(!left_last_folder.full()); let left_last_folder = left_last_folder.consume(0).consume(1); assert_eq!(last_reducer.reduce(left_last_folder.complete(), right_last_folder.complete()), @@ -176,6 +188,13 @@ impl<'f, ITEM, FIND_OP> UnindexedConsumer for FindConsumer<'f, FIND_OP> // comparing against best_found; the other is kept only to be able to // divide the range in half. // + // When the resolution of usize has been exhausted (i.e. when + // upper_bound = lower_bound), both results of this split will have the + // same range. When that happens, we lose the ability to tell one + // consumer to stop working when the other finds a better match, but the + // reducer ensures that the best answer is still returned (see the test + // above). + // // This code assumes that the caller of split_off will use the result as // the *left* side of this iterator, and the remainder of self as the // *right* side. From dec880d9c0c104484da992a7853a04a97c6be003 Mon Sep 17 00:00:00 2001 From: Jonathan Schuster Date: Fri, 30 Dec 2016 13:03:32 -0500 Subject: [PATCH 09/12] Split same_range test into two tests --- src/par_iter/find_first_last.rs | 59 ++++++++++++++++----------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/src/par_iter/find_first_last.rs b/src/par_iter/find_first_last.rs index db212d3a9..b68eed049 100644 --- a/src/par_iter/find_first_last.rs +++ b/src/par_iter/find_first_last.rs @@ -121,60 +121,59 @@ impl<'f, ITEM, FIND_OP> Consumer for FindConsumer<'f, FIND_OP> } #[test] -fn same_range_consumers_return_correct_answer() { +fn same_range_first_consumers_return_correct_answer() { let find_op = |x: &i32| x % 2 == 0; - let first_found = AtomicUsize::new(usize::max_value()); - let first_consumer = FindConsumer::new(&find_op, - MatchPosition::Leftmost, - &first_found); + let consumer = FindConsumer::new(&find_op, MatchPosition::Leftmost, &first_found); // split until we have an indivisible range let bits_in_usize = usize::min_value().count_zeros(); for i in 0..bits_in_usize { - first_consumer.split_off(); + consumer.split_off(); } - let first_reducer = first_consumer.to_reducer(); + let reducer = consumer.to_reducer(); // the left and right folders should now have the same range, having // exhausted the resolution of usize - let left_first_folder = first_consumer.split_off().into_folder(); - let right_first_folder = first_consumer.into_folder(); + let left_folder = consumer.split_off().into_folder(); + let right_folder = consumer.into_folder(); - let left_first_folder = left_first_folder.consume(0).consume(1); - assert_eq!(left_first_folder.boundary, right_first_folder.boundary); + let left_folder = left_folder.consume(0).consume(1); + assert_eq!(left_folder.boundary, right_folder.boundary); // expect not full even though a better match has been found because the // ranges are the same - assert!(!right_first_folder.full()); - let right_first_folder = right_first_folder.consume(2).consume(3); - assert_eq!(first_reducer.reduce(left_first_folder.complete(), - right_first_folder.complete()), + assert!(!right_folder.full()); + let right_folder = right_folder.consume(2).consume(3); + assert_eq!(reducer.reduce(left_folder.complete(), right_folder.complete()), Some(0)); +} - // same test, but for find_last +#[test] +fn same_range_last_consumers_return_correct_answer() { + let find_op = |x: &i32| x % 2 == 0; let last_found = AtomicUsize::new(0); - let last_consumer = FindConsumer::new(&find_op, - MatchPosition::Rightmost, - &last_found); + let consumer = FindConsumer::new(&find_op, MatchPosition::Rightmost, &last_found); + + // split until we have an indivisible range + let bits_in_usize = usize::min_value().count_zeros(); for i in 0..bits_in_usize { - last_consumer.split_off(); + consumer.split_off(); } - let last_reducer = last_consumer.to_reducer(); + let reducer = consumer.to_reducer(); // due to the exact calculation in split_off, the very last consumer has a // range of width 2, so we use the second-to-last consumer instead to get // the same boundary on both folders - let last_consumer = last_consumer.split_off(); - let left_last_folder = last_consumer.split_off().into_folder(); - let right_last_folder = last_consumer.into_folder(); - let right_last_folder = right_last_folder.consume(2).consume(3); - assert_eq!(left_last_folder.boundary, right_last_folder.boundary); + let consumer = consumer.split_off(); + let left_folder = consumer.split_off().into_folder(); + let right_folder = consumer.into_folder(); + let right_folder = right_folder.consume(2).consume(3); + assert_eq!(left_folder.boundary, right_folder.boundary); // expect not full even though a better match has been found because the // ranges are the same - assert!(!left_last_folder.full()); - let left_last_folder = left_last_folder.consume(0).consume(1); - assert_eq!(last_reducer.reduce(left_last_folder.complete(), - right_last_folder.complete()), + assert!(!left_folder.full()); + let left_folder = left_folder.consume(0).consume(1); + assert_eq!(reducer.reduce(left_folder.complete(), right_folder.complete()), Some(2)); } From 5c359ab4094433f7cb909d1531c6c73a89f6afb9 Mon Sep 17 00:00:00 2001 From: Jonathan Schuster Date: Fri, 30 Dec 2016 15:54:03 -0500 Subject: [PATCH 10/12] Test fullness of find_first/last consumers outside exhausted range --- src/par_iter/find_first_last.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/par_iter/find_first_last.rs b/src/par_iter/find_first_last.rs index b68eed049..96d9aef91 100644 --- a/src/par_iter/find_first_last.rs +++ b/src/par_iter/find_first_last.rs @@ -124,7 +124,11 @@ impl<'f, ITEM, FIND_OP> Consumer for FindConsumer<'f, FIND_OP> fn same_range_first_consumers_return_correct_answer() { let find_op = |x: &i32| x % 2 == 0; let first_found = AtomicUsize::new(usize::max_value()); - let consumer = FindConsumer::new(&find_op, MatchPosition::Leftmost, &first_found); + let far_right_consumer = FindConsumer::new(&find_op, MatchPosition::Leftmost, &first_found); + + // We save a consumer that will be far to the right of the main consumer (and therefore not + // sharing an index range with that consumer) for fullness testing + let consumer = far_right_consumer.split_off(); // split until we have an indivisible range let bits_in_usize = usize::min_value().count_zeros(); @@ -143,6 +147,7 @@ fn same_range_first_consumers_return_correct_answer() { // expect not full even though a better match has been found because the // ranges are the same assert!(!right_folder.full()); + assert!(far_right_consumer.full()); let right_folder = right_folder.consume(2).consume(3); assert_eq!(reducer.reduce(left_folder.complete(), right_folder.complete()), Some(0)); @@ -154,6 +159,10 @@ fn same_range_last_consumers_return_correct_answer() { let last_found = AtomicUsize::new(0); let consumer = FindConsumer::new(&find_op, MatchPosition::Rightmost, &last_found); + // We save a consumer that will be far to the left of the main consumer (and therefore not + // sharing an index range with that consumer) for fullness testing + let far_left_consumer = consumer.split_off(); + // split until we have an indivisible range let bits_in_usize = usize::min_value().count_zeros(); for i in 0..bits_in_usize { @@ -172,6 +181,7 @@ fn same_range_last_consumers_return_correct_answer() { // expect not full even though a better match has been found because the // ranges are the same assert!(!left_folder.full()); + assert!(far_left_consumer.full()); let left_folder = left_folder.consume(0).consume(1); assert_eq!(reducer.reduce(left_folder.complete(), right_folder.complete()), Some(2)); From efeee217f62d0d7c239cdc5cc32e290c71e0bca9 Mon Sep 17 00:00:00 2001 From: Jonathan Schuster Date: Sat, 31 Dec 2016 11:32:41 -0500 Subject: [PATCH 11/12] Rewrite the loop to set best_found in find_first/last --- src/par_iter/find_first_last.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/par_iter/find_first_last.rs b/src/par_iter/find_first_last.rs index 96d9aef91..f60d97c21 100644 --- a/src/par_iter/find_first_last.rs +++ b/src/par_iter/find_first_last.rs @@ -246,20 +246,20 @@ impl<'f, FIND_OP: 'f + Fn(&ITEM) -> bool, ITEM> Folder for FindFolder<'f, // Continuously try to set best_found until we succeed or we // discover a better match was already found. let mut current = self.best_found.load(Ordering::Relaxed); - let mut exchange_result = Result::Err(0); - while !better_position(current, self.boundary, self.match_position) && - exchange_result.is_err() { - exchange_result = self.best_found.compare_exchange_weak(current, - self.boundary, - Ordering::Relaxed, - Ordering::Relaxed); - if exchange_result.is_err() { - current = self.best_found.load(Ordering::Relaxed); + loop { + if better_position(current, self.boundary, self.match_position) { + break; + } + match self.best_found.compare_exchange_weak(current, + self.boundary, + Ordering::Relaxed, + Ordering::Relaxed) { + Ok(_) => { + self.item = Some(item); + break; + }, + Err(v) => current = v, } - } - - if exchange_result.is_ok() { - self.item = Some(item); } } self From e2977b1fd431b3ac9fb092235bb2e7211dc389ae Mon Sep 17 00:00:00 2001 From: Jonathan Schuster Date: Sat, 31 Dec 2016 11:38:44 -0500 Subject: [PATCH 12/12] Move find_first/last tests to a separate file --- .../mod.rs} | 70 +------------------ src/par_iter/find_first_last/test.rs | 69 ++++++++++++++++++ 2 files changed, 72 insertions(+), 67 deletions(-) rename src/par_iter/{find_first_last.rs => find_first_last/mod.rs} (76%) create mode 100644 src/par_iter/find_first_last/test.rs diff --git a/src/par_iter/find_first_last.rs b/src/par_iter/find_first_last/mod.rs similarity index 76% rename from src/par_iter/find_first_last.rs rename to src/par_iter/find_first_last/mod.rs index f60d97c21..1e8f21aee 100644 --- a/src/par_iter/find_first_last.rs +++ b/src/par_iter/find_first_last/mod.rs @@ -4,6 +4,9 @@ use super::internal::*; use super::*; use super::len::*; +#[cfg(test)] +mod test; + // The key optimization for find_first is that a consumer can stop its search if // some consumer to its left already found a match (and similarly for consumers // to the right for find_last). To make this work, all consumers need some @@ -120,73 +123,6 @@ impl<'f, ITEM, FIND_OP> Consumer for FindConsumer<'f, FIND_OP> } } -#[test] -fn same_range_first_consumers_return_correct_answer() { - let find_op = |x: &i32| x % 2 == 0; - let first_found = AtomicUsize::new(usize::max_value()); - let far_right_consumer = FindConsumer::new(&find_op, MatchPosition::Leftmost, &first_found); - - // We save a consumer that will be far to the right of the main consumer (and therefore not - // sharing an index range with that consumer) for fullness testing - let consumer = far_right_consumer.split_off(); - - // split until we have an indivisible range - let bits_in_usize = usize::min_value().count_zeros(); - for i in 0..bits_in_usize { - consumer.split_off(); - } - - let reducer = consumer.to_reducer(); - // the left and right folders should now have the same range, having - // exhausted the resolution of usize - let left_folder = consumer.split_off().into_folder(); - let right_folder = consumer.into_folder(); - - let left_folder = left_folder.consume(0).consume(1); - assert_eq!(left_folder.boundary, right_folder.boundary); - // expect not full even though a better match has been found because the - // ranges are the same - assert!(!right_folder.full()); - assert!(far_right_consumer.full()); - let right_folder = right_folder.consume(2).consume(3); - assert_eq!(reducer.reduce(left_folder.complete(), right_folder.complete()), - Some(0)); -} - -#[test] -fn same_range_last_consumers_return_correct_answer() { - let find_op = |x: &i32| x % 2 == 0; - let last_found = AtomicUsize::new(0); - let consumer = FindConsumer::new(&find_op, MatchPosition::Rightmost, &last_found); - - // We save a consumer that will be far to the left of the main consumer (and therefore not - // sharing an index range with that consumer) for fullness testing - let far_left_consumer = consumer.split_off(); - - // split until we have an indivisible range - let bits_in_usize = usize::min_value().count_zeros(); - for i in 0..bits_in_usize { - consumer.split_off(); - } - - let reducer = consumer.to_reducer(); - // due to the exact calculation in split_off, the very last consumer has a - // range of width 2, so we use the second-to-last consumer instead to get - // the same boundary on both folders - let consumer = consumer.split_off(); - let left_folder = consumer.split_off().into_folder(); - let right_folder = consumer.into_folder(); - let right_folder = right_folder.consume(2).consume(3); - assert_eq!(left_folder.boundary, right_folder.boundary); - // expect not full even though a better match has been found because the - // ranges are the same - assert!(!left_folder.full()); - assert!(far_left_consumer.full()); - let left_folder = left_folder.consume(0).consume(1); - assert_eq!(reducer.reduce(left_folder.complete(), right_folder.complete()), - Some(2)); -} - impl<'f, ITEM, FIND_OP> UnindexedConsumer for FindConsumer<'f, FIND_OP> where ITEM: Send, FIND_OP: Fn(&ITEM) -> bool + Sync diff --git a/src/par_iter/find_first_last/test.rs b/src/par_iter/find_first_last/test.rs new file mode 100644 index 000000000..c973f61ea --- /dev/null +++ b/src/par_iter/find_first_last/test.rs @@ -0,0 +1,69 @@ +use std::sync::atomic::AtomicUsize; +use super::*; + +#[test] +fn same_range_first_consumers_return_correct_answer() { + let find_op = |x: &i32| x % 2 == 0; + let first_found = AtomicUsize::new(usize::max_value()); + let far_right_consumer = FindConsumer::new(&find_op, MatchPosition::Leftmost, &first_found); + + // We save a consumer that will be far to the right of the main consumer (and therefore not + // sharing an index range with that consumer) for fullness testing + let consumer = far_right_consumer.split_off(); + + // split until we have an indivisible range + let bits_in_usize = usize::min_value().count_zeros(); + for i in 0..bits_in_usize { + consumer.split_off(); + } + + let reducer = consumer.to_reducer(); + // the left and right folders should now have the same range, having + // exhausted the resolution of usize + let left_folder = consumer.split_off().into_folder(); + let right_folder = consumer.into_folder(); + + let left_folder = left_folder.consume(0).consume(1); + assert_eq!(left_folder.boundary, right_folder.boundary); + // expect not full even though a better match has been found because the + // ranges are the same + assert!(!right_folder.full()); + assert!(far_right_consumer.full()); + let right_folder = right_folder.consume(2).consume(3); + assert_eq!(reducer.reduce(left_folder.complete(), right_folder.complete()), + Some(0)); +} + +#[test] +fn same_range_last_consumers_return_correct_answer() { + let find_op = |x: &i32| x % 2 == 0; + let last_found = AtomicUsize::new(0); + let consumer = FindConsumer::new(&find_op, MatchPosition::Rightmost, &last_found); + + // We save a consumer that will be far to the left of the main consumer (and therefore not + // sharing an index range with that consumer) for fullness testing + let far_left_consumer = consumer.split_off(); + + // split until we have an indivisible range + let bits_in_usize = usize::min_value().count_zeros(); + for i in 0..bits_in_usize { + consumer.split_off(); + } + + let reducer = consumer.to_reducer(); + // due to the exact calculation in split_off, the very last consumer has a + // range of width 2, so we use the second-to-last consumer instead to get + // the same boundary on both folders + let consumer = consumer.split_off(); + let left_folder = consumer.split_off().into_folder(); + let right_folder = consumer.into_folder(); + let right_folder = right_folder.consume(2).consume(3); + assert_eq!(left_folder.boundary, right_folder.boundary); + // expect not full even though a better match has been found because the + // ranges are the same + assert!(!left_folder.full()); + assert!(far_left_consumer.full()); + let left_folder = left_folder.consume(0).consume(1); + assert_eq!(reducer.reduce(left_folder.complete(), right_folder.complete()), + Some(2)); +}