@@ -74,21 +74,22 @@ std::vector<IndexRange> splitInputRange(const IndexRange& range,
7474 threads = parsedThreads;
7575 }
7676
77- // do we need to dial back the threads based on the grainSize?
77+ // compute grainSize (including enforcing requested minimum)
7878 std::size_t length = range.end () - range.begin ();
79- threads = std::min (threads, length / grainSize);
80-
81- // determine the chunk size
82- std::size_t chunkSize = length / threads;
79+ if (threads == 1 )
80+ grainSize = length;
81+ else if ((length % threads) == 0 ) // perfect division
82+ grainSize = std::max (length / threads, grainSize);
83+ else // imperfect division, divide by threads - 1
84+ grainSize = std::max (length / (threads-1 ), grainSize);
8385
8486 // allocate ranges
8587 std::vector<IndexRange> ranges;
86- std::size_t nextIndex = range.begin ();
87- for (std::size_t i = 0 ; i<threads; i++) {
88- std::size_t begin = nextIndex;
89- std::size_t end = std::min (begin + chunkSize, range.end ());
90- ranges.push_back (IndexRange (begin, end));
91- nextIndex = end;
88+ std::size_t begin = range.begin ();
89+ while (begin < range.end ()) {
90+ std::size_t end = std::min (begin + grainSize, range.end ());
91+ ranges.push_back (IndexRange (begin, end));
92+ begin = end;
9293 }
9394
9495 // return ranges
0 commit comments