Function block [src]

Alias for std.sort.block.block

Stable in-place sort. O(n) best case, O(n*log(n)) worst case and average case. O(1) memory (no allocator required). Sorts in ascending order with respect to the given lessThan function. NOTE: The algorithm only works when the comparison is less-than or greater-than. (See https://github.com/ziglang/zig/issues/8289)

Prototype

pub fn block( comptime T: type, items: []T, context: anytype, comptime lessThanFn: fn (@TypeOf(context), lhs: T, rhs: T) bool, ) void

Parameters

T: typeitems: []TlessThanFn: fn (@TypeOf(context), lhs: T, rhs: T) bool

Source

pub fn block( comptime T: type, items: []T, context: anytype, comptime lessThanFn: fn (@TypeOf(context), lhs: T, rhs: T) bool, ) void { const lessThan = if (builtin.mode == .Debug) struct { fn lessThan(ctx: @TypeOf(context), lhs: T, rhs: T) bool { const lt = lessThanFn(ctx, lhs, rhs); const gt = lessThanFn(ctx, rhs, lhs); std.debug.assert(!(lt and gt)); return lt; } }.lessThan else lessThanFn; // Implementation ported from https://github.com/BonzaiThePenguin/WikiSort/blob/master/WikiSort.c var cache: [512]T = undefined; if (items.len < 4) { if (items.len == 3) { // hard coded insertion sort if (lessThan(context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); if (lessThan(context, items[2], items[1])) { mem.swap(T, &items[1], &items[2]); if (lessThan(context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); } } else if (items.len == 2) { if (lessThan(context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); } return; } // sort groups of 4-8 items at a time using an unstable sorting network, // but keep track of the original item orders to force it to be stable // http://pages.ripco.net/~jgamble/nw.html var iterator = Iterator.init(items.len, 4); while (!iterator.finished()) { var order = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7 }; const range = iterator.nextRange(); const sliced_items = items[range.start..]; switch (range.length()) { 8 => { swap(T, sliced_items, &order, 0, 1, context, lessThan); swap(T, sliced_items, &order, 2, 3, context, lessThan); swap(T, sliced_items, &order, 4, 5, context, lessThan); swap(T, sliced_items, &order, 6, 7, context, lessThan); swap(T, sliced_items, &order, 0, 2, context, lessThan); swap(T, sliced_items, &order, 1, 3, context, lessThan); swap(T, sliced_items, &order, 4, 6, context, lessThan); swap(T, sliced_items, &order, 5, 7, context, lessThan); swap(T, sliced_items, &order, 1, 2, context, lessThan); swap(T, sliced_items, &order, 5, 6, context, lessThan); swap(T, sliced_items, &order, 0, 4, context, lessThan); swap(T, sliced_items, &order, 3, 7, context, lessThan); swap(T, sliced_items, &order, 1, 5, context, lessThan); swap(T, sliced_items, &order, 2, 6, context, lessThan); swap(T, sliced_items, &order, 1, 4, context, lessThan); swap(T, sliced_items, &order, 3, 6, context, lessThan); swap(T, sliced_items, &order, 2, 4, context, lessThan); swap(T, sliced_items, &order, 3, 5, context, lessThan); swap(T, sliced_items, &order, 3, 4, context, lessThan); }, 7 => { swap(T, sliced_items, &order, 1, 2, context, lessThan); swap(T, sliced_items, &order, 3, 4, context, lessThan); swap(T, sliced_items, &order, 5, 6, context, lessThan); swap(T, sliced_items, &order, 0, 2, context, lessThan); swap(T, sliced_items, &order, 3, 5, context, lessThan); swap(T, sliced_items, &order, 4, 6, context, lessThan); swap(T, sliced_items, &order, 0, 1, context, lessThan); swap(T, sliced_items, &order, 4, 5, context, lessThan); swap(T, sliced_items, &order, 2, 6, context, lessThan); swap(T, sliced_items, &order, 0, 4, context, lessThan); swap(T, sliced_items, &order, 1, 5, context, lessThan); swap(T, sliced_items, &order, 0, 3, context, lessThan); swap(T, sliced_items, &order, 2, 5, context, lessThan); swap(T, sliced_items, &order, 1, 3, context, lessThan); swap(T, sliced_items, &order, 2, 4, context, lessThan); swap(T, sliced_items, &order, 2, 3, context, lessThan); }, 6 => { swap(T, sliced_items, &order, 1, 2, context, lessThan); swap(T, sliced_items, &order, 4, 5, context, lessThan); swap(T, sliced_items, &order, 0, 2, context, lessThan); swap(T, sliced_items, &order, 3, 5, context, lessThan); swap(T, sliced_items, &order, 0, 1, context, lessThan); swap(T, sliced_items, &order, 3, 4, context, lessThan); swap(T, sliced_items, &order, 2, 5, context, lessThan); swap(T, sliced_items, &order, 0, 3, context, lessThan); swap(T, sliced_items, &order, 1, 4, context, lessThan); swap(T, sliced_items, &order, 2, 4, context, lessThan); swap(T, sliced_items, &order, 1, 3, context, lessThan); swap(T, sliced_items, &order, 2, 3, context, lessThan); }, 5 => { swap(T, sliced_items, &order, 0, 1, context, lessThan); swap(T, sliced_items, &order, 3, 4, context, lessThan); swap(T, sliced_items, &order, 2, 4, context, lessThan); swap(T, sliced_items, &order, 2, 3, context, lessThan); swap(T, sliced_items, &order, 1, 4, context, lessThan); swap(T, sliced_items, &order, 0, 3, context, lessThan); swap(T, sliced_items, &order, 0, 2, context, lessThan); swap(T, sliced_items, &order, 1, 3, context, lessThan); swap(T, sliced_items, &order, 1, 2, context, lessThan); }, 4 => { swap(T, sliced_items, &order, 0, 1, context, lessThan); swap(T, sliced_items, &order, 2, 3, context, lessThan); swap(T, sliced_items, &order, 0, 2, context, lessThan); swap(T, sliced_items, &order, 1, 3, context, lessThan); swap(T, sliced_items, &order, 1, 2, context, lessThan); }, else => {}, } } if (items.len < 8) return; // then merge sort the higher levels, which can be 8-15, 16-31, 32-63, 64-127, etc. while (true) { // if every A and B block will fit into the cache, use a special branch // specifically for merging with the cache // (we use < rather than <= since the block size might be one more than // iterator.length()) if (iterator.length() < cache.len) { // if four subarrays fit into the cache, it's faster to merge both // pairs of subarrays into the cache, // then merge the two merged subarrays from the cache back into the original array if ((iterator.length() + 1) * 4 <= cache.len and iterator.length() * 4 <= items.len) { iterator.begin(); while (!iterator.finished()) { // merge A1 and B1 into the cache var A1 = iterator.nextRange(); var B1 = iterator.nextRange(); var A2 = iterator.nextRange(); var B2 = iterator.nextRange(); if (lessThan(context, items[B1.end - 1], items[A1.start])) { // the two ranges are in reverse order, so copy them in reverse order into the cache const a1_items = items[A1.start..A1.end]; @memcpy(cache[B1.length()..][0..a1_items.len], a1_items); const b1_items = items[B1.start..B1.end]; @memcpy(cache[0..b1_items.len], b1_items); } else if (lessThan(context, items[B1.start], items[A1.end - 1])) { // these two ranges weren't already in order, so merge them into the cache mergeInto(T, items, A1, B1, cache[0..], context, lessThan); } else { // if A1, B1, A2, and B2 are all in order, skip doing anything else if (!lessThan(context, items[B2.start], items[A2.end - 1]) and !lessThan(context, items[A2.start], items[B1.end - 1])) continue; // copy A1 and B1 into the cache in the same order const a1_items = items[A1.start..A1.end]; @memcpy(cache[0..a1_items.len], a1_items); const b1_items = items[B1.start..B1.end]; @memcpy(cache[A1.length()..][0..b1_items.len], b1_items); } A1 = Range.init(A1.start, B1.end); // merge A2 and B2 into the cache if (lessThan(context, items[B2.end - 1], items[A2.start])) { // the two ranges are in reverse order, so copy them in reverse order into the cache const a2_items = items[A2.start..A2.end]; @memcpy(cache[A1.length() + B2.length() ..][0..a2_items.len], a2_items); const b2_items = items[B2.start..B2.end]; @memcpy(cache[A1.length()..][0..b2_items.len], b2_items); } else if (lessThan(context, items[B2.start], items[A2.end - 1])) { // these two ranges weren't already in order, so merge them into the cache mergeInto(T, items, A2, B2, cache[A1.length()..], context, lessThan); } else { // copy A2 and B2 into the cache in the same order const a2_items = items[A2.start..A2.end]; @memcpy(cache[A1.length()..][0..a2_items.len], a2_items); const b2_items = items[B2.start..B2.end]; @memcpy(cache[A1.length() + A2.length() ..][0..b2_items.len], b2_items); } A2 = Range.init(A2.start, B2.end); // merge A1 and A2 from the cache into the items const A3 = Range.init(0, A1.length()); const B3 = Range.init(A1.length(), A1.length() + A2.length()); if (lessThan(context, cache[B3.end - 1], cache[A3.start])) { // the two ranges are in reverse order, so copy them in reverse order into the items const a3_items = cache[A3.start..A3.end]; @memcpy(items[A1.start + A2.length() ..][0..a3_items.len], a3_items); const b3_items = cache[B3.start..B3.end]; @memcpy(items[A1.start..][0..b3_items.len], b3_items); } else if (lessThan(context, cache[B3.start], cache[A3.end - 1])) { // these two ranges weren't already in order, so merge them back into the items mergeInto(T, cache[0..], A3, B3, items[A1.start..], context, lessThan); } else { // copy A3 and B3 into the items in the same order const a3_items = cache[A3.start..A3.end]; @memcpy(items[A1.start..][0..a3_items.len], a3_items); const b3_items = cache[B3.start..B3.end]; @memcpy(items[A1.start + A1.length() ..][0..b3_items.len], b3_items); } } // we merged two levels at the same time, so we're done with this level already // (iterator.nextLevel() is called again at the bottom of this outer merge loop) _ = iterator.nextLevel(); } else { iterator.begin(); while (!iterator.finished()) { const A = iterator.nextRange(); const B = iterator.nextRange(); if (lessThan(context, items[B.end - 1], items[A.start])) { // the two ranges are in reverse order, so a simple rotation should fix it mem.rotate(T, items[A.start..B.end], A.length()); } else if (lessThan(context, items[B.start], items[A.end - 1])) { // these two ranges weren't already in order, so we'll need to merge them! const a_items = items[A.start..A.end]; @memcpy(cache[0..a_items.len], a_items); mergeExternal(T, items, A, B, cache[0..], context, lessThan); } } } } else { // this is where the in-place merge logic starts! // 1. pull out two internal buffers each containing √A unique values // 1a. adjust block_size and buffer_size if we couldn't find enough unique values // 2. loop over the A and B subarrays within this level of the merge sort // 3. break A and B into blocks of size 'block_size' // 4. "tag" each of the A blocks with values from the first internal buffer // 5. roll the A blocks through the B blocks and drop/rotate them where they belong // 6. merge each A block with any B values that follow, using the cache or the second internal buffer // 7. sort the second internal buffer if it exists // 8. redistribute the two internal buffers back into the items var block_size: usize = math.sqrt(iterator.length()); var buffer_size = iterator.length() / block_size + 1; // as an optimization, we really only need to pull out the internal buffers once for each level of merges // after that we can reuse the same buffers over and over, then redistribute it when we're finished with this level var A: Range = undefined; var B: Range = undefined; var index: usize = 0; var last: usize = 0; var count: usize = 0; var find: usize = 0; var start: usize = 0; var pull_index: usize = 0; var pull = [_]Pull{ Pull{ .from = 0, .to = 0, .count = 0, .range = Range.init(0, 0), }, Pull{ .from = 0, .to = 0, .count = 0, .range = Range.init(0, 0), }, }; var buffer1 = Range.init(0, 0); var buffer2 = Range.init(0, 0); // find two internal buffers of size 'buffer_size' each find = buffer_size + buffer_size; var find_separately = false; if (block_size <= cache.len) { // if every A block fits into the cache then we won't need the second internal buffer, // so we really only need to find 'buffer_size' unique values find = buffer_size; } else if (find > iterator.length()) { // we can't fit both buffers into the same A or B subarray, so find two buffers separately find = buffer_size; find_separately = true; } // we need to find either a single contiguous space containing 2√A unique values (which will be split up into two buffers of size √A each), // or we need to find one buffer of < 2√A unique values, and a second buffer of √A unique values, // OR if we couldn't find that many unique values, we need the largest possible buffer we can get // in the case where it couldn't find a single buffer of at least √A unique values, // all of the Merge steps must be replaced by a different merge algorithm (MergeInPlace) iterator.begin(); while (!iterator.finished()) { A = iterator.nextRange(); B = iterator.nextRange(); // just store information about where the values will be pulled from and to, // as well as how many values there are, to create the two internal buffers // check A for the number of unique values we need to fill an internal buffer // these values will be pulled out to the start of A last = A.start; count = 1; while (count < find) : ({ last = index; count += 1; }) { index = findLastForward(T, items, items[last], Range.init(last + 1, A.end), find - count, context, lessThan); if (index == A.end) break; } index = last; if (count >= buffer_size) { // keep track of the range within the items where we'll need to "pull out" these values to create the internal buffer pull[pull_index] = Pull{ .range = Range.init(A.start, B.end), .count = count, .from = index, .to = A.start, }; pull_index = 1; if (count == buffer_size + buffer_size) { // we were able to find a single contiguous section containing 2√A unique values, // so this section can be used to contain both of the internal buffers we'll need buffer1 = Range.init(A.start, A.start + buffer_size); buffer2 = Range.init(A.start + buffer_size, A.start + count); break; } else if (find == buffer_size + buffer_size) { // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values, // so we still need to find a second separate buffer of at least √A unique values buffer1 = Range.init(A.start, A.start + count); find = buffer_size; } else if (block_size <= cache.len) { // we found the first and only internal buffer that we need, so we're done! buffer1 = Range.init(A.start, A.start + count); break; } else if (find_separately) { // found one buffer, but now find the other one buffer1 = Range.init(A.start, A.start + count); find_separately = false; } else { // we found a second buffer in an 'A' subarray containing √A unique values, so we're done! buffer2 = Range.init(A.start, A.start + count); break; } } else if (pull_index == 0 and count > buffer1.length()) { // keep track of the largest buffer we were able to find buffer1 = Range.init(A.start, A.start + count); pull[pull_index] = Pull{ .range = Range.init(A.start, B.end), .count = count, .from = index, .to = A.start, }; } // check B for the number of unique values we need to fill an internal buffer // these values will be pulled out to the end of B last = B.end - 1; count = 1; while (count < find) : ({ last = index - 1; count += 1; }) { index = findFirstBackward(T, items, items[last], Range.init(B.start, last), find - count, context, lessThan); if (index == B.start) break; } index = last; if (count >= buffer_size) { // keep track of the range within the items where we'll need to "pull out" these values to create the internal buffe pull[pull_index] = Pull{ .range = Range.init(A.start, B.end), .count = count, .from = index, .to = B.end, }; pull_index = 1; if (count == buffer_size + buffer_size) { // we were able to find a single contiguous section containing 2√A unique values, // so this section can be used to contain both of the internal buffers we'll need buffer1 = Range.init(B.end - count, B.end - buffer_size); buffer2 = Range.init(B.end - buffer_size, B.end); break; } else if (find == buffer_size + buffer_size) { // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values, // so we still need to find a second separate buffer of at least √A unique values buffer1 = Range.init(B.end - count, B.end); find = buffer_size; } else if (block_size <= cache.len) { // we found the first and only internal buffer that we need, so we're done! buffer1 = Range.init(B.end - count, B.end); break; } else if (find_separately) { // found one buffer, but now find the other one buffer1 = Range.init(B.end - count, B.end); find_separately = false; } else { // buffer2 will be pulled out from a 'B' subarray, so if the first buffer was pulled out from the corresponding 'A' subarray, // we need to adjust the end point for that A subarray so it knows to stop redistributing its values before reaching buffer2 if (pull[0].range.start == A.start) pull[0].range.end -= pull[1].count; // we found a second buffer in an 'B' subarray containing √A unique values, so we're done! buffer2 = Range.init(B.end - count, B.end); break; } } else if (pull_index == 0 and count > buffer1.length()) { // keep track of the largest buffer we were able to find buffer1 = Range.init(B.end - count, B.end); pull[pull_index] = Pull{ .range = Range.init(A.start, B.end), .count = count, .from = index, .to = B.end, }; } } // pull out the two ranges so we can use them as internal buffers pull_index = 0; while (pull_index < 2) : (pull_index += 1) { const length = pull[pull_index].count; if (pull[pull_index].to < pull[pull_index].from) { // we're pulling the values out to the left, which means the start of an A subarray index = pull[pull_index].from; count = 1; while (count < length) : (count += 1) { index = findFirstBackward(T, items, items[index - 1], Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), length - count, context, lessThan); const range = Range.init(index + 1, pull[pull_index].from + 1); mem.rotate(T, items[range.start..range.end], range.length() - count); pull[pull_index].from = index + count; } } else if (pull[pull_index].to > pull[pull_index].from) { // we're pulling values out to the right, which means the end of a B subarray index = pull[pull_index].from + 1; count = 1; while (count < length) : (count += 1) { index = findLastForward(T, items, items[index], Range.init(index, pull[pull_index].to), length - count, context, lessThan); const range = Range.init(pull[pull_index].from, index - 1); mem.rotate(T, items[range.start..range.end], count); pull[pull_index].from = index - 1 - count; } } } // adjust block_size and buffer_size based on the values we were able to pull out buffer_size = buffer1.length(); block_size = iterator.length() / buffer_size + 1; // the first buffer NEEDS to be large enough to tag each of the evenly sized A blocks, // so this was originally here to test the math for adjusting block_size above // assert((iterator.length() + 1)/block_size <= buffer_size); // now that the two internal buffers have been created, it's time to merge each A+B combination at this level of the merge sort! iterator.begin(); while (!iterator.finished()) { A = iterator.nextRange(); B = iterator.nextRange(); // remove any parts of A or B that are being used by the internal buffers start = A.start; if (start == pull[0].range.start) { if (pull[0].from > pull[0].to) { A.start += pull[0].count; // if the internal buffer takes up the entire A or B subarray, then there's nothing to merge // this only happens for very small subarrays, like √4 = 2, 2 * (2 internal buffers) = 4, // which also only happens when cache.len is small or 0 since it'd otherwise use MergeExternal if (A.length() == 0) continue; } else if (pull[0].from < pull[0].to) { B.end -= pull[0].count; if (B.length() == 0) continue; } } if (start == pull[1].range.start) { if (pull[1].from > pull[1].to) { A.start += pull[1].count; if (A.length() == 0) continue; } else if (pull[1].from < pull[1].to) { B.end -= pull[1].count; if (B.length() == 0) continue; } } if (lessThan(context, items[B.end - 1], items[A.start])) { // the two ranges are in reverse order, so a simple rotation should fix it mem.rotate(T, items[A.start..B.end], A.length()); } else if (lessThan(context, items[A.end], items[A.end - 1])) { // these two ranges weren't already in order, so we'll need to merge them! var findA: usize = undefined; // break the remainder of A into blocks. firstA is the uneven-sized first A block var blockA = Range.init(A.start, A.end); var firstA = Range.init(A.start, A.start + blockA.length() % block_size); // swap the first value of each A block with the value in buffer1 var indexA = buffer1.start; index = firstA.end; while (index < blockA.end) : ({ indexA += 1; index += block_size; }) { mem.swap(T, &items[indexA], &items[index]); } // start rolling the A blocks through the B blocks! // whenever we leave an A block behind, we'll need to merge the previous A block with any B blocks that follow it, so track that information as well var lastA = firstA; var lastB = Range.init(0, 0); var blockB = Range.init(B.start, B.start + @min(block_size, B.length())); blockA.start += firstA.length(); indexA = buffer1.start; // if the first unevenly sized A block fits into the cache, copy it there for when we go to Merge it // otherwise, if the second buffer is available, block swap the contents into that if (lastA.length() <= cache.len) { const last_a_items = items[lastA.start..lastA.end]; @memcpy(cache[0..last_a_items.len], last_a_items); } else if (buffer2.length() > 0) { blockSwap(T, items, lastA.start, buffer2.start, lastA.length()); } if (blockA.length() > 0) { while (true) { // if there's a previous B block and the first value of the minimum A block is <= the last value of the previous B block, // then drop that minimum A block behind. or if there are no B blocks left then keep dropping the remaining A blocks. if ((lastB.length() > 0 and !lessThan(context, items[lastB.end - 1], items[indexA])) or blockB.length() == 0) { // figure out where to split the previous B block, and rotate it at the split const B_split = binaryFirst(T, items, items[indexA], lastB, context, lessThan); const B_remaining = lastB.end - B_split; // swap the minimum A block to the beginning of the rolling A blocks var minA = blockA.start; findA = minA + block_size; while (findA < blockA.end) : (findA += block_size) { if (lessThan(context, items[findA], items[minA])) { minA = findA; } } blockSwap(T, items, blockA.start, minA, block_size); // swap the first item of the previous A block back with its original value, which is stored in buffer1 mem.swap(T, &items[blockA.start], &items[indexA]); indexA += 1; // locally merge the previous A block with the B values that follow it // if lastA fits into the external cache we'll use that (with MergeExternal), // or if the second internal buffer exists we'll use that (with MergeInternal), // or failing that we'll use a strictly in-place merge algorithm (MergeInPlace) if (lastA.length() <= cache.len) { mergeExternal(T, items, lastA, Range.init(lastA.end, B_split), cache[0..], context, lessThan); } else if (buffer2.length() > 0) { mergeInternal(T, items, lastA, Range.init(lastA.end, B_split), buffer2, context, lessThan); } else { mergeInPlace(T, items, lastA, Range.init(lastA.end, B_split), context, lessThan); } if (buffer2.length() > 0 or block_size <= cache.len) { // copy the previous A block into the cache or buffer2, since that's where we need it to be when we go to merge it anyway if (block_size <= cache.len) { @memcpy(cache[0..block_size], items[blockA.start..][0..block_size]); } else { blockSwap(T, items, blockA.start, buffer2.start, block_size); } // this is equivalent to rotating, but faster // the area normally taken up by the A block is either the contents of buffer2, or data we don't need anymore since we memcopied it // either way, we don't need to retain the order of those items, so instead of rotating we can just block swap B to where it belongs blockSwap(T, items, B_split, blockA.start + block_size - B_remaining, B_remaining); } else { // we are unable to use the 'buffer2' trick to speed up the rotation operation since buffer2 doesn't exist, so perform a normal rotation mem.rotate(T, items[B_split .. blockA.start + block_size], blockA.start - B_split); } // update the range for the remaining A blocks, and the range remaining from the B block after it was split lastA = Range.init(blockA.start - B_remaining, blockA.start - B_remaining + block_size); lastB = Range.init(lastA.end, lastA.end + B_remaining); // if there are no more A blocks remaining, this step is finished! blockA.start += block_size; if (blockA.length() == 0) break; } else if (blockB.length() < block_size) { // move the last B block, which is unevenly sized, to before the remaining A blocks, by using a rotation // the cache is disabled here since it might contain the contents of the previous A block mem.rotate(T, items[blockA.start..blockB.end], blockB.start - blockA.start); lastB = Range.init(blockA.start, blockA.start + blockB.length()); blockA.start += blockB.length(); blockA.end += blockB.length(); blockB.end = blockB.start; } else { // roll the leftmost A block to the end by swapping it with the next B block blockSwap(T, items, blockA.start, blockB.start, block_size); lastB = Range.init(blockA.start, blockA.start + block_size); blockA.start += block_size; blockA.end += block_size; blockB.start += block_size; if (blockB.end > B.end - block_size) { blockB.end = B.end; } else { blockB.end += block_size; } } } } // merge the last A block with the remaining B values if (lastA.length() <= cache.len) { mergeExternal(T, items, lastA, Range.init(lastA.end, B.end), cache[0..], context, lessThan); } else if (buffer2.length() > 0) { mergeInternal(T, items, lastA, Range.init(lastA.end, B.end), buffer2, context, lessThan); } else { mergeInPlace(T, items, lastA, Range.init(lastA.end, B.end), context, lessThan); } } } // when we're finished with this merge step we should have the one // or two internal buffers left over, where the second buffer is all jumbled up // insertion sort the second buffer, then redistribute the buffers // back into the items using the opposite process used for creating the buffer // while an unstable sort like quicksort could be applied here, in benchmarks // it was consistently slightly slower than a simple insertion sort, // even for tens of millions of items. this may be because insertion // sort is quite fast when the data is already somewhat sorted, like it is here sort.insertion(T, items[buffer2.start..buffer2.end], context, lessThan); pull_index = 0; while (pull_index < 2) : (pull_index += 1) { var unique = pull[pull_index].count * 2; if (pull[pull_index].from > pull[pull_index].to) { // the values were pulled out to the left, so redistribute them back to the right var buffer = Range.init(pull[pull_index].range.start, pull[pull_index].range.start + pull[pull_index].count); while (buffer.length() > 0) { index = findFirstForward(T, items, items[buffer.start], Range.init(buffer.end, pull[pull_index].range.end), unique, context, lessThan); const amount = index - buffer.end; mem.rotate(T, items[buffer.start..index], buffer.length()); buffer.start += (amount + 1); buffer.end += amount; unique -= 2; } } else if (pull[pull_index].from < pull[pull_index].to) { // the values were pulled out to the right, so redistribute them back to the left var buffer = Range.init(pull[pull_index].range.end - pull[pull_index].count, pull[pull_index].range.end); while (buffer.length() > 0) { index = findLastBackward(T, items, items[buffer.end - 1], Range.init(pull[pull_index].range.start, buffer.start), unique, context, lessThan); const amount = buffer.start - index; mem.rotate(T, items[index..buffer.end], amount); buffer.start -= amount; buffer.end -= (amount + 1); unique -= 2; } } } } // double the size of each A and B subarray that will be merged in the next level if (!iterator.nextLevel()) break; } }