Let's dig into code first.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <assert.h> | |
#include <stdlib.h> | |
#include <tictoc.h> | |
#include <time.h> | |
#include <vector> | |
const int NUM_ELEMENTS = 1e7; | |
bool isSorted(int array[], int left, int right) { | |
for (int i=left; i<right; i++) | |
if (array[i] > array[i+1]) return false; | |
return true; | |
} | |
void swap(int array[], int i, int j) { | |
int temp = array[i]; | |
array[i] = array[j]; | |
array[j] = temp; | |
} | |
int binary_partition(int array[], int left, int right) { | |
int pivot = left; | |
int i = left + 1; | |
int j = right; | |
while (true) { | |
while (i<j && array[i] < array[pivot]) i++; | |
while (array[j] > array[pivot]) j--; | |
if (i >= j) break; | |
swap(array, i++, j--); | |
} | |
swap(array, pivot, j); | |
return j; | |
} | |
void ternary_partition(int array[], int left, int right, int& p1, int& p2) { | |
int pivot1, pivot2; | |
if (array[left] > array[right]) { | |
swap(array, left, right); | |
} | |
pivot1 = array[left]; | |
pivot2 = array[right]; | |
int i=left+1, j=left+1, k=right-1; | |
while (j <= k) { | |
if (array[j] <= pivot1) { | |
swap(array, i++, j++); | |
} | |
else if (array[j] <= pivot2) { | |
j++; | |
} | |
else { | |
swap(array, j, k--); | |
} | |
} | |
swap(array, left, i-1); | |
swap(array, right, k+1); | |
p1 = i-1; | |
p2 = k+1; | |
} | |
void binary_quicksort(int array[], int left, int right) { | |
if (left >= right) return; | |
int j = binary_partition(array, left, right); | |
binary_quicksort(array, left, j-1); | |
binary_quicksort(array, j+1, right); | |
} | |
void ternary_quicksort(int array[], int left, int right) { | |
if (left >= right) return; | |
int i, j; | |
ternary_partition(array, left, right, i, j); | |
ternary_quicksort(array, left, i-1); | |
ternary_quicksort(array, i+1, j-1); | |
ternary_quicksort(array, j+1, right); | |
} | |
void quicksort_3way(int array[], int left, int right) { | |
if (left >= right) return; | |
int pivot_val = array[left]; | |
int i = left; | |
int j = i+1; | |
int k = right; | |
while (j <= k) { | |
if (array[j] == pivot_val) j++; | |
else if (array[j] < pivot_val) swap(array, i++, j++); | |
else swap(array, j, k--); | |
} | |
quicksort_3way(array, left, i-1); | |
quicksort_3way(array, j, right); | |
} | |
using namespace std; | |
int main(int argc, char** argv) { | |
srand(time(NULL)); | |
vector<int> array(NUM_ELEMENTS); | |
int MOD = atoi(argv[1]); | |
for (int i=0; i<NUM_ELEMENTS; i++) | |
array[i] = rand()%MOD; | |
vector<int> array1(array), array2(array), array3(array); | |
tic(); | |
binary_quicksort(&array1[0], 0, NUM_ELEMENTS-1); | |
toc(); | |
assert(isSorted(&array1[0], 0, NUM_ELEMENTS-1)); | |
tic(); | |
ternary_quicksort(&array2[0], 0, NUM_ELEMENTS-1); | |
toc(); | |
assert(isSorted(&array2[0], 0, NUM_ELEMENTS-1)); | |
tic(); | |
quicksort_3way(&array3[0], 0, NUM_ELEMENTS-1); | |
toc(); | |
assert(isSorted(&array3[0], 0, NUM_ELEMENTS-1)); | |
return 0; | |
} |
The binary quicksort is what everyone is most familiar with. The left partition contains those less or equal to pivot value, whereas the right partition contains those who are greater or equal to the pivot value.
The ternary quicksort is similar in that it divides into three partitions and two pivot values. The left-most partition contains those less or equal to the first pivot value, the middle partition contains those in between two pivot values, and the right-most partition contains those greater than the second pivot value. Note that I am assuming the first pivot value is less or equal to the second pivot value.
Lastly, the 3-way quicksort partitions the array into 3, where the first is strictly less, the middle is equal to, and the last is strictly greater.
Let's examine their performance. First, when the array values are mostly distinct:
$ g++ -g quicksort.cpp -ltictoc -O3 && ./a.out 1000000000
Time elapsed: 0.983398s
Time elapsed: 0.883001s
Time elapsed: 1.151062s
It seems like ternary quicksort is the fastest among these.
When the array values are less distinct, however, we observe that 3-way quicksort performance starts to take off, while ternary quicksort slows downs greatly.
$ g++ -g quicksort.cpp -ltictoc -O3 && ./a.out 100000
Time elapsed: 0.808114s
Time elapsed: 1.043392s
Time elapsed: 0.829712s
$ g++ -g quicksort.cpp -ltictoc -O3 && ./a.out 1000
Time elapsed: 0.612664s
Time elapsed: 27.944554s
Time elapsed: 0.427633s
We will examine what is going on as we introduce more duplicate key values in the next post.
No comments:
Post a Comment