Sort and get index list in C++

Alec Jacobson

December 02, 2010

weblog/

Once again, I'm implementing convenient MATLAB functions in C++. Here's a templated sort function that sorts a vector A into a vector B but also returns an index vector I, such that A[j] = B[I[j]]. This is convenient if you are using the sort of one vector to sort another vector. I used this to sort a vector eigenvalues and then sort a vector of eigenvectors (vector of vectors) in the same order. Here is the header you should save in a file called sort.h:
#include <vector>
#include <algorithm>
// Act like matlab's [Y,I] = SORT(X)
// Input:
//   unsorted  unsorted vector
// Output:
//   sorted     sorted vector, allowed to be same as unsorted
//   index_map  an index map such that sorted[i] = unsorted[index_map[i]]
template <class T>
void sort(
    std::vector<T> &unsorted,
    std::vector<T> &sorted,
    std::vector<size_t> &index_map);

// Act like matlab's Y = X[I]
// where I contains a vector of indices so that after,
// Y[j] = X[I[j]] for index j
// this implies that Y.size() == I.size()
// X and Y are allowed to be the same reference
template< class T >
void reorder(
  std::vector<T> & unordered, 
  std::vector<size_t> const & index_map, 
  std::vector<T> & ordered);

////////////////////////////////////////////////////////////////////////////////
// Implementation
////////////////////////////////////////////////////////////////////////////////


// Comparison struct used by sort
// http://bytes.com/topic/c/answers/132045-sort-get-index
template<class T> struct index_cmp 
{
  index_cmp(const T arr) : arr(arr) {}
  bool operator()(const size_t a, const size_t b) const
  { 
    return arr[a] < arr[b];
  }
  const T arr;
};

template <class T>
void sort(
  std::vector<T> & unsorted,
  std::vector<T> & sorted,
  std::vector<size_t> & index_map)
{
  // Original unsorted index map
  index_map.resize(unsorted.size());
  for(size_t i=0;i<unsorted.size();i++)
  {
    index_map[i] = i;
  }
  // Sort the index map, using unsorted for comparison
  sort(
    index_map.begin(), 
    index_map.end(), 
    index_cmp<std::vector<T>& >(unsorted));

  sorted.resize(unsorted.size());
  reorder(unsorted,index_map,sorted);
}

// This implementation is O(n), but also uses O(n) extra memory
template< class T >
void reorder(
  std::vector<T> & unordered, 
  std::vector<size_t> const & index_map, 
  std::vector<T> & ordered)
{
  // copy for the reorder according to index_map, because unsorted may also be
  // sorted
  std::vector<T> copy = unordered;
  ordered.resize(index_map.size());
  for(int i = 0; i<index_map.size();i++)
  {
    ordered[i] = copy[index_map[i]];
  }
}
You can test it out with this little program, main.cpp:
#include <sort.h>
int main(void)
{
  // sort a into b
  std::vector<double> a;
  a.resize(5);
  a[0] = 1; a[1] = 4; a[2] = 2; a[3] = 5; a[4] = 3;
  std::vector<size_t> i;
  std::vector<double> b;
  sort(a,b,i);
  for(int j = 0;j<a.size();j++)
  {
    printf("b[%d] = %g = a[i[%d]] = a[%d] = %g\n",j,b[j],j,i[j],a[i[j]]);
  }
  printf("\n");

  // sort c "in place"
  std::vector<char> c;
  c.resize(5);
  c[0] = 'b'; c[1] = 'd'; c[2] = 'a'; c[3] = 'e'; c[4] = 'c';
  std::vector<char> old_c = c;
  sort(c,c,i);
  for(int j = 0;j<c.size();j++)
  {
    printf("c[%d] = %c = old_c[i[%d]] = old_c[%d] = %c\n",
      j,c[j],j,i[j],old_c[i[j]]);
  }
  printf("\n");

  // sort d into e and use i to sort d into f afterwards
  std::vector<int> d;
  d.resize(5);
  d[0] = 5; d[1] = -1; d[2] = -10; d[3] = -2; d[4] = -6;
  std::vector<int> e;
  sort(d,e,i);
  std::vector<int> f;
  reorder(d,i,f);
  for(int j = 0;j<d.size();j++)
  {
    printf("f[%d] = %d = e[%d] = %d = d[i[%d]] = d[%d] = %d\n",
        j,f[j],j,e[j],j,i[j],d[i[j]]);
  }
  printf("\n");
}
Which I compile on my mac with:
g++ -g main.cpp -o sort_example -I .
The issuing ./sort_example I see:
b[0] = 1 = a[i[0]] = a[0] = 1
b[1] = 2 = a[i[1]] = a[2] = 2
b[2] = 3 = a[i[2]] = a[4] = 3
b[3] = 4 = a[i[3]] = a[1] = 4
b[4] = 5 = a[i[4]] = a[3] = 5

c[0] = a = old_c[i[0]] = old_c[2] = a
c[1] = b = old_c[i[1]] = old_c[0] = b
c[2] = c = old_c[i[2]] = old_c[4] = c
c[3] = d = old_c[i[3]] = old_c[1] = d
c[4] = e = old_c[i[4]] = old_c[3] = e

f[0] = -10 = e[0] = -10 = d[i[0]] = d[2] = -10
f[1] = -6 = e[1] = -6 = d[i[1]] = d[4] = -6
f[2] = -2 = e[2] = -2 = d[i[2]] = d[3] = -2
f[3] = -1 = e[3] = -1 = d[i[3]] = d[1] = -1
f[4] = 5 = e[4] = 5 = d[i[4]] = d[0] = 5