5/26/2014

Heap and maintain the median

Here the classic approach to handle a heap is insert new element at the root and sift it down.
 #include <iostream>  
 #include <vector>  
 #include <algorithm>  
 #include <stdexcept>  
 #include <stdlib.h>  
 #include <memory>  
 #include <time.h>   
 #include <set>  
 using namespace std;  
 template <typename T>  
 class Heap {  
  vector<T> _heap;  
 public:  
  // never call a virtual function from constructor  
  void build(const vector<T>& nums) {  
   for(int i = 0; i < nums.size(); ++i) {  
    offer(nums[i]);  
   }  
  }  
  T peek() const {  
   if(!_heap.empty()) {  
    return _heap[0];  
   } else {  
    std::cerr << "heap is empty" << std::endl;  
   }  
  }  
  void offer(T val) { //insert  
   _heap.insert(_heap.begin(), val);  
   heapify();  
  }  
  T poll() { // delete  
   int val = _heap[0];  
   swap(0, _heap.size() - 1);  
   _heap.pop_back();  
   heapify();  
   return val;  
  }  
  void print() {  
   for_each(_heap.begin(), _heap.end(), [](T val) {  
     std::cout << val << ", ";  
    });  
   std::cout << std::endl;  
  }  
  int size() const {  
   return _heap.size();  
  }  
  virtual ~ Heap() {}  
 private:  
 
  void heapify() {  
   heapify(0);  
  }  
  void heapify(int i) {  
   //sift down  
   int l = left(i);  
   int r = right(i);  
   int largest = i;  
   if(l < _heap.size() && comp(_heap[l], _heap[largest])) {  
    largest = l;  
   }  
   if(r < _heap.size() && comp(_heap[r], _heap[largest])) {  
    largest = r;  
   }  
   if (largest != i) {    
    swap(largest, i);  
    heapify(largest);  
   }  
  }  
  inline void swap(int i, int j) {  
   if(i >= _heap.size() && j >= _heap.size()) {  
    throw std::runtime_error("Out of bound!");  
   }  
   int temp = _heap[i];  
   _heap[i] = _heap[j];  
   _heap[j] = temp;  
  }  
  inline int parent(int i) {  
   return i / 2;  
  }  
  inline int left(int i) {  
   return 2 * i;  
  }  
  inline int right(int i) {  
   return 2 * i + 1;  
  }  
  virtual bool comp(T val1, T val2) const = 0;  
 };  

 template<typename T>  
 class MaxHeap : public Heap<T> {  
 public:  
  virtual ~MaxHeap() {}  
 private:  
  virtual bool comp(T val1, T val2) const {  
   return val1 > val2;  
  }  
 };  
 template <typename T>  
 class MinHeap : public Heap<T> {  
 public:  
  virtual ~MinHeap() {}  
 private:  
  virtual bool comp(T val1, T val2) const {  
   return val1 < val2;  
  }  
 };  

 class StreamMediam {  
 private:  
  MinHeap<int> _minHeap;  
  MaxHeap<int> _maxHeap;  
  set<int> _record;  
 public:  
  StreamMediam() {  
   srand (time(NULL));    
  }  
  int genRandNum(int maxNum) {  
   return rand() % maxNum;  
  }  
  void accept(int maxNum = 100) {  
   int val = genRandNum(maxNum);  
   _record.insert(val);  
   if(_minHeap.size() == _maxHeap.size()) {  
    if(_minHeap.size() > 0 && val > _minHeap.peek()) {  
     _maxHeap.offer(_minHeap.poll());  
     _minHeap.offer(val);  
    } else {  
     _maxHeap.offer(val);  
    }  
   } else {  
    if(val < _maxHeap.peek()) {  
     _minHeap.offer(_maxHeap.poll());  
     _maxHeap.offer(val);  
    } else {  
     _minHeap.offer(val);  
    }  
   }  
  }  
  int getMedian() const {  
   if(_minHeap.size() == _maxHeap.size()) {  
    return (_minHeap.peek() + _maxHeap.peek() ) / 2;  
   } else {  
    return _maxHeap.peek();  
   }  
  }  
  void print() {  
    for_each(_record.begin(), _record.end(), [](int val) {  
      std::cout << val << ", ";  
     });  
     std::cout << std::endl;  
  }  
 };  

 int main(int argc, char *argv[])  
 {  
  vector<int> nums{4,1,3,2,16,9,10,14,8,7};  
  shared_ptr<Heap<int> > heap1(new MaxHeap<int>());  
  heap1->build(nums);  
  heap1->print();  
  shared_ptr<Heap<int> > heap2(new MinHeap<int>());  
  heap2->build(nums);  
  heap2->print();  
  StreamMediam sm;  
  for(int i = 0; i < 10; ++i) {  
   sm.accept();  
   sm.print();  
   std::cout <<  sm.getMedian() << std::endl;  
  }  
  return 0;  
 }  

No comments: