/////////////////////////////////////////////////////////////////////// // File: genericvector.h // Description: Generic vector class // Author: Daria Antonova // Created: Mon Jun 23 11:26:43 PDT 2008 // // (C) Copyright 2007, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // /////////////////////////////////////////////////////////////////////// // #ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_ #define TESSERACT_CCUTIL_GENERICVECTOR_H_ #include #include #include "tesscallback.h" #include "errcode.h" #include "helpers.h" #include "ndminx.h" // Use PointerVector below in preference to GenericVector, as that // provides automatic deletion of pointers, [De]Serialize that works, and // sort that works. template class GenericVector { public: GenericVector() { this->init(kDefaultVectorSize); } explicit GenericVector(int size) { this->init(size); } // Copy GenericVector(const GenericVector& other) { this->init(other.size()); this->operator+=(other); } GenericVector &operator+=(const GenericVector& other); GenericVector &operator=(const GenericVector& other); virtual ~GenericVector(); // Reserve some memory. void reserve(int size); // Double the size of the internal array. void double_the_size(); // Resizes to size and sets all values to t. void init_to_size(int size, T t); // Return the size used. int size() const { return size_used_; } int length() const { return size_used_; } // Return true if empty. bool empty() const { return size_used_ == 0; } // Return the object from an index. T &get(int index) const; T &operator[](int index) const; // Return the index of the T object. // This method NEEDS a compare_callback to be passed to // set_compare_callback. int get_index(T object) const; // Return true if T is in the array bool contains(T object) const; // Return true if the index is valid T contains_index(int index) const; // Push an element in the end of the array int push_back(T object); void operator+=(T t); // Push an element in the front of the array // Note: This function is O(n) int push_front(T object); // Set the value at the given index void set(T t, int index); // Insert t at the given index, push other elements to the right. void insert(T t, int index); // Removes an element at the given index and // shifts the remaining elements to the left. virtual void remove(int index); // Truncates the array to the given size by removing the end. // If the current size is less, the array is not expanded. virtual void truncate(int size) { if (size < size_used_) size_used_ = size; } // Add a callback to be called to delete the elements when the array took // their ownership. void set_clear_callback(TessCallback1* cb); // Add a callback to be called to compare the elements when needed (contains, // get_id, ...) void set_compare_callback(TessResultCallback2* cb); // Clear the array, calling the clear callback function if any. // All the owned callbacks are also deleted. // If you don't want the callbacks to be deleted, before calling clear, set // the callback to NULL. virtual void clear(); // Delete objects pointed to by data_[i] void delete_data_pointers(); // This method clears the current object, then, does a shallow copy of // its argument, and finally invalidate its argument. // Callbacks are moved to the current object; void move(GenericVector* from); // Read/Write the array to a file. This does _NOT_ read/write the callbacks. // The callback given must be permanent since they will be called more than // once. The given callback will be deleted at the end. // If the callbacks are NULL, then the data is simply read/written using // fread (and swapping)/fwrite. // Returns false on error or if the callback returns false. // DEPRECATED. Use [De]Serialize[Classes] instead. bool write(FILE* f, TessResultCallback2* cb) const; bool read(FILE* f, TessResultCallback3* cb, bool swap); // Writes a vector of simple types to the given file. Assumes that bitwise // read/write of T will work. Returns false in case of error. virtual bool Serialize(FILE* fp) const; // Reads a vector of simple types from the given file. Assumes that bitwise // read/write will work with ReverseN according to sizeof(T). // Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. virtual bool DeSerialize(bool swap, FILE* fp); // Writes a vector of classes to the given file. Assumes the existence of // bool T::Serialize(FILE* fp) const that returns false in case of error. // Returns false in case of error. bool SerializeClasses(FILE* fp) const; // Reads a vector of classes from the given file. Assumes the existence of // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of // error. Also needs T::T() and T::T(constT&), as init_to_size is used in // this function. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. bool DeSerializeClasses(bool swap, FILE* fp); // Allocates a new array of double the current_size, copies over the // information from data to the new location, deletes data and returns // the pointed to the new larger array. // This function uses memcpy to copy the data, instead of invoking // operator=() for each element like double_the_size() does. static T *double_the_size_memcpy(int current_size, T *data) { T *data_new = new T[current_size * 2]; memcpy(data_new, data, sizeof(T) * current_size); delete[] data; return data_new; } // Sorts the members of this vector using the less than comparator (cmp_lt), // which compares the values. Useful for GenericVectors to primitive types. // Will not work so great for pointers (unless you just want to sort some // pointers). You need to provide a specialization to sort_cmp to use // your type. void sort(); // Sort the array into the order defined by the qsort function comparator. // The comparator function is as defined by qsort, ie. it receives pointers // to two Ts and returns negative if the first element is to appear earlier // in the result and positive if it is to appear later, with 0 for equal. void sort(int (*comparator)(const void*, const void*)) { qsort(data_, size_used_, sizeof(*data_), comparator); } // Searches the array (assuming sorted in ascending order, using sort()) for // an element equal to target and returns true if it is present. // Use binary_search to get the index of target, or its nearest candidate. bool bool_binary_search(const T& target) const { int index = binary_search(target); if (index >= size_used_) return false; return data_[index] == target; } // Searches the array (assuming sorted in ascending order, using sort()) for // an element equal to target and returns the index of the best candidate. // The return value is the largest index i such that data_[i] <= target or 0. int binary_search(const T& target) const { int bottom = 0; int top = size_used_; do { int middle = (bottom + top) / 2; if (data_[middle] > target) top = middle; else bottom = middle; } while (top - bottom > 1); return bottom; } // Compact the vector by deleting elements using operator!= on basic types. // The vector must be sorted. void compact_sorted() { if (size_used_ == 0) return; // First element is in no matter what, hence the i = 1. int last_write = 0; for (int i = 1; i < size_used_; ++i) { // Finds next unique item and writes it. if (data_[last_write] != data_[i]) data_[++last_write] = data_[i]; } // last_write is the index of a valid data cell, so add 1. size_used_ = last_write + 1; } // Compact the vector by deleting elements for which delete_cb returns // true. delete_cb is a permanent callback and will be deleted. void compact(TessResultCallback1* delete_cb) { int new_size = 0; int old_index = 0; // Until the callback returns true, the elements stay the same. while (old_index < size_used_ && !delete_cb->Run(old_index++)) ++new_size; // Now just copy anything else that gets false from delete_cb. for (; old_index < size_used_; ++old_index) { if (!delete_cb->Run(old_index)) { data_[new_size++] = data_[old_index]; } } size_used_ = new_size; delete delete_cb; } T dot_product(const GenericVector& other) const { T result = static_cast(0); for (int i = MIN(size_used_, other.size_used_) - 1; i >= 0; --i) result += data_[i] * other.data_[i]; return result; } protected: // Init the object, allocating size memory. void init(int size); // We are assuming that the object generally placed in thie // vector are small enough that for efficiency it makes sence // to start with a larger initial size. static const int kDefaultVectorSize = 4; inT32 size_used_; inT32 size_reserved_; T* data_; TessCallback1* clear_cb_; // Mutable because Run method is not const mutable TessResultCallback2* compare_cb_; }; namespace tesseract { template bool cmp_eq(T const & t1, T const & t2) { return t1 == t2; } // Used by sort() // return < 0 if t1 < t2 // return 0 if t1 == t2 // return > 0 if t1 > t2 template int sort_cmp(const void* t1, const void* t2) { const T* a = static_cast (t1); const T* b = static_cast (t2); if (*a < *b) { return -1; } else if (*b < *a) { return 1; } else { return 0; } } // Used by PointerVector::sort() // return < 0 if t1 < t2 // return 0 if t1 == t2 // return > 0 if t1 > t2 template int sort_ptr_cmp(const void* t1, const void* t2) { const T* a = *reinterpret_cast(t1); const T* b = *reinterpret_cast(t2); if (*a < *b) { return -1; } else if (*b < *a) { return 1; } else { return 0; } } // Subclass for a vector of pointers. Use in preference to GenericVector // as it provides automatic deletion and correct serialization, with the // corollary that all copy operations are deep copies of the pointed-to objects. template class PointerVector : public GenericVector { public: PointerVector() : GenericVector() { } explicit PointerVector(int size) : GenericVector(size) { } virtual ~PointerVector() { // Clear must be called here, even though it is called again by the base, // as the base will call the wrong clear. clear(); } // Copy must be deep, as the pointers will be automatically deleted on // destruction. PointerVector(const PointerVector& other) { init(other.size()); this->operator+=(other); } PointerVector& operator+=(const PointerVector& other) { reserve(this->size_used_ + other.size_used_); for (int i = 0; i < other.size(); ++i) { push_back(new T(*other.data_[i])); } return *this; } PointerVector& operator=(const PointerVector& other) { this->truncate(0); this->operator+=(other); return *this; } // Removes an element at the given index and // shifts the remaining elements to the left. virtual void remove(int index) { delete GenericVector::data_[index]; GenericVector::remove(index); } // Truncates the array to the given size by removing the end. // If the current size is less, the array is not expanded. virtual void truncate(int size) { for (int i = size; i < GenericVector::size_used_; ++i) delete GenericVector::data_[i]; GenericVector::truncate(size); } // Clear the array, calling the clear callback function if any. // All the owned callbacks are also deleted. // If you don't want the callbacks to be deleted, before calling clear, set // the callback to NULL. virtual void clear() { GenericVector::delete_data_pointers(); GenericVector::clear(); } // Writes a vector of simple types to the given file. Assumes that bitwise // read/write of T will work. Returns false in case of error. virtual bool Serialize(FILE* fp) const { inT32 used = GenericVector::size_used_; if (fwrite(&used, sizeof(used), 1, fp) != 1) return false; for (int i = 0; i < used; ++i) { inT8 non_null = GenericVector::data_[i] != NULL; if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) return false; if (non_null && !GenericVector::data_[i]->Serialize(fp)) return false; } return true; } // Reads a vector of simple types from the given file. Assumes that bitwise // read/write will work with ReverseN according to sizeof(T). // Also needs T::T(), as new T is used in this function. // Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. virtual bool DeSerialize(bool swap, FILE* fp) { inT32 reserved; if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; if (swap) Reverse32(&reserved); GenericVector::reserve(reserved); for (int i = 0; i < reserved; ++i) { inT8 non_null; if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false; T* item = NULL; if (non_null) { item = new T; if (!item->DeSerialize(swap, fp)) return false; } push_back(item); } return true; } // Sorts the items pointed to by the members of this vector using // t::operator<(). void sort() { sort(&sort_ptr_cmp); } }; } // namespace tesseract // A useful vector that uses operator== to do comparisons. template class GenericVectorEqEq : public GenericVector { public: GenericVectorEqEq() { GenericVector::set_compare_callback( NewPermanentTessCallback(tesseract::cmp_eq)); } GenericVectorEqEq(int size) : GenericVector(size) { GenericVector::set_compare_callback( NewPermanentTessCallback(tesseract::cmp_eq)); } }; template void GenericVector::init(int size) { size_used_ = 0; size_reserved_ = 0; data_ = 0; clear_cb_ = 0; compare_cb_ = 0; reserve(size); } template GenericVector::~GenericVector() { clear(); } // Reserve some memory. If the internal array contains elements, they are // copied. template void GenericVector::reserve(int size) { if (size_reserved_ >= size || size <= 0) return; T* new_array = new T[size]; for (int i = 0; i < size_used_; ++i) new_array[i] = data_[i]; if (data_ != NULL) delete[] data_; data_ = new_array; size_reserved_ = size; } template void GenericVector::double_the_size() { if (size_reserved_ == 0) { reserve(kDefaultVectorSize); } else { reserve(2 * size_reserved_); } } // Resizes to size and sets all values to t. template void GenericVector::init_to_size(int size, T t) { reserve(size); size_used_ = size; for (int i = 0; i < size; ++i) data_[i] = t; } // Return the object from an index. template T &GenericVector::get(int index) const { ASSERT_HOST(index >= 0 && index < size_used_); return data_[index]; } template T &GenericVector::operator[](int index) const { return data_[index]; } // Return the object from an index. template void GenericVector::set(T t, int index) { ASSERT_HOST(index >= 0 && index < size_used_); data_[index] = t; } // Shifts the rest of the elements to the right to make // space for the new elements and inserts the given element // at the specified index. template void GenericVector::insert(T t, int index) { ASSERT_HOST(index >= 0 && index < size_used_); if (size_reserved_ == size_used_) double_the_size(); for (int i = size_used_; i > index; --i) { data_[i] = data_[i-1]; } data_[index] = t; size_used_++; } // Removes an element at the given index and // shifts the remaining elements to the left. template void GenericVector::remove(int index) { ASSERT_HOST(index >= 0 && index < size_used_); for (int i = index; i < size_used_ - 1; ++i) { data_[i] = data_[i+1]; } size_used_--; } // Return true if the index is valindex template T GenericVector::contains_index(int index) const { return index >= 0 && index < size_used_; } // Return the index of the T object. template int GenericVector::get_index(T object) const { for (int i = 0; i < size_used_; ++i) { ASSERT_HOST(compare_cb_ != NULL); if (compare_cb_->Run(object, data_[i])) return i; } return -1; } // Return true if T is in the array template bool GenericVector::contains(T object) const { return get_index(object) != -1; } // Add an element in the array template int GenericVector::push_back(T object) { int index = 0; if (size_used_ == size_reserved_) double_the_size(); index = size_used_++; data_[index] = object; return index; } // Add an element in the array (front) template int GenericVector::push_front(T object) { if (size_used_ == size_reserved_) double_the_size(); for (int i = size_used_; i > 0; --i) data_[i] = data_[i-1]; data_[0] = object; ++size_used_; return 0; } template void GenericVector::operator+=(T t) { push_back(t); } template GenericVector &GenericVector::operator+=(const GenericVector& other) { this->reserve(size_used_ + other.size_used_); for (int i = 0; i < other.size(); ++i) { this->operator+=(other.data_[i]); } return *this; } template GenericVector &GenericVector::operator=(const GenericVector& other) { this->truncate(0); this->operator+=(other); return *this; } // Add a callback to be called to delete the elements when the array took // their ownership. template void GenericVector::set_clear_callback(TessCallback1* cb) { clear_cb_ = cb; } // Add a callback to be called to delete the elements when the array took // their ownership. template void GenericVector::set_compare_callback(TessResultCallback2* cb) { compare_cb_ = cb; } // Clear the array, calling the callback function if any. template void GenericVector::clear() { if (size_reserved_ > 0) { if (clear_cb_ != NULL) for (int i = 0; i < size_used_; ++i) clear_cb_->Run(data_[i]); delete[] data_; data_ = NULL; size_used_ = 0; size_reserved_ = 0; } if (clear_cb_ != NULL) { delete clear_cb_; clear_cb_ = NULL; } if (compare_cb_ != NULL) { delete compare_cb_; compare_cb_ = NULL; } } template void GenericVector::delete_data_pointers() { for (int i = 0; i < size_used_; ++i) if (data_[i]) { delete data_[i]; } } template bool GenericVector::write( FILE* f, TessResultCallback2* cb) const { if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) return false; if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) return false; if (cb != NULL) { for (int i = 0; i < size_used_; ++i) { if (!cb->Run(f, data_[i])) { delete cb; return false; } } delete cb; } else { if (fwrite(data_, sizeof(T), size_used_, f) != size_used_) return false; } return true; } template bool GenericVector::read(FILE* f, TessResultCallback3* cb, bool swap) { inT32 reserved; if (fread(&reserved, sizeof(reserved), 1, f) != 1) return false; if (swap) Reverse32(&reserved); reserve(reserved); if (fread(&size_used_, sizeof(size_used_), 1, f) != 1) return false; if (swap) Reverse32(&size_used_); if (cb != NULL) { for (int i = 0; i < size_used_; ++i) { if (!cb->Run(f, data_ + i, swap)) { delete cb; return false; } } delete cb; } else { if (fread(data_, sizeof(T), size_used_, f) != size_used_) return false; if (swap) { for (int i = 0; i < size_used_; ++i) ReverseN(&data_[i], sizeof(T)); } } return true; } // Writes a vector of simple types to the given file. Assumes that bitwise // read/write of T will work. Returns false in case of error. template bool GenericVector::Serialize(FILE* fp) const { if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false; if (fwrite(data_, sizeof(*data_), size_used_, fp) != size_used_) return false; return true; } // Reads a vector of simple types from the given file. Assumes that bitwise // read/write will work with ReverseN according to sizeof(T). // Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. template bool GenericVector::DeSerialize(bool swap, FILE* fp) { inT32 reserved; if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; if (swap) Reverse32(&reserved); reserve(reserved); size_used_ = reserved; if (fread(data_, sizeof(T), size_used_, fp) != size_used_) return false; if (swap) { for (int i = 0; i < size_used_; ++i) ReverseN(&data_[i], sizeof(data_[i])); } return true; } // Writes a vector of classes to the given file. Assumes the existence of // bool T::Serialize(FILE* fp) const that returns false in case of error. // Returns false in case of error. template bool GenericVector::SerializeClasses(FILE* fp) const { if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false; for (int i = 0; i < size_used_; ++i) { if (!data_[i].Serialize(fp)) return false; } return true; } // Reads a vector of classes from the given file. Assumes the existence of // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of // error. Alse needs T::T() and T::T(constT&), as init_to_size is used in // this function. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. template bool GenericVector::DeSerializeClasses(bool swap, FILE* fp) { uinT32 reserved; if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; if (swap) Reverse32(&reserved); T empty; init_to_size(reserved, empty); for (int i = 0; i < reserved; ++i) { if (!data_[i].DeSerialize(swap, fp)) return false; } return true; } // This method clear the current object, then, does a shallow copy of // its argument, and finally invalindate its argument. template void GenericVector::move(GenericVector* from) { this->clear(); this->data_ = from->data_; this->size_reserved_ = from->size_reserved_; this->size_used_ = from->size_used_; this->compare_cb_ = from->compare_cb_; this->clear_cb_ = from->clear_cb_; from->data_ = NULL; from->clear_cb_ = NULL; from->compare_cb_ = NULL; from->size_used_ = 0; from->size_reserved_ = 0; } template void GenericVector::sort() { sort(&tesseract::sort_cmp); } #endif // TESSERACT_CCUTIL_GENERICVECTOR_H_