diff --git a/CMakeLists.txt b/CMakeLists.txt
index b69cb5b71f..f40d77c1dc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -536,6 +536,7 @@ endif (CMAKE_CXX_COMPILER_ID STREQUAL GNU OR CMAKE_CXX_COMPILER_ID STREQUAL Clan
 add_subdirectory (extern/osg-ffmpeg-videoplayer)
 add_subdirectory (extern/oics)
 add_subdirectory (extern/Base64)
+add_subdirectory (extern/murmurhash)
 if (BUILD_OPENCS)
     add_subdirectory (extern/osgQt)
 endif()
diff --git a/apps/opencs/CMakeLists.txt b/apps/opencs/CMakeLists.txt
index 88c4233c9c..9a70950879 100644
--- a/apps/opencs/CMakeLists.txt
+++ b/apps/opencs/CMakeLists.txt
@@ -226,6 +226,7 @@ target_link_libraries(openmw-cs
     ${OSGTEXT_LIBRARIES}
     ${OSG_LIBRARIES}
     ${EXTERN_OSGQT_LIBRARY}
+    ${MURMURHASH_LIBRARIES}
     ${Boost_SYSTEM_LIBRARY}
     ${Boost_FILESYSTEM_LIBRARY}
     ${Boost_PROGRAM_OPTIONS_LIBRARY}
diff --git a/apps/opencs/model/world/collection.hpp b/apps/opencs/model/world/collection.hpp
index 8fc5d076ce..89430f1c58 100644
--- a/apps/opencs/model/world/collection.hpp
+++ b/apps/opencs/model/world/collection.hpp
@@ -95,8 +95,6 @@ namespace CSMWorld
 
         protected:
 
-            const std::map& getIdMap() const;
-
             const std::vector > >& getRecords() const;
 
             bool reorderRowsImp (int baseIndex, const std::vector& newOrder);
@@ -205,12 +203,6 @@ namespace CSMWorld
         NestableColumn *getNestableColumn (int column) const;
     };
 
-    template
-    const std::map& Collection::getIdMap() const
-    {
-        return mIndex;
-    }
-
     template
     const std::vector > >& Collection::getRecords() const
     {
diff --git a/apps/opencs/model/world/collectionbase.cpp b/apps/opencs/model/world/collectionbase.cpp
index 6134dc1727..f20fc643e2 100644
--- a/apps/opencs/model/world/collectionbase.cpp
+++ b/apps/opencs/model/world/collectionbase.cpp
@@ -8,6 +8,11 @@ CSMWorld::CollectionBase::CollectionBase() {}
 
 CSMWorld::CollectionBase::~CollectionBase() {}
 
+int CSMWorld::CollectionBase::getInsertIndex (const std::string& id, UniversalId::Type type, RecordBase *record) const
+{
+    return getAppendIndex(id, type); // default implementation: \a record is not consulted
+}
+
 int CSMWorld::CollectionBase::searchColumnIndex (Columns::ColumnId id) const
 {
     int columns = getColumns();
diff --git a/apps/opencs/model/world/collectionbase.hpp b/apps/opencs/model/world/collectionbase.hpp
index ae06d892f5..13471b9886 100644
--- a/apps/opencs/model/world/collectionbase.hpp
+++ b/apps/opencs/model/world/collectionbase.hpp
@@ -100,6 +100,12 @@ namespace CSMWorld
             ///
             /// \return Success?
 
+            virtual int getInsertIndex (const std::string& id,
+                UniversalId::Type type = UniversalId::Type_None,
+                RecordBase *record = nullptr) const;
+            ///< Works like getAppendIndex unless an overriding method uses the record pointer
+            /// to get additional info about the record that results in an alternative index.
+
             int searchColumnIndex (Columns::ColumnId id) const;
             ///< Return index of column with the given \a id. If no such column exists, -1 is returned.
 
diff --git a/apps/opencs/model/world/idtable.cpp b/apps/opencs/model/world/idtable.cpp
index bce934bd9f..8fb3cd6593 100644
--- a/apps/opencs/model/world/idtable.cpp
+++ b/apps/opencs/model/world/idtable.cpp
@@ -235,7 +235,13 @@ void CSMWorld::IdTable::setRecord (const std::string& id,
 
     if (index==-1)
     {
-        int index2 = mIdCollection->getAppendIndex (id, type);
+        // For info records, appendRecord may use a different index than the one returned by
+        // getAppendIndex (because of prev/next links). This can result in the display not
+        // updating correctly after an undo.
+        //
+        // Use getInsertIndex to get the correct index. For non-Info records the record
+        // pointer is ignored and getInsertIndex simply forwards to getAppendIndex.
+        int index2 = mIdCollection->getInsertIndex (id, type, record.get());
 
         beginInsertRows (QModelIndex(), index2, index2);
 
diff --git a/apps/opencs/model/world/infocollection.cpp b/apps/opencs/model/world/infocollection.cpp
index c64e21d3c7..d229ddacc2 100644
--- a/apps/opencs/model/world/infocollection.cpp
+++ b/apps/opencs/model/world/infocollection.cpp
@@ -8,6 +8,67 @@
 
 #include
 
+namespace CSMWorld
+{
+    template<>
+    void Collection >::removeRows (int index, int count)
+    {
+        mRecords.erase(mRecords.begin()+index, mRecords.begin()+index+count);
+
+        // index map is updated in InfoCollection::removeRows()
+    }
+
+    template<>
+    void Collection >::insertRecord (std::unique_ptr record,
+        int index, UniversalId::Type type)
+    {
+        int size = static_cast(mRecords.size());
+        if (index < 0 || index > size) // index == size is allowed and means "append"
+            throw std::runtime_error("index out of range");
+
+        std::unique_ptr > record2(static_cast*>(record.release()));
+
+        if (index == size)
+            mRecords.push_back(std::move(record2));
+        else
+            mRecords.insert(mRecords.begin()+index, std::move(record2));
+
+        // index map is updated in InfoCollection::insertRecord()
+    }
+
+    template<>
+    bool Collection >::reorderRowsImp (int baseIndex,
+        const std::vector& newOrder)
+    {
+        if (!newOrder.empty())
+        {
+            int size = static_cast(newOrder.size()); // NOTE(review): baseIndex+size is not checked against mRecords.size()
+
+            // check that newOrder is a permutation: its sorted copy must read 0, 1, ..., size-1
+            std::vector test(newOrder);
+            std::sort(test.begin(), test.end());
+            for (int i = 0; i < size; ++i)
+                if (test[i] != i) return false; // duplicate, gap or out-of-range index (min/max test alone missed these)
+
+            // reorder records
+            std::vector > > buffer(size);
+
+            // FIXME: BUG: undo does not remove modified flag
+            for (int i = 0; i < size; ++i)
+            {
+                buffer[newOrder[i]] = std::move(mRecords[baseIndex+i]); // row baseIndex+i ends up at baseIndex+newOrder[i]
+                buffer[newOrder[i]]->setModified(buffer[newOrder[i]]->get());
+            }
+
+            std::move(buffer.begin(), buffer.end(), mRecords.begin()+baseIndex);
+
+            // index map is updated in InfoCollection::reorderRows()
+        }
+
+        return true; // an empty newOrder is a successful no-op
+    }
+}
+
 void CSMWorld::InfoCollection::load (const Info& record, bool base)
 {
     int index = searchId
(record.mId);
@@ -19,29 +80,7 @@ void CSMWorld::InfoCollection::load (const Info& record, bool base)
         record2->mState = base ? RecordBase::State_BaseOnly : RecordBase::State_ModifiedOnly;
         (base ? record2->mBase : record2->mModified) = record;
 
-        std::string topic = Misc::StringUtils::lowerCase (record2->get().mTopicId);
-
-        if (!record2->get().mPrev.empty())
-        {
-            index = getInfoIndex (record2->get().mPrev, topic); // WARN: index repurposed
-
-            if (index!=-1)
-                ++index;
-        }
-
-        if (index==-1 && !record2->get().mNext.empty())
-        {
-            index = getInfoIndex (record2->get().mNext, topic);
-        }
-
-        if (index==-1)
-        {
-            Range range = getTopicRange (topic);
-
-            index = std::distance (getRecords().begin(), range.second);
-        }
-
-        insertRecord (std::move(record2), index);
+        appendRecord(std::move(record2)); // the prev/next lookup removed above now lives in getInsertIndex()
     }
     else
     {
@@ -59,30 +98,74 @@ void CSMWorld::InfoCollection::load (const Info& record, bool base)
 
 int CSMWorld::InfoCollection::getInfoIndex (const std::string& id, const std::string& topic) const
 {
-    std::string fullId = Misc::StringUtils::lowerCase (topic) + "#" + id;
+    // find the topic first (the lookup key is the lower-cased topic)
+    std::map > >::const_iterator iter
+        = mInfoIndex.find(StringHash(std::make_shared(Misc::StringUtils::lowerCase(topic))));
 
-    std::pair range = getTopicRange (topic);
+    if (iter == mInfoIndex.end())
+        return -1;
 
-    for (; range.first!=range.second; ++range.first)
-        if (Misc::StringUtils::ciEqual((*range.first).get()->get().mId, fullId))
-            return std::distance (getRecords().begin(), range.first);
+    // linear scan over the (id, record index) pairs of this topic
+    for (std::vector >::const_iterator it = iter->second.begin();
+        it != iter->second.end(); ++it)
+    {
+        if (Misc::StringUtils::ciEqual(it->first, id)) // case-insensitive match on the id without topic prefix
+            return it->second;
+    }
 
     return -1;
 }
 
-int CSMWorld::InfoCollection::getAppendIndex (const std::string& id, UniversalId::Type type) const
+// Calling insertRecord() with the index from getInsertIndex() needs to take the prev/next
+// records into account; an example is deleting a record and then undoing the deletion
+int CSMWorld::InfoCollection::getInsertIndex (const std::string& id,
+    UniversalId::Type type, RecordBase *record) const
 {
-    std::string::size_type separator = id.find_last_of ('#');
+    if (record == nullptr) // no record given: only the "topic#info" id is available
+    {
+        std::string::size_type separator = id.find_last_of('#');
 
-    if (separator==std::string::npos)
-        throw std::runtime_error ("invalid info ID: " + id);
+        if (separator == std::string::npos)
+            throw std::runtime_error("invalid info ID: " + id);
 
-    std::pair range = getTopicRange (id.substr (0, separator));
+        std::pair range = getTopicRange(id.substr(0, separator));
 
-    if (range.first==range.second)
-        return Collection >::getAppendIndex (id, type);
+        if (range.first == range.second) // empty range: defer to the generic append index
+            return Collection >::getAppendIndex(id, type);
 
-    return std::distance (getRecords().begin(), range.second);
+        return std::distance(getRecords().begin(), range.second); // index of the range's end iterator
+    }
+
+    int index = -1;
+
+    const Info& info = static_cast*>(record)->get();
+    std::string topic = info.mTopicId;
+
+    // if the record has a prev, find its index value
+    if (!info.mPrev.empty())
+    {
+        index = getInfoIndex(info.mPrev, topic);
+
+        if (index != -1)
+            ++index; // if prev exists, set current index to one above prev
+    }
+
+    // if prev doesn't exist or not found and the record has a next, find its index value
+    if (index == -1 && !info.mNext.empty())
+    {
+        // if next exists, use its index as the current index
+        index = getInfoIndex(info.mNext, topic);
+    }
+
+    // if next doesn't exist or not found (i.e.
neither exist yet) then start a new one
+    if (index == -1)
+    {
+        Range range = getTopicRange(topic); // getTopicRange converts topic to lower case first
+
+        index = std::distance(getRecords().begin(), range.second);
+    }
+
+    return index;
 }
 
 bool CSMWorld::InfoCollection::reorderRows (int baseIndex, const std::vector& newOrder)
@@ -99,7 +182,23 @@ bool CSMWorld::InfoCollection::reorderRows (int baseIndex, const std::vector >::reorderRowsImp(baseIndex, newOrder))
+        return false;
+
+    // adjust index: every stored record index inside the reordered window follows its record
+    int size = static_cast(newOrder.size());
+    for (std::map > >::iterator iter
+        = mInfoIndex.begin(); iter != mInfoIndex.end(); ++iter)
+    {
+        for (std::vector >::iterator it = iter->second.begin();
+            it != iter->second.end(); ++it)
+        {
+            if (it->second >= baseIndex && it->second < baseIndex+size)
+                it->second = newOrder.at(it->second-baseIndex)+baseIndex;
+        }
+    }
+
+    return true;
 }
 
 void CSMWorld::InfoCollection::load (ESM::ESMReader& reader, bool base, const ESM::Dialogue& dialogue)
@@ -142,74 +241,54 @@ void CSMWorld::InfoCollection::load (ESM::ESMReader& reader, bool base, const ES
 
 CSMWorld::InfoCollection::Range CSMWorld::InfoCollection::getTopicRange (const std::string& topic) const
 {
-    std::string topic2 = Misc::StringUtils::lowerCase (topic);
+    std::string lowerTopic = Misc::StringUtils::lowerCase (topic);
 
-    std::map::const_iterator iter = getIdMap().lower_bound (topic2);
+    // find the topic
+    std::map > >::const_iterator iter
+        = mInfoIndex.find(StringHash(std::make_shared(lowerTopic)));
 
-    // Skip invalid records: The beginning of a topic string could be identical to another topic
-    // string.
-    for (; iter!=getIdMap().end(); ++iter)
-    {
-        std::string testTopicId =
-            Misc::StringUtils::lowerCase (getRecord (iter->second).get().mTopicId);
-
-        if (testTopicId==topic2)
-            break;
-
-        std::size_t size = topic2.size();
-
-        if (testTopicId.size()second;
-
-    while (begin != getRecords().begin())
+    // topic found, find the starting index (smallest record index stored for the topic)
+    int low = INT_MAX;
+    for (std::vector >::const_iterator it = iter->second.begin();
+        it != iter->second.end(); ++it)
     {
-        if (!Misc::StringUtils::ciEqual((*begin)->get().mTopicId, topic2))
-        {
-            // we've gone one too far, go back
-            ++begin;
-            break;
-        }
-        --begin;
+        low = std::min(low, it->second);
     }
 
-    // Find end
-    RecordConstIterator end = begin;
+    RecordConstIterator begin = getRecords().begin() + low;
 
-    for (; end!=getRecords().end(); ++end)
-        if (!Misc::StringUtils::ciEqual((*end)->get().mTopicId, topic2))
-            break;
+    // Find end (one past the range); the infos of a topic are assumed to be stored contiguously
+    RecordConstIterator end = begin + iter->second.size();
+    // begin + size() already is one past the topic's last info. Stepping once more (as before)
+    // pulled the next topic's first record into the range and shifted getInsertIndex() by one.
 
     return Range (begin, end);
 }
 
 void CSMWorld::InfoCollection::removeDialogueInfos(const std::string& dialogueId)
 {
-    std::string id = Misc::StringUtils::lowerCase(dialogueId);
     std::vector erasedRecords;
 
-    std::map::const_iterator current = getIdMap().lower_bound(id);
-    std::map::const_iterator end = getIdMap().end();
-    for (; current != end; ++current)
+    Range range = getTopicRange(dialogueId); // getTopicRange converts dialogueId to lower case first
+
+    for (; range.first != range.second; ++range.first)
     {
-        const Record& record = getRecord(current->second);
+        const Record& record = **range.first;
 
         if (Misc::StringUtils::ciEqual(dialogueId, record.get().mTopicId))
         {
             if (record.mState == RecordBase::State_ModifiedOnly)
             {
-                erasedRecords.push_back(current->second);
+                erasedRecords.push_back(range.first - getRecords().begin());
             }
             else
             {
                 std::unique_ptr > record2(new Record(record));
                 record2->mState = RecordBase::State_Deleted;
-                setRecord(current->second, std::move(record2));
+
setRecord(range.first - getRecords().begin(), std::move(record2));
             }
         }
         else
@@ -224,3 +303,105 @@ void CSMWorld::InfoCollection::removeDialogueInfos(const std::string& dialogueId
         erasedRecords.pop_back();
     }
 }
+
+// FIXME: removing a record should adjust prev/next and mark those records as modified
+// accordingly (also consider undo)
+void CSMWorld::InfoCollection::removeRows (int index, int count)
+{
+    Collection >::removeRows(index, count); // erase records only
+
+    for (std::map > >::iterator iter
+        = mInfoIndex.begin(); iter != mInfoIndex.end();)
+    {
+        for (std::vector >::iterator it = iter->second.begin();
+            it != iter->second.end();)
+        {
+            if (it->second >= index)
+            {
+                if (it->second >= index+count) // row behind the removed block: shift it down
+                {
+                    it->second -= count;
+                    ++it;
+                }
+                else
+                    it = iter->second.erase(it); // removed row; erase() invalidates it, so continue from its return value
+            }
+            else
+                ++it;
+        }
+
+        // check for an empty vector (topic without infos is dropped from the index)
+        if (iter->second.empty())
+            mInfoIndex.erase(iter++);
+        else
+            ++iter;
+    }
+}
+
+void CSMWorld::InfoCollection::appendBlankRecord (const std::string& id, UniversalId::Type type)
+{
+    std::unique_ptr > record2(new Record);
+
+    record2->mState = Record::State_ModifiedOnly;
+    record2->mModified.blank();
+
+    record2->get().mId = id;
+
+    insertRecord(std::move(record2), getInsertIndex(id, type, nullptr), type); // call InfoCollection::insertRecord()
+}
+
+int CSMWorld::InfoCollection::searchId (const std::string& id) const
+{
+    std::string::size_type separator = id.find_last_of('#'); // id has the form "topic#info"
+
+    if (separator == std::string::npos)
+        throw std::runtime_error("invalid info ID: " + id);
+
+    return getInfoIndex(id.substr(separator+1), id.substr(0, separator));
+}
+
+void CSMWorld::InfoCollection::appendRecord (std::unique_ptr record, UniversalId::Type type)
+{
+    int index = getInsertIndex(static_cast*>(record.get())->get().mId, type, record.get());
+
+    insertRecord(std::move(record), index, type);
+}
+
+void CSMWorld::InfoCollection::insertRecord (std::unique_ptr record, int index,
+    UniversalId::Type type)
+{
+    int size = static_cast(getRecords().size()); // record count before the insertion
+
+    std::string id = static_cast*>(record.get())->get().mId;
+    std::string::size_type separator = id.find_last_of('#');
+
+    if (separator == std::string::npos)
+        throw std::runtime_error("invalid info ID: " + id);
+
+    Collection >::insertRecord(std::move(record), index, type); // add records only
+
+    // adjust index: unless the record was appended, rows at/after index moved up by one
+    if (index < size) // was size-1, which left the old last row's index stale when inserting right before it
+    {
+        for (std::map > >::iterator iter
+            = mInfoIndex.begin(); iter != mInfoIndex.end(); ++iter)
+        {
+            for (std::vector >::iterator it = iter->second.begin();
+                it != iter->second.end(); ++it)
+            {
+                if (it->second >= index)
+                    ++(it->second);
+            }
+        }
+    }
+
+    // get iterator for existing topic or a new topic
+    std::string lowerId = Misc::StringUtils::lowerCase(id);
+    std::pair > >::iterator, bool> res
+        = mInfoIndex.insert(
+            std::make_pair(StringHash(std::make_shared(lowerId.substr(0, separator))),
+                           std::vector >())); // empty vector
+
+    // insert info and index (done after the shift above, so the new entry is not shifted itself)
+    res.first->second.push_back(std::make_pair(lowerId.substr(separator+1), index));
+}
diff --git a/apps/opencs/model/world/infocollection.hpp b/apps/opencs/model/world/infocollection.hpp
index f0379b3c14..1417af6659 100644
--- a/apps/opencs/model/world/infocollection.hpp
+++ b/apps/opencs/model/world/infocollection.hpp
@@ -1,6 +1,8 @@
 #ifndef CSM_WOLRD_INFOCOLLECTION_H
 #define CSM_WOLRD_INFOCOLLECTION_H
 
+#include
+
 #include "collection.hpp"
 #include "info.hpp"
 
@@ -11,6 +13,48 @@ namespace ESM
 
 namespace CSMWorld
 {
+    struct StringHash
+    {
+        uint64_t mHash;
+        std::shared_ptr mString;
+
+        StringHash (std::shared_ptr str) : mString(str)
+        {
+            mHash = MurmurHash64A(str->c_str(), str->size(), /*seed*/1);
+        }
+    };
+}
+
+namespace std
+{
+    template<> struct less
+    {
+        bool operator() (const CSMWorld::StringHash& lhs, const CSMWorld::StringHash& rhs) const
+        {
+            if (lhs.mHash < rhs.mHash)
+                return true;
+
+            if (lhs.mHash > rhs.mHash)
+                return false;
+
+            return *lhs.mString < *rhs.mString; // equal hashes: fall back to comparing the strings
+        }
+    };
+}
+
+namespace CSMWorld
+{
+    template<>
+    void
Collection >::removeRows (int index, int count);
+
+    template<>
+    void Collection >::insertRecord (std::unique_ptr record,
+        int index, UniversalId::Type type);
+
+    template<>
+    bool Collection >::reorderRowsImp (int baseIndex,
+        const std::vector& newOrder);
+
     class InfoCollection : public Collection >
     {
         public:
@@ -20,18 +64,32 @@ namespace CSMWorld
 
         private:
 
+            // The general strategy is to keep the records in Collection ordered (within
+            // a topic group) while the index lookup maps are not ordered. It is assumed that
+            // each topic has a small number of infos, which allows the use of vectors for
+            // iterating through them without too much penalty.
+            //
+            // NOTE: hashed topic string as well as id string are stored in lower case.
+            std::map > > mInfoIndex;
+
             void load (const Info& record, bool base);
 
             int getInfoIndex (const std::string& id, const std::string& topic) const;
             ///< Return index for record \a id or -1 (if not present; deleted records are considered)
             ///
             /// \param id info ID without topic prefix
+            ///
+            /// \attention matching is case-insensitive: \a topic is lower-cased and \a id is compared with ciEqual
 
         public:
 
-            int getAppendIndex (const std::string& id,
-                UniversalId::Type type = UniversalId::Type_None) const override;
+            int getInsertIndex (const std::string& id,
+                UniversalId::Type type = UniversalId::Type_None,
+                RecordBase *record = nullptr) const override;
             ///< \param type Will be ignored, unless the collection supports multiple record types
+            ///
+            /// Without \a record the index is taken from the end of the topic's range (as getAppendIndex did);
+            /// with \a record its prev/next links are looked up first to choose the index.
 
             bool reorderRows (int baseIndex, const std::vector& newOrder) override;
             ///< Reorder the rows [baseIndex, baseIndex+newOrder.size()) according to the indices
@@ -46,6 +104,20 @@ namespace CSMWorld
             /// the given topic.
 
             void removeDialogueInfos(const std::string& dialogueId);
+
+            void removeRows (int index, int count); ///< Erase the rows and keep mInfoIndex in sync.
+
+            void appendBlankRecord (const std::string& id,
+                UniversalId::Type type = UniversalId::Type_None); ///< Insert a blank, modified-only record at getInsertIndex(\a id).
+
+            int searchId (const std::string& id) const; ///< \a id is "topic#info"; throws std::runtime_error if '#' is missing.
+
+            void appendRecord (std::unique_ptr record,
+                UniversalId::Type type = UniversalId::Type_None); ///< Insert at the index chosen by getInsertIndex.
+
+            void insertRecord (std::unique_ptr record,
+                int index,
+                UniversalId::Type type = UniversalId::Type_None); ///< Insert at \a index and update mInfoIndex.
     };
 }
 
diff --git a/extern/murmurhash/CMakeLists.txt b/extern/murmurhash/CMakeLists.txt
new file mode 100644
index 0000000000..cd8199ff06
--- /dev/null
+++ b/extern/murmurhash/CMakeLists.txt
@@ -0,0 +1,17 @@
+cmake_minimum_required(VERSION 2.8)
+
+# This is NOT intended as a stand-alone build system! Instead, you should include this from the main CMakeLists of your project.
+
+set(MURMURHASH_LIBRARY "murmurhash")
+
+# Sources
+set(SOURCE_FILES
+    MurmurHash2.cpp
+)
+
+add_library(${MURMURHASH_LIBRARY} STATIC ${SOURCE_FILES})
+
+set(MURMURHASH_LIBRARIES ${MURMURHASH_LIBRARY})
+
+link_directories(${CMAKE_CURRENT_BINARY_DIR})
+set(MURMURHASH_LIBRARIES ${MURMURHASH_LIBRARIES} PARENT_SCOPE)
diff --git a/extern/murmurhash/MurmurHash2.cpp b/extern/murmurhash/MurmurHash2.cpp
new file mode 100644
index 0000000000..d1b6f476e8
--- /dev/null
+++ b/extern/murmurhash/MurmurHash2.cpp
@@ -0,0 +1,522 @@
+//-----------------------------------------------------------------------------
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - This code makes a few assumptions about how your machine behaves -
+
+// 1. We can read a 4-byte value from any address without crashing
+// 2. sizeof(int) == 4
+
+// And it has a few limitations -
+
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+// machines.
+ +#include "MurmurHash2.h" + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +#define BIG_CONSTANT(x) (x) + +// Other compilers + +#else // defined(_MSC_VER) + +#define BIG_CONSTANT(x) (x##LLU) + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed ) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + + const uint32_t m = 0x5bd1e995; + const int r = 24; + + // Initialize the hash to a 'random' value + + uint32_t h = seed ^ len; + + // Mix 4 bytes at a time into the hash + + const unsigned char * data = (const unsigned char *)key; + + while(len >= 4) + { + uint32_t k = *(uint32_t*)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + +//----------------------------------------------------------------------------- +// MurmurHash2, 64-bit versions, by Austin Appleby + +// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment +// and endian-ness issues if used across multiple platforms. 
+ +// 64-bit hash for 64-bit platforms + +uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed ) +{ + const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995); + const int r = 47; + + uint64_t h = seed ^ (len * m); + + const uint64_t * data = (const uint64_t *)key; + const uint64_t * end = data + (len/8); + + while(data != end) + { + uint64_t k = *data++; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + const unsigned char * data2 = (const unsigned char*)data; + + switch(len & 7) + { + case 7: h ^= uint64_t(data2[6]) << 48; + case 6: h ^= uint64_t(data2[5]) << 40; + case 5: h ^= uint64_t(data2[4]) << 32; + case 4: h ^= uint64_t(data2[3]) << 24; + case 3: h ^= uint64_t(data2[2]) << 16; + case 2: h ^= uint64_t(data2[1]) << 8; + case 1: h ^= uint64_t(data2[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} + + +// 64-bit hash for 32-bit platforms + +uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed ) +{ + const uint32_t m = 0x5bd1e995; + const int r = 24; + + uint32_t h1 = uint32_t(seed) ^ len; + uint32_t h2 = uint32_t(seed >> 32); + + const uint32_t * data = (const uint32_t *)key; + + while(len >= 8) + { + uint32_t k1 = *data++; + k1 *= m; k1 ^= k1 >> r; k1 *= m; + h1 *= m; h1 ^= k1; + len -= 4; + + uint32_t k2 = *data++; + k2 *= m; k2 ^= k2 >> r; k2 *= m; + h2 *= m; h2 ^= k2; + len -= 4; + } + + if(len >= 4) + { + uint32_t k1 = *data++; + k1 *= m; k1 ^= k1 >> r; k1 *= m; + h1 *= m; h1 ^= k1; + len -= 4; + } + + switch(len) + { + case 3: h2 ^= ((unsigned char*)data)[2] << 16; + case 2: h2 ^= ((unsigned char*)data)[1] << 8; + case 1: h2 ^= ((unsigned char*)data)[0]; + h2 *= m; + }; + + h1 ^= h2 >> 18; h1 *= m; + h2 ^= h1 >> 22; h2 *= m; + h1 ^= h2 >> 17; h1 *= m; + h2 ^= h1 >> 19; h2 *= m; + + uint64_t h = h1; + + h = (h << 32) | h2; + + return h; +} + +//----------------------------------------------------------------------------- +// MurmurHash2A, by Austin Appleby + +// This is a variant of 
MurmurHash2 modified to use the Merkle-Damgard +// construction. Bulk speed should be identical to Murmur2, small-key speed +// will be 10%-20% slower due to the added overhead at the end of the hash. + +// This variant fixes a minor issue where null keys were more likely to +// collide with each other than expected, and also makes the function +// more amenable to incremental implementations. + +#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } + +uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed ) +{ + const uint32_t m = 0x5bd1e995; + const int r = 24; + uint32_t l = len; + + const unsigned char * data = (const unsigned char *)key; + + uint32_t h = seed; + + while(len >= 4) + { + uint32_t k = *(uint32_t*)data; + + mmix(h,k); + + data += 4; + len -= 4; + } + + uint32_t t = 0; + + switch(len) + { + case 3: t ^= data[2] << 16; + case 2: t ^= data[1] << 8; + case 1: t ^= data[0]; + }; + + mmix(h,t); + mmix(h,l); + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + +//----------------------------------------------------------------------------- +// CMurmurHash2A, by Austin Appleby + +// This is a sample implementation of MurmurHash2A designed to work +// incrementally. + +// Usage - + +// CMurmurHash2A hasher +// hasher.Begin(seed); +// hasher.Add(data1,size1); +// hasher.Add(data2,size2); +// ... 
+// hasher.Add(dataN,sizeN); +// uint32_t hash = hasher.End() + +class CMurmurHash2A +{ +public: + + void Begin ( uint32_t seed = 0 ) + { + m_hash = seed; + m_tail = 0; + m_count = 0; + m_size = 0; + } + + void Add ( const unsigned char * data, int len ) + { + m_size += len; + + MixTail(data,len); + + while(len >= 4) + { + uint32_t k = *(uint32_t*)data; + + mmix(m_hash,k); + + data += 4; + len -= 4; + } + + MixTail(data,len); + } + + uint32_t End ( void ) + { + mmix(m_hash,m_tail); + mmix(m_hash,m_size); + + m_hash ^= m_hash >> 13; + m_hash *= m; + m_hash ^= m_hash >> 15; + + return m_hash; + } + +private: + + static const uint32_t m = 0x5bd1e995; + static const int r = 24; + + void MixTail ( const unsigned char * & data, int & len ) + { + while( len && ((len<4) || m_count) ) + { + m_tail |= (*data++) << (m_count * 8); + + m_count++; + len--; + + if(m_count == 4) + { + mmix(m_hash,m_tail); + m_tail = 0; + m_count = 0; + } + } + } + + uint32_t m_hash; + uint32_t m_tail; + uint32_t m_count; + uint32_t m_size; +}; + +//----------------------------------------------------------------------------- +// MurmurHashNeutral2, by Austin Appleby + +// Same as MurmurHash2, but endian- and alignment-neutral. +// Half the speed though, alas. 
+ +uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed ) +{ + const uint32_t m = 0x5bd1e995; + const int r = 24; + + uint32_t h = seed ^ len; + + const unsigned char * data = (const unsigned char *)key; + + while(len >= 4) + { + uint32_t k; + + k = data[0]; + k |= data[1] << 8; + k |= data[2] << 16; + k |= data[3] << 24; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + +//----------------------------------------------------------------------------- +// MurmurHashAligned2, by Austin Appleby + +// Same algorithm as MurmurHash2, but only does aligned reads - should be safer +// on certain platforms. + +// Performance will be lower than MurmurHash2 + +#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } + + +uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed ) +{ + const uint32_t m = 0x5bd1e995; + const int r = 24; + + const unsigned char * data = (const unsigned char *)key; + + uint32_t h = seed ^ len; + + int align = (uint64_t)data & 3; + + if(align && (len >= 4)) + { + // Pre-load the temp registers + + uint32_t t = 0, d = 0; + + switch(align) + { + case 1: t |= data[2] << 16; + case 2: t |= data[1] << 8; + case 3: t |= data[0]; + } + + t <<= (8 * align); + + data += 4-align; + len -= 4-align; + + int sl = 8 * (4-align); + int sr = 8 * align; + + // Mix + + while(len >= 4) + { + d = *(uint32_t *)data; + t = (t >> sr) | (d << sl); + + uint32_t k = t; + + MIX(h,k,m); + + t = d; + + data += 4; + len -= 4; + } + + // Handle leftover data in temp registers + + d = 0; + + if(len >= align) + { + switch(align) + { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + } + + uint32_t k = (t >> sr) | (d << sl); + MIX(h,k,m); + + data += align; + len -= align; + + //---------- 
+ // Handle tail bytes + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + } + else + { + switch(len) + { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + case 0: h ^= (t >> sr) | (d << sl); + h *= m; + } + } + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } + else + { + while(len >= 4) + { + uint32_t k = *(uint32_t *)data; + + MIX(h,k,m); + + data += 4; + len -= 4; + } + + //---------- + // Handle tail bytes + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } +} + +//----------------------------------------------------------------------------- diff --git a/extern/murmurhash/MurmurHash2.h b/extern/murmurhash/MurmurHash2.h new file mode 100644 index 0000000000..e6d0c36924 --- /dev/null +++ b/extern/murmurhash/MurmurHash2.h @@ -0,0 +1,38 @@ +//----------------------------------------------------------------------------- +// MurmurHash2 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. 
+ +#ifndef _MURMURHASH2_H_ +#define _MURMURHASH2_H_ + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) && (_MSC_VER < 1600) + +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; + +// Other compilers + +#else // defined(_MSC_VER) + +#include + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed ); +uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed ); +uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed ); +uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed ); +uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed ); +uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed ); + +//----------------------------------------------------------------------------- + +#endif // _MURMURHASH2_H_