From 357fec11295c9422dce1c8102b7f75e710e8cb72 Mon Sep 17 00:00:00 2001 From: casey langen Date: Fri, 4 Jan 2019 22:20:51 -0800 Subject: [PATCH] * Added some missing DB indexes for HUGE indexer performance increases -- we seem to be able to easily index 300k+ (and probably more) tracks now. * Ensure we replace invalid unicode characters before inserting values into the DB * A bunch of GME fixes * Various other small indexer cleanups and fixes --- CMakeLists.txt | 4 +- src/core/db/Statement.cpp | 36 +---- src/core/db/Statement.h | 5 +- src/core/debug.cpp | 2 +- src/core/library/Indexer.cpp | 24 ++- src/core/library/Indexer.h | 3 +- src/core/library/LocalLibrary.cpp | 8 + src/core/library/track/IndexerTrack.cpp | 7 +- src/core/sdk/IIndexerWriter.h | 3 +- src/plugins/gmedecoder/Constants.h | 35 ++++- src/plugins/gmedecoder/GmeIndexerSource.cpp | 164 +++++++++++--------- src/plugins/gmedecoder/GmeIndexerSource.h | 4 +- 12 files changed, 168 insertions(+), 127 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b90bc6885..711f7fb3b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,10 +15,10 @@ include(CMakeToolsHelpers OPTIONAL) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -Wno-unused-result -Wno-deprecated-declarations") -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -frtti -fexceptions") # enable for additional memory checking with fsanitize # set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g3 -fsanitize=address,undefined") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -frtti -fexceptions") if (${LINK_STATICALLY} MATCHES "true") set(Boost_USE_STATIC_LIBS ON) diff --git a/src/core/db/Statement.cpp b/src/core/db/Statement.cpp index a5a66dd04..8534a163b 100644 --- a/src/core/db/Statement.cpp +++ b/src/core/db/Statement.cpp @@ -94,37 +94,17 @@ void Statement::BindFloat(int position, float bindFloat) { sqlite3_bind_double(this->stmt, position + 1, bindFloat); } -void Statement::BindText(int position, const char* bindText) { - sqlite3_bind_text( - this->stmt, - position + 1, - bindText, - -1, - SQLITE_STATIC); -} +void Statement::BindText(int position, const std::string& bindText) { + std::string sanitized; + utf8::replace_invalid( + bindText.begin(), + bindText.end(), + std::back_inserter(sanitized), + (uint32_t) '?'); -void Statement::BindText(int position, const std::string &bindText) { sqlite3_bind_text( this->stmt, position + 1, - bindText.c_str(), - -1, - SQLITE_TRANSIENT); -} - -void Statement::BindTextW(int position, const wchar_t* bindText) { - sqlite3_bind_text16( - this->stmt, - position + 1, - bindText, - -1, - SQLITE_STATIC); -} - -void Statement::BindTextW(int position, const std::wstring &bindText) { - sqlite3_bind_text16( - this->stmt, - position + 1, - bindText.c_str(), + sanitized.c_str(), -1, SQLITE_TRANSIENT); } diff --git a/src/core/db/Statement.h b/src/core/db/Statement.h index 8c8f6e147..d2cf5a476 100644 --- a/src/core/db/Statement.h +++ b/src/core/db/Statement.h @@ -53,10 +53,7 @@ namespace musik { namespace core { namespace db { void BindInt32(int position, int bindInt); void BindInt64(int position, int64_t bindInt); void BindFloat(int position, float bindFloat); - void BindText(int position, const char* bindText); - void BindText(int position, const std::string &bindText); - void BindTextW(int position, const wchar_t* bindText); - void BindTextW(int position, const std::wstring &bindText); + void BindText(int position, const std::string& bindText); void BindNull(int position); int ColumnInt32(int column); diff --git a/src/core/debug.cpp b/src/core/debug.cpp index 88fdbe07d..c1c16ab7b 100755 --- a/src/core/debug.cpp +++ b/src/core/debug.cpp @@ -152,7 +152,7 @@ static void thread_proc() { backend->warning(entry->tag, entry->message); break; case debug_level::error: - backend->verbose(entry->tag, entry->message); + backend->error(entry->tag, entry->message); break; } } diff --git a/src/core/library/Indexer.cpp b/src/core/library/Indexer.cpp index 8b9f345f0..30d719453 100644 --- a/src/core/library/Indexer.cpp +++ b/src/core/library/Indexer.cpp @@ -221,6 +221,8 @@ void Indexer::RemovePath(const std::string& path) { } void Indexer::Synchronize(const SyncContext& context, boost::asio::io_service* io) { + LocalLibrary::CreateIndexes(this->dbConnection); + IndexerTrack::OnIndexerStarted(this->dbConnection); this->ProcessAddRemoveQueue(); @@ -229,9 +231,6 @@ void Indexer::Synchronize(const SyncContext& context, boost::asio::io_service* i auto type = context.type; auto sourceId = context.sourceId; - - LocalLibrary::DropIndexes(this->dbConnection); - if (type == SyncType::Rebuild) { LocalLibrary::InvalidateTrackMetadata(this->dbConnection); type = SyncType::All; @@ -999,13 +998,30 @@ int Indexer::RemoveAll(IIndexerSource* source) { return 0; } -void Indexer::CommitProgress(IIndexerSource* source) { +void Indexer::CommitProgress(IIndexerSource* source, unsigned updatedTracks) { if (this->currentSource && this->currentSource->SourceId() == source->SourceId() && trackTransaction) { trackTransaction->CommitAndRestart(); } + + if (updatedTracks) { + std::unique_lock lock(IndexerTrack::sharedWriteMutex); + this->Progress((int) updatedTracks); + } +} + +int Indexer::GetLastModifiedTime(IIndexerSource* source, const char* externalId) { + db::Statement stmt("SELECT filetime FROM tracks t where source_id=? AND external_id=?", dbConnection); + + stmt.BindInt32(0, source->SourceId()); + stmt.BindText(1, externalId); + if (stmt.Step() == db::Row) { + return stmt.ColumnInt32(0); + } + + return -1; } void Indexer::ScheduleRescan(IIndexerSource* source) { diff --git a/src/core/library/Indexer.h b/src/core/library/Indexer.h index d5113ee27..90757b313 100644 --- a/src/core/library/Indexer.h +++ b/src/core/library/Indexer.h @@ -83,7 +83,8 @@ namespace musik { namespace core { virtual bool RemoveByUri(musik::core::sdk::IIndexerSource* source, const char* uri) override; virtual bool RemoveByExternalId(musik::core::sdk::IIndexerSource* source, const char* id) override; virtual int RemoveAll(musik::core::sdk::IIndexerSource* source) override; - virtual void CommitProgress(musik::core::sdk::IIndexerSource* source) override; + virtual void CommitProgress(musik::core::sdk::IIndexerSource* source, unsigned updatedTracks) override; + virtual int GetLastModifiedTime(musik::core::sdk::IIndexerSource* source, const char* id) override; virtual bool Save( musik::core::sdk::IIndexerSource* source, diff --git a/src/core/library/LocalLibrary.cpp b/src/core/library/LocalLibrary.cpp index 0173e2b41..60da14449 100644 --- a/src/core/library/LocalLibrary.cpp +++ b/src/core/library/LocalLibrary.cpp @@ -607,6 +607,10 @@ void LocalLibrary::DropIndexes(db::Connection &db) { db.Execute("DROP INDEX IF EXISTS metavalues_index1"); db.Execute("DROP INDEX IF EXISTS tracks_external_id_index"); + db.Execute("DROP INDEX IF EXISTS tracks_filename_id_index"); + db.Execute("DROP INDEX IF EXISTS tracks_dirty_index"); + db.Execute("DROP INDEX IF EXISTS tracks_external_id_filetime_index"); + db.Execute("DROP INDEX IF EXISTS tracks_by_source_index"); db.Execute("DROP INDEX IF EXISTS playlist_tracks_index_1"); db.Execute("DROP INDEX IF EXISTS playlist_tracks_index_2"); @@ -635,6 +639,10 @@ void LocalLibrary::CreateIndexes(db::Connection &db) { db.Execute("CREATE INDEX IF NOT EXISTS metavalues_index4 ON meta_values (id, content)"); db.Execute("CREATE INDEX IF NOT EXISTS tracks_external_id_index ON tracks (external_id)"); + db.Execute("CREATE INDEX IF NOT EXISTS tracks_filename_index ON tracks (filename)"); + db.Execute("CREATE INDEX IF NOT EXISTS tracks_dirty_index ON tracks (id, filename, filesize, filetime)"); + db.Execute("CREATE INDEX IF NOT EXISTS tracks_external_id_filetime_index ON tracks (external_id, filetime)"); + db.Execute("CREATE INDEX IF NOT EXISTS tracks_by_source_index ON tracks (id, external_id, filename, source_id)"); db.Execute("CREATE INDEX IF NOT EXISTS playlist_tracks_index_1 ON playlist_tracks (track_external_id,playlist_id,sort_order)"); db.Execute("CREATE INDEX IF NOT EXISTS playlist_tracks_index_2 ON playlist_tracks (track_external_id,sort_order)"); diff --git a/src/core/library/track/IndexerTrack.cpp b/src/core/library/track/IndexerTrack.cpp index ae8d89542..65fba4b87 100644 --- a/src/core/library/track/IndexerTrack.cpp +++ b/src/core/library/track/IndexerTrack.cpp @@ -44,7 +44,6 @@ #include #include -#include #include using namespace musik::core; @@ -258,8 +257,8 @@ bool IndexerTrack::NeedsToBeIndexed( size_t fileSize = (size_t) boost::filesystem::file_size(file); size_t fileTime = (size_t) boost::filesystem::last_write_time(file); - this->SetValue("filesize", boost::lexical_cast(fileSize).c_str()); - this->SetValue("filetime", boost::lexical_cast(fileTime).c_str()); + this->SetValue("filesize", std::to_string(fileSize).c_str()); + this->SetValue("filetime", std::to_string(fileTime).c_str()); db::Statement stmt( "SELECT id, filename, filesize, filetime " \ @@ -443,7 +442,7 @@ int64_t IndexerTrack::SaveThumbnail(db::Connection& connection, const std::strin std::string filename = libraryDirectory + "thumbs/" + - boost::lexical_cast(thumbnailId) + + std::to_string(thumbnailId) + ".jpg"; #ifdef WIN32 diff --git a/src/core/sdk/IIndexerWriter.h b/src/core/sdk/IIndexerWriter.h index 4b97e8d05..e8550bed8 100644 --- a/src/core/sdk/IIndexerWriter.h +++ b/src/core/sdk/IIndexerWriter.h @@ -44,10 +44,11 @@ namespace musik { namespace core { namespace sdk { public: virtual ITagStore* CreateWriter() = 0; virtual bool Save(IIndexerSource* source, ITagStore* track, const char* externalId = "") = 0; - virtual void CommitProgress(IIndexerSource* source) = 0; + virtual void CommitProgress(IIndexerSource* source, unsigned updatedTracks = 0) = 0; virtual bool RemoveByUri(IIndexerSource* source, const char* uri) = 0; virtual bool RemoveByExternalId(IIndexerSource* source, const char* id) = 0; virtual int RemoveAll(IIndexerSource* source) = 0; + virtual int GetLastModifiedTime(IIndexerSource* source, const char* externalId) = 0; }; } } } diff --git a/src/plugins/gmedecoder/Constants.h b/src/plugins/gmedecoder/Constants.h index a4eabdc8b..cf9b877df 100644 --- a/src/plugins/gmedecoder/Constants.h +++ b/src/plugins/gmedecoder/Constants.h @@ -41,6 +41,8 @@ #include #include #include +#include +#include #ifdef WIN32 #define DLLEXPORT __declspec(dllexport) @@ -101,7 +103,9 @@ static inline musik::core::sdk::ISchema* CreateSchema() { static inline bool canHandle(std::string fn) { std::transform(fn.begin(), fn.end(), fn.begin(), ::tolower); for (auto& ext : FORMATS) { - if (fn.rfind(ext) == fn.size() - ext.size()) { + if (fn.size() >= ext.size() && + fn.rfind(ext) == fn.size() - ext.size()) + { return true; } } @@ -157,16 +161,33 @@ static std::string getM3uFor(const std::string& fn) { return ""; } -static bool exists(const std::string& externalId) { - std::string fn; - int trackNum; - if (!parseExternalId(externalId, fn, trackNum)) { - return false; - } +static inline bool fileExists(const std::string& fn) { #ifdef WIN32 auto fn16 = u8to16(fn.c_str()); return _waccess(fn16.c_str(), R_OK) != -1; #else return access(fn.c_str(), R_OK) != -1; #endif +} + +static inline bool externalIdExists(const std::string& externalId) { + std::string fn; + int trackNum; + if (!parseExternalId(externalId, fn, trackNum)) { + return false; + } + return fileExists(fn); +} + +static int getLastModifiedTime(const std::string& fn) { +#ifdef WIN32 + /* todo */ +#else + struct stat result = { 0 }; + if (stat(fn.c_str(), &result) == 0) { + return result.st_mtime; + } +#endif + + return -1; } \ No newline at end of file diff --git a/src/plugins/gmedecoder/GmeIndexerSource.cpp b/src/plugins/gmedecoder/GmeIndexerSource.cpp index b84e9bbf3..4553d5946 100644 --- a/src/plugins/gmedecoder/GmeIndexerSource.cpp +++ b/src/plugins/gmedecoder/GmeIndexerSource.cpp @@ -58,10 +58,12 @@ void GmeIndexerSource::Release() { } void GmeIndexerSource::OnBeforeScan() { + this->filesIndexed = 0; + this->interrupt = false; } void GmeIndexerSource::OnAfterScan() { - trackCache.clear(); + invalidFiles.clear(); } ScanResult GmeIndexerSource::Scan( @@ -73,11 +75,13 @@ ScanResult GmeIndexerSource::Scan( this->ScanDirectory(std::string(indexerPaths[i]), this, indexer); } + indexer->CommitProgress(this, this->filesIndexed); + return ScanCommit; } void GmeIndexerSource::Interrupt() { - + this->interrupt = true; } void GmeIndexerSource::ScanTrack( @@ -88,15 +92,10 @@ void GmeIndexerSource::ScanTrack( std::string fn; int trackNum; if (parseExternalId(externalId, fn, trackNum)) { - auto fnIt = trackCache.find(fn); - if (fnIt != trackCache.end()) { - auto tnIt = fnIt->second.find(trackNum); - if (tnIt != fnIt->second.end()) { - return; /* track exists */ - } + if (!fileExists(fn) || invalidFiles.find(fn) != invalidFiles.end()) { + indexer->RemoveByExternalId(this, externalId); } } - indexer->RemoveByExternalId(this, externalId); } int GmeIndexerSource::SourceId() { @@ -108,82 +107,81 @@ void GmeIndexerSource::UpdateMetadata( IIndexerSource* source, IIndexerWriter* indexer) { - gme_t* data = nullptr; - gme_err_t err = gme_open_file(fn.c_str(), &data, gme_info_only); - if (err) { - debug->Error(PLUGIN_NAME, strfmt("error opening %s", fn.c_str()).c_str()); - } - else { - if (prefs->GetBool(KEY_ENABLE_M3U, DEFAULT_ENABLE_M3U)) { - std::string m3u = getM3uFor(fn); - if (m3u.size()) { - err = gme_load_m3u(data, m3u.c_str()); - if (err) { - debug->Error(PLUGIN_NAME, strfmt("m3u found, but load failed '%s'", err).c_str()); - } - } + /* only need to do this check once, and it's relatively expensive because + it requires a db read. cache we've already done it. */ + int modifiedTime = getLastModifiedTime(fn); + const std::string firstExternalId = createExternalId(fn, 0); + int modifiedDbTime = indexer->GetLastModifiedTime(this, firstExternalId.c_str()); + if (modifiedDbTime < 0 || modifiedTime != modifiedDbTime) { + gme_t* data = nullptr; + gme_err_t err = gme_open_file(fn.c_str(), &data, gme_info_only); + if (err) { + debug->Error(PLUGIN_NAME, strfmt("error opening %s", fn.c_str()).c_str()); + invalidFiles.insert(fn); } - - for (short i = 0; i < (short) gme_track_count(data); i++) { - gme_info_t* info = nullptr; - err = gme_track_info(data, &info, i); - if (err) { - debug->Error(PLUGIN_NAME, strfmt("error getting track %d: %s", i, err).c_str()); + else { + if (prefs->GetBool(KEY_ENABLE_M3U, DEFAULT_ENABLE_M3U)) { + std::string m3u = getM3uFor(fn); + if (m3u.size()) { + err = gme_load_m3u(data, m3u.c_str()); + if (err) { + debug->Error(PLUGIN_NAME, strfmt("m3u found, but load failed '%s'", err).c_str()); + } + } } - else if (info) { - auto track = indexer->CreateWriter(); - const std::string trackNum = std::to_string(i + 1).c_str(); - const std::string defaultTitle = "Track " + std::to_string(1 + i); + const std::string defaultDuration = + std::to_string(prefs->GetDouble( + KEY_DEFAULT_TRACK_LENGTH, + DEFAULT_TRACK_LENGTH)); + + for (int i = 0; i < gme_track_count(data); i++) { const std::string externalId = createExternalId(fn, i); + const std::string trackNum = std::to_string(i + 1); + const std::string defaultTitle = "Track " + std::to_string(1 + i); + const std::string modifiedTimeStr = std::to_string(modifiedTime); - std::string duration; - if (info->length == -1) { - duration = std::to_string(prefs->GetDouble( - KEY_DEFAULT_TRACK_LENGTH, DEFAULT_TRACK_LENGTH)); - } - else { - duration = std::to_string((float)info->play_length / 1000.0f); - } - - track->SetValue("album", info->game); - track->SetValue("album_artist", info->system); - track->SetValue("genre", info->system); - track->SetValue("track", trackNum.c_str()); - track->SetValue("duration", duration.c_str()); + auto track = indexer->CreateWriter(); track->SetValue("filename", externalId.c_str()); + track->SetValue("filetime", modifiedTimeStr.c_str()); + track->SetValue("track", trackNum.c_str()); - if (strlen(info->author)) { - track->SetValue("artist", info->author); - } - else { - track->SetValue("artist", info->system); - } - - if (strlen(info->song)) { - track->SetValue("title", info->song); - } - else { + gme_info_t* info = nullptr; + err = gme_track_info(data, &info, i); + if (err) { + debug->Error(PLUGIN_NAME, strfmt("error getting track %d: %s", i, err).c_str()); + track->SetValue("duration", defaultDuration.c_str()); track->SetValue("title", defaultTitle.c_str()); } + else if (info) { + std::string duration = (info->length == -1) + ? defaultDuration + : std::to_string((float) info->play_length / 1000.0f); + + track->SetValue("album", info->game); + track->SetValue("album_artist", info->system); + track->SetValue("genre", info->system); + track->SetValue("duration", duration.c_str()); + track->SetValue("artist", strlen(info->author) ? info->author : info->system); + track->SetValue("title", strlen(info->song) ? info->song : defaultTitle.c_str()); + + gme_free_info(info); + } indexer->Save(source, track, externalId.c_str()); - track->Release(); - - /* we maintain a cache of all the tracks we index, so when the indexer - hits the removal phase we don't have to do any i/o. */ - if (trackCache.find(fn) == trackCache.end()) { - trackCache[fn] = { (short) i }; - } - else { - trackCache[fn].insert(i); - } } - gme_free_info(info); } + + gme_delete(data); + } + + /* we commit progress every so often */ + if (++this->filesIndexed % 300 == 0) { + debug->Verbose(PLUGIN_NAME, strfmt("checkpoint %d files", this->filesIndexed).c_str()); + indexer->CommitProgress(this, this->filesIndexed); + this->filesIndexed = 0; } - gme_delete(data); } void GmeIndexerSource::ScanDirectory( @@ -206,14 +204,23 @@ void GmeIndexerSource::ScanDirectory( } std::string relPath8 = u16to8(findData.cFileName); std::string fullPath8 = path + "\\" + relPath8; - if (findData.dwFileAttributes == FILE_ATTRIBUTE_DIRECTORY) { + if (this->interrupt) { + return; + } + else if (findData.dwFileAttributes == FILE_ATTRIBUTE_DIRECTORY) { if (relPath8 != "." && relPath8 != "..") { this->ScanDirectory(fullPath8 + "\\", source, indexer); } } else { if (canHandle(fullPath8)) { - this->UpdateMetadata(fullPath8, source, indexer); + try { + this->UpdateMetadata(fullPath8, source, indexer); + } + catch (...) { + std::string error = strfmt("error reading metadata for %s", fullFn.c_str()); + debug->Error(PLUGIN_NAME, error.c_str()); + } } } } @@ -228,7 +235,10 @@ void GmeIndexerSource::ScanDirectory( } while ((entry = readdir(dir)) != nullptr) { - if (entry->d_type == DT_DIR) { + if (this->interrupt) { + return; + } + else if (entry->d_type == DT_DIR) { std::string name = entry->d_name; if (name == "." || name == "..") { continue; @@ -239,7 +249,13 @@ void GmeIndexerSource::ScanDirectory( std::string fn = entry->d_name; if (canHandle(fn)) { std::string fullFn = path + "/" + fn; - this->UpdateMetadata(fullFn, source, indexer); + try { + this->UpdateMetadata(fullFn, source, indexer); + } + catch (...) { + std::string error = strfmt("error reading metadata for %s", fullFn.c_str()); + debug->Error(PLUGIN_NAME, error.c_str()); + } } } } diff --git a/src/plugins/gmedecoder/GmeIndexerSource.h b/src/plugins/gmedecoder/GmeIndexerSource.h index e01914043..5983eae1f 100644 --- a/src/plugins/gmedecoder/GmeIndexerSource.h +++ b/src/plugins/gmedecoder/GmeIndexerSource.h @@ -78,5 +78,7 @@ class GmeIndexerSource: public musik::core::sdk::IIndexerSource { musik::core::sdk::IIndexerSource* source, musik::core::sdk::IIndexerWriter* indexer); - std::map> trackCache; + std::set invalidFiles; + size_t filesIndexed; + volatile bool interrupt { false }; }; \ No newline at end of file