* Added some missing DB indexes for HUGE indexer performance increases -- we seem to be able to easily index 300k+ (and probably more) tracks now.

* Ensure we replace invalid unicode characters before inserting values into the DB
* A bunch of GME fixes
* Various other small indexer cleanups and fixes
This commit is contained in:
casey langen 2019-01-04 22:20:51 -08:00
parent 1d9893ec4e
commit 357fec1129
12 changed files with 168 additions and 127 deletions

View File

@ -15,10 +15,10 @@ include(CMakeToolsHelpers OPTIONAL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -Wno-unused-result -Wno-deprecated-declarations")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -frtti -fexceptions")
# enable for additional memory checking with fsanitize
# set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g3 -fsanitize=address,undefined")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -frtti -fexceptions")
if (${LINK_STATICALLY} MATCHES "true")
set(Boost_USE_STATIC_LIBS ON)

View File

@ -94,37 +94,17 @@ void Statement::BindFloat(int position, float bindFloat) {
sqlite3_bind_double(this->stmt, position + 1, bindFloat);
}
void Statement::BindText(int position, const char* bindText) {
sqlite3_bind_text(
this->stmt,
position + 1,
bindText,
-1,
SQLITE_STATIC);
}
void Statement::BindText(int position, const std::string& bindText) {
std::string sanitized;
utf8::replace_invalid(
bindText.begin(),
bindText.end(),
std::back_inserter(sanitized),
(uint32_t) '?');
void Statement::BindText(int position, const std::string &bindText) {
sqlite3_bind_text(
this->stmt, position + 1,
bindText.c_str(),
-1,
SQLITE_TRANSIENT);
}
void Statement::BindTextW(int position, const wchar_t* bindText) {
sqlite3_bind_text16(
this->stmt,
position + 1,
bindText,
-1,
SQLITE_STATIC);
}
void Statement::BindTextW(int position, const std::wstring &bindText) {
sqlite3_bind_text16(
this->stmt,
position + 1,
bindText.c_str(),
sanitized.c_str(),
-1,
SQLITE_TRANSIENT);
}

View File

@ -53,10 +53,7 @@ namespace musik { namespace core { namespace db {
void BindInt32(int position, int bindInt);
void BindInt64(int position, int64_t bindInt);
void BindFloat(int position, float bindFloat);
void BindText(int position, const char* bindText);
void BindText(int position, const std::string &bindText);
void BindTextW(int position, const wchar_t* bindText);
void BindTextW(int position, const std::wstring &bindText);
void BindText(int position, const std::string& bindText);
void BindNull(int position);
int ColumnInt32(int column);

View File

@ -152,7 +152,7 @@ static void thread_proc() {
backend->warning(entry->tag, entry->message);
break;
case debug_level::error:
backend->verbose(entry->tag, entry->message);
backend->error(entry->tag, entry->message);
break;
}
}

View File

@ -221,6 +221,8 @@ void Indexer::RemovePath(const std::string& path) {
}
void Indexer::Synchronize(const SyncContext& context, boost::asio::io_service* io) {
LocalLibrary::CreateIndexes(this->dbConnection);
IndexerTrack::OnIndexerStarted(this->dbConnection);
this->ProcessAddRemoveQueue();
@ -229,9 +231,6 @@ void Indexer::Synchronize(const SyncContext& context, boost::asio::io_service* i
auto type = context.type;
auto sourceId = context.sourceId;
LocalLibrary::DropIndexes(this->dbConnection);
if (type == SyncType::Rebuild) {
LocalLibrary::InvalidateTrackMetadata(this->dbConnection);
type = SyncType::All;
@ -999,13 +998,30 @@ int Indexer::RemoveAll(IIndexerSource* source) {
return 0;
}
void Indexer::CommitProgress(IIndexerSource* source) {
void Indexer::CommitProgress(IIndexerSource* source, unsigned updatedTracks) {
if (this->currentSource &&
this->currentSource->SourceId() == source->SourceId() &&
trackTransaction)
{
trackTransaction->CommitAndRestart();
}
if (updatedTracks) {
std::unique_lock<std::mutex> lock(IndexerTrack::sharedWriteMutex);
this->Progress((int) updatedTracks);
}
}
int Indexer::GetLastModifiedTime(IIndexerSource* source, const char* externalId) {
db::Statement stmt("SELECT filetime FROM tracks t where source_id=? AND external_id=?", dbConnection);
stmt.BindInt32(0, source->SourceId());
stmt.BindText(1, externalId);
if (stmt.Step() == db::Row) {
return stmt.ColumnInt32(0);
}
return -1;
}
void Indexer::ScheduleRescan(IIndexerSource* source) {

View File

@ -83,7 +83,8 @@ namespace musik { namespace core {
virtual bool RemoveByUri(musik::core::sdk::IIndexerSource* source, const char* uri) override;
virtual bool RemoveByExternalId(musik::core::sdk::IIndexerSource* source, const char* id) override;
virtual int RemoveAll(musik::core::sdk::IIndexerSource* source) override;
virtual void CommitProgress(musik::core::sdk::IIndexerSource* source) override;
virtual void CommitProgress(musik::core::sdk::IIndexerSource* source, unsigned updatedTracks) override;
virtual int GetLastModifiedTime(musik::core::sdk::IIndexerSource* source, const char* id) override;
virtual bool Save(
musik::core::sdk::IIndexerSource* source,

View File

@ -607,6 +607,10 @@ void LocalLibrary::DropIndexes(db::Connection &db) {
db.Execute("DROP INDEX IF EXISTS metavalues_index1");
db.Execute("DROP INDEX IF EXISTS tracks_external_id_index");
db.Execute("DROP INDEX IF EXISTS tracks_filename_id_index");
db.Execute("DROP INDEX IF EXISTS tracks_dirty_index");
db.Execute("DROP INDEX IF EXISTS tracks_external_id_filetime_index");
db.Execute("DROP INDEX IF EXISTS tracks_by_source_index");
db.Execute("DROP INDEX IF EXISTS playlist_tracks_index_1");
db.Execute("DROP INDEX IF EXISTS playlist_tracks_index_2");
@ -635,6 +639,10 @@ void LocalLibrary::CreateIndexes(db::Connection &db) {
db.Execute("CREATE INDEX IF NOT EXISTS metavalues_index4 ON meta_values (id, content)");
db.Execute("CREATE INDEX IF NOT EXISTS tracks_external_id_index ON tracks (external_id)");
db.Execute("CREATE INDEX IF NOT EXISTS tracks_filename_index ON tracks (filename)");
db.Execute("CREATE INDEX IF NOT EXISTS tracks_dirty_index ON tracks (id, filename, filesize, filetime)");
db.Execute("CREATE INDEX IF NOT EXISTS tracks_external_id_filetime_index ON tracks (external_id, filetime)");
db.Execute("CREATE INDEX IF NOT EXISTS tracks_by_source_index ON tracks (id, external_id, filename, source_id)");
db.Execute("CREATE INDEX IF NOT EXISTS playlist_tracks_index_1 ON playlist_tracks (track_external_id,playlist_id,sort_order)");
db.Execute("CREATE INDEX IF NOT EXISTS playlist_tracks_index_2 ON playlist_tracks (track_external_id,sort_order)");

View File

@ -44,7 +44,6 @@
#include <core/library/LocalLibrary.h>
#include <core/io/DataStreamFactory.h>
#include <boost/lexical_cast.hpp>
#include <unordered_map>
using namespace musik::core;
@ -258,8 +257,8 @@ bool IndexerTrack::NeedsToBeIndexed(
size_t fileSize = (size_t) boost::filesystem::file_size(file);
size_t fileTime = (size_t) boost::filesystem::last_write_time(file);
this->SetValue("filesize", boost::lexical_cast<std::string>(fileSize).c_str());
this->SetValue("filetime", boost::lexical_cast<std::string>(fileTime).c_str());
this->SetValue("filesize", std::to_string(fileSize).c_str());
this->SetValue("filetime", std::to_string(fileTime).c_str());
db::Statement stmt(
"SELECT id, filename, filesize, filetime " \
@ -443,7 +442,7 @@ int64_t IndexerTrack::SaveThumbnail(db::Connection& connection, const std::strin
std::string filename =
libraryDirectory +
"thumbs/" +
boost::lexical_cast<std::string>(thumbnailId) +
std::to_string(thumbnailId) +
".jpg";
#ifdef WIN32

View File

@ -44,10 +44,11 @@ namespace musik { namespace core { namespace sdk {
public:
virtual ITagStore* CreateWriter() = 0;
virtual bool Save(IIndexerSource* source, ITagStore* track, const char* externalId = "") = 0;
virtual void CommitProgress(IIndexerSource* source) = 0;
virtual void CommitProgress(IIndexerSource* source, unsigned updatedTracks = 0) = 0;
virtual bool RemoveByUri(IIndexerSource* source, const char* uri) = 0;
virtual bool RemoveByExternalId(IIndexerSource* source, const char* id) = 0;
virtual int RemoveAll(IIndexerSource* source) = 0;
virtual int GetLastModifiedTime(IIndexerSource* source, const char* externalId) = 0;
};
} } }

View File

@ -41,6 +41,8 @@
#include <vector>
#include <algorithm>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifdef WIN32
#define DLLEXPORT __declspec(dllexport)
@ -101,7 +103,9 @@ static inline musik::core::sdk::ISchema* CreateSchema() {
static inline bool canHandle(std::string fn) {
std::transform(fn.begin(), fn.end(), fn.begin(), ::tolower);
for (auto& ext : FORMATS) {
if (fn.rfind(ext) == fn.size() - ext.size()) {
if (fn.size() >= ext.size() &&
fn.rfind(ext) == fn.size() - ext.size())
{
return true;
}
}
@ -157,16 +161,33 @@ static std::string getM3uFor(const std::string& fn) {
return "";
}
static bool exists(const std::string& externalId) {
std::string fn;
int trackNum;
if (!parseExternalId(externalId, fn, trackNum)) {
return false;
}
static inline bool fileExists(const std::string& fn) {
#ifdef WIN32
auto fn16 = u8to16(fn.c_str());
return _waccess(fn16.c_str(), R_OK) != -1;
#else
return access(fn.c_str(), R_OK) != -1;
#endif
}
static inline bool externalIdExists(const std::string& externalId) {
std::string fn;
int trackNum;
if (!parseExternalId(externalId, fn, trackNum)) {
return false;
}
return fileExists(fn);
}
static int getLastModifiedTime(const std::string& fn) {
#ifdef WIN32
/* todo */
#else
struct stat result = { 0 };
if (stat(fn.c_str(), &result) == 0) {
return result.st_mtime;
}
#endif
return -1;
}

View File

@ -58,10 +58,12 @@ void GmeIndexerSource::Release() {
}
void GmeIndexerSource::OnBeforeScan() {
this->filesIndexed = 0;
this->interrupt = false;
}
void GmeIndexerSource::OnAfterScan() {
trackCache.clear();
invalidFiles.clear();
}
ScanResult GmeIndexerSource::Scan(
@ -73,11 +75,13 @@ ScanResult GmeIndexerSource::Scan(
this->ScanDirectory(std::string(indexerPaths[i]), this, indexer);
}
indexer->CommitProgress(this, this->filesIndexed);
return ScanCommit;
}
void GmeIndexerSource::Interrupt() {
this->interrupt = true;
}
void GmeIndexerSource::ScanTrack(
@ -88,15 +92,10 @@ void GmeIndexerSource::ScanTrack(
std::string fn;
int trackNum;
if (parseExternalId(externalId, fn, trackNum)) {
auto fnIt = trackCache.find(fn);
if (fnIt != trackCache.end()) {
auto tnIt = fnIt->second.find(trackNum);
if (tnIt != fnIt->second.end()) {
return; /* track exists */
}
if (!fileExists(fn) || invalidFiles.find(fn) != invalidFiles.end()) {
indexer->RemoveByExternalId(this, externalId);
}
}
indexer->RemoveByExternalId(this, externalId);
}
int GmeIndexerSource::SourceId() {
@ -108,82 +107,81 @@ void GmeIndexerSource::UpdateMetadata(
IIndexerSource* source,
IIndexerWriter* indexer)
{
gme_t* data = nullptr;
gme_err_t err = gme_open_file(fn.c_str(), &data, gme_info_only);
if (err) {
debug->Error(PLUGIN_NAME, strfmt("error opening %s", fn.c_str()).c_str());
}
else {
if (prefs->GetBool(KEY_ENABLE_M3U, DEFAULT_ENABLE_M3U)) {
std::string m3u = getM3uFor(fn);
if (m3u.size()) {
err = gme_load_m3u(data, m3u.c_str());
if (err) {
debug->Error(PLUGIN_NAME, strfmt("m3u found, but load failed '%s'", err).c_str());
}
}
/* only need to do this check once, and it's relatively expensive because
it requires a db read. cache we've already done it. */
int modifiedTime = getLastModifiedTime(fn);
const std::string firstExternalId = createExternalId(fn, 0);
int modifiedDbTime = indexer->GetLastModifiedTime(this, firstExternalId.c_str());
if (modifiedDbTime < 0 || modifiedTime != modifiedDbTime) {
gme_t* data = nullptr;
gme_err_t err = gme_open_file(fn.c_str(), &data, gme_info_only);
if (err) {
debug->Error(PLUGIN_NAME, strfmt("error opening %s", fn.c_str()).c_str());
invalidFiles.insert(fn);
}
for (short i = 0; i < (short) gme_track_count(data); i++) {
gme_info_t* info = nullptr;
err = gme_track_info(data, &info, i);
if (err) {
debug->Error(PLUGIN_NAME, strfmt("error getting track %d: %s", i, err).c_str());
else {
if (prefs->GetBool(KEY_ENABLE_M3U, DEFAULT_ENABLE_M3U)) {
std::string m3u = getM3uFor(fn);
if (m3u.size()) {
err = gme_load_m3u(data, m3u.c_str());
if (err) {
debug->Error(PLUGIN_NAME, strfmt("m3u found, but load failed '%s'", err).c_str());
}
}
}
else if (info) {
auto track = indexer->CreateWriter();
const std::string trackNum = std::to_string(i + 1).c_str();
const std::string defaultTitle = "Track " + std::to_string(1 + i);
const std::string defaultDuration =
std::to_string(prefs->GetDouble(
KEY_DEFAULT_TRACK_LENGTH,
DEFAULT_TRACK_LENGTH));
for (int i = 0; i < gme_track_count(data); i++) {
const std::string externalId = createExternalId(fn, i);
const std::string trackNum = std::to_string(i + 1);
const std::string defaultTitle = "Track " + std::to_string(1 + i);
const std::string modifiedTimeStr = std::to_string(modifiedTime);
std::string duration;
if (info->length == -1) {
duration = std::to_string(prefs->GetDouble(
KEY_DEFAULT_TRACK_LENGTH, DEFAULT_TRACK_LENGTH));
}
else {
duration = std::to_string((float)info->play_length / 1000.0f);
}
track->SetValue("album", info->game);
track->SetValue("album_artist", info->system);
track->SetValue("genre", info->system);
track->SetValue("track", trackNum.c_str());
track->SetValue("duration", duration.c_str());
auto track = indexer->CreateWriter();
track->SetValue("filename", externalId.c_str());
track->SetValue("filetime", modifiedTimeStr.c_str());
track->SetValue("track", trackNum.c_str());
if (strlen(info->author)) {
track->SetValue("artist", info->author);
}
else {
track->SetValue("artist", info->system);
}
if (strlen(info->song)) {
track->SetValue("title", info->song);
}
else {
gme_info_t* info = nullptr;
err = gme_track_info(data, &info, i);
if (err) {
debug->Error(PLUGIN_NAME, strfmt("error getting track %d: %s", i, err).c_str());
track->SetValue("duration", defaultDuration.c_str());
track->SetValue("title", defaultTitle.c_str());
}
else if (info) {
std::string duration = (info->length == -1)
? defaultDuration
: std::to_string((float) info->play_length / 1000.0f);
track->SetValue("album", info->game);
track->SetValue("album_artist", info->system);
track->SetValue("genre", info->system);
track->SetValue("duration", duration.c_str());
track->SetValue("artist", strlen(info->author) ? info->author : info->system);
track->SetValue("title", strlen(info->song) ? info->song : defaultTitle.c_str());
gme_free_info(info);
}
indexer->Save(source, track, externalId.c_str());
track->Release();
/* we maintain a cache of all the tracks we index, so when the indexer
hits the removal phase we don't have to do any i/o. */
if (trackCache.find(fn) == trackCache.end()) {
trackCache[fn] = { (short) i };
}
else {
trackCache[fn].insert(i);
}
}
gme_free_info(info);
}
gme_delete(data);
}
/* we commit progress every so often */
if (++this->filesIndexed % 300 == 0) {
debug->Verbose(PLUGIN_NAME, strfmt("checkpoint %d files", this->filesIndexed).c_str());
indexer->CommitProgress(this, this->filesIndexed);
this->filesIndexed = 0;
}
gme_delete(data);
}
void GmeIndexerSource::ScanDirectory(
@ -206,14 +204,23 @@ void GmeIndexerSource::ScanDirectory(
}
std::string relPath8 = u16to8(findData.cFileName);
std::string fullPath8 = path + "\\" + relPath8;
if (findData.dwFileAttributes == FILE_ATTRIBUTE_DIRECTORY) {
if (this->interrupt) {
return;
}
else if (findData.dwFileAttributes == FILE_ATTRIBUTE_DIRECTORY) {
if (relPath8 != "." && relPath8 != "..") {
this->ScanDirectory(fullPath8 + "\\", source, indexer);
}
}
else {
if (canHandle(fullPath8)) {
this->UpdateMetadata(fullPath8, source, indexer);
try {
this->UpdateMetadata(fullPath8, source, indexer);
}
catch (...) {
std::string error = strfmt("error reading metadata for %s", fullFn.c_str());
debug->Error(PLUGIN_NAME, error.c_str());
}
}
}
}
@ -228,7 +235,10 @@ void GmeIndexerSource::ScanDirectory(
}
while ((entry = readdir(dir)) != nullptr) {
if (entry->d_type == DT_DIR) {
if (this->interrupt) {
return;
}
else if (entry->d_type == DT_DIR) {
std::string name = entry->d_name;
if (name == "." || name == "..") {
continue;
@ -239,7 +249,13 @@ void GmeIndexerSource::ScanDirectory(
std::string fn = entry->d_name;
if (canHandle(fn)) {
std::string fullFn = path + "/" + fn;
this->UpdateMetadata(fullFn, source, indexer);
try {
this->UpdateMetadata(fullFn, source, indexer);
}
catch (...) {
std::string error = strfmt("error reading metadata for %s", fullFn.c_str());
debug->Error(PLUGIN_NAME, error.c_str());
}
}
}
}

View File

@ -78,5 +78,7 @@ class GmeIndexerSource: public musik::core::sdk::IIndexerSource {
musik::core::sdk::IIndexerSource* source,
musik::core::sdk::IIndexerWriter* indexer);
std::map<std::string, std::set<short>> trackCache;
std::set<std::string> invalidFiles;
size_t filesIndexed;
volatile bool interrupt { false };
};