From f7f007519cd5d9437787a4b657ee6528f2f6b788 Mon Sep 17 00:00:00 2001
From: jdgleaver
Date: Tue, 29 Jun 2021 14:48:18 +0100
Subject: [PATCH] (Playlist) Optimise scanning of large rom sets

---
 playlist.c                    | 424 +++++++++++++++++++++++++++-------
 playlist.h                    |  19 ++
 tasks/task_playlist_manager.c |  44 ++--
 3 files changed, 380 insertions(+), 107 deletions(-)

diff --git a/playlist.c b/playlist.c
index 95376ba965..4860b4c46b 100644
--- a/playlist.c
+++ b/playlist.c
@@ -213,6 +213,99 @@ static void path_replace_base_path_and_convert_to_local_file_system(
       strlcpy(out_path, in_path, size);
 }
 
+/* Generates a case insensitive hash for the
+ * specified path string (32-bit FNV-1, with
+ * ASCII 'A'-'Z' folded to lower case).
+ * Never returns 0, which is the value assigned
+ * to the hash fields of an empty path ID */
+static uint32_t playlist_path_hash(const char *path)
+{
+   unsigned char c;
+   uint32_t hash = (uint32_t)0x811c9dc5;
+   while ((c = (unsigned char)*(path++)) != '\0')
+      hash = ((hash * (uint32_t)0x01000193) ^ (uint32_t)((c >= 'A' && c <= 'Z') ? (c | 0x20) : c));
+   return (hash ? hash : 1);
+}
+
+/* Frees the specified path ID and the strings it
+ * owns. archive_path may alias real_path, in which
+ * case the shared string is freed only once */
+static void playlist_path_id_free(playlist_path_id_t *path_id)
+{
+   if (!path_id)
+      return;
+
+   if (path_id->archive_path &&
+       (path_id->archive_path != path_id->real_path))
+      free(path_id->archive_path);
+
+   if (path_id->real_path)
+      free(path_id->real_path);
+
+   free(path_id);
+}
+
+/* Creates a path ID for the specified content path,
+ * caching its resolved 'real' path, parent archive
+ * path (if any) and the hash of each.
+ * Returns NULL if the path ID cannot be allocated;
+ * result must be freed via playlist_path_id_free() */
+static playlist_path_id_t *playlist_path_id_init(const char *path)
+{
+   playlist_path_id_t *path_id = (playlist_path_id_t*)malloc(sizeof(*path_id));
+   const char *archive_delim   = NULL;
+   char real_path[PATH_MAX_LENGTH];
+
+   real_path[0] = '\0';
+
+   if (!path_id)
+      return NULL;
+
+   path_id->real_path         = NULL;
+   path_id->archive_path      = NULL;
+   path_id->real_path_hash    = 0;
+   path_id->archive_path_hash = 0;
+   path_id->is_archive        = false;
+   path_id->is_in_archive     = false;
+
+   if (string_is_empty(path))
+      return path_id;
+
+   /* Get real path */
+   strlcpy(real_path, path, sizeof(real_path));
+   playlist_resolve_path(PLAYLIST_SAVE, false, real_path,
+         sizeof(real_path));
+
+   path_id->real_path      = strdup(real_path);
+   path_id->real_path_hash = playlist_path_hash(real_path);
+
+   /* Check archive status */
+   path_id->is_archive = path_is_compressed_file(real_path);
+   archive_delim       = path_get_archive_delim(real_path);
+
+   /* If path refers to a file inside an archive,
+    * extract the path of the parent archive */
+   if (archive_delim)
+   {
+      size_t len = (1 + archive_delim - real_path);
+      char archive_path[PATH_MAX_LENGTH] = {0};
+
+      len = (len < PATH_MAX_LENGTH) ? len : PATH_MAX_LENGTH;
+      strlcpy(archive_path, real_path, len * sizeof(char));
+
+      path_id->archive_path      = strdup(archive_path);
+      path_id->archive_path_hash = playlist_path_hash(archive_path);
+      path_id->is_in_archive     = true;
+   }
+   else if (path_id->is_archive)
+   {
+      path_id->archive_path      = path_id->real_path;
+      path_id->archive_path_hash = path_id->real_path_hash;
+   }
+
+   return path_id;
+}
+
 /**
  * playlist_path_equal:
  * @real_path           : 'Real' search path, generated by path_resolve_realpath()
@@ -306,6 +399,104 @@ static bool playlist_path_equal(const char *real_path,
    return false;
 }
 
+/**
+ * playlist_path_matches_entry:
+ * @path_id           : Path identity, containing 'real' path,
+ *                      hash and archive status information
+ * @entry             : Playlist entry to compare with path_id
+ * @config            : Playlist configuration (fuzzy_archive_match
+ *                      gates the parent archive comparison)
+ *
+ * Returns 'true' if 'path_id' matches path information
+ * contained in specified 'entry'. Will update path_id
+ * cache inside specified 'entry', if not already present.
+ **/
+static bool playlist_path_matches_entry(playlist_path_id_t *path_id,
+      struct playlist_entry *entry, const playlist_config_t *config)
+{
+   /* Sanity check */
+   if (!path_id ||
+       !entry ||
+       !config)
+      return false;
+
+   /* Check whether entry contains a path ID cache */
+   if (!entry->path_id)
+   {
+      entry->path_id = playlist_path_id_init(entry->path);
+      if (!entry->path_id)
+         return false;
+   }
+
+   /* Ensure we have valid real_path strings */
+   if (string_is_empty(path_id->real_path) ||
+       string_is_empty(entry->path_id->real_path))
+      return false;
+
+   /* First pass comparison */
+   if (path_id->real_path_hash ==
+         entry->path_id->real_path_hash)
+   {
+#ifdef _WIN32
+      /* Handle case-insensitive operating systems */
+      if (string_is_equal_noncase(path_id->real_path,
+            entry->path_id->real_path))
+         return true;
+#else
+      if (string_is_equal(path_id->real_path,
+            entry->path_id->real_path))
+         return true;
+#endif
+   }
+
+#ifdef RARCH_INTERNAL
+   /* If fuzzy matching is disabled, we can give up now */
+   if (!config->fuzzy_archive_match)
+      return false;
+#endif
+
+   /* If we reach this point, we have to work
+    * harder...
+    * Need to handle a rather awkward archive file
+    * case where:
+    * - playlist path contains a properly formatted
+    *   [archive_path][delimiter][rom_file]
+    * - search path is just [archive_path]
+    * ...or vice versa.
+    * This pretty much always happens when a playlist
+    * is generated via scan content (which handles the
+    * archive paths correctly), but the user subsequently
+    * loads an archive file via the command line or some
+    * external launcher (where the [delimiter][rom_file]
+    * part is almost always omitted) */
+   if (((path_id->is_archive && !path_id->is_in_archive) && entry->path_id->is_in_archive) ||
+       ((entry->path_id->is_archive && !entry->path_id->is_in_archive) && path_id->is_in_archive))
+   {
+      /* Ensure we have valid parent archive path
+       * strings */
+      if (string_is_empty(path_id->archive_path) ||
+          string_is_empty(entry->path_id->archive_path))
+         return false;
+
+      if (path_id->archive_path_hash ==
+            entry->path_id->archive_path_hash)
+      {
+#ifdef _WIN32
+         /* Handle case-insensitive operating systems */
+         if (string_is_equal_noncase(path_id->archive_path,
+               entry->path_id->archive_path))
+            return true;
+#else
+         if (string_is_equal(path_id->archive_path,
+               entry->path_id->archive_path))
+            return true;
+#endif
+      }
+   }
+
+   return false;
+}
+
 /**
  * playlist_core_path_equal:
  * @real_core_path  : 'Real' search path, generated by path_resolve_realpath()
@@ -419,6 +610,8 @@ static void playlist_free_entry(struct playlist_entry *entry)
       free(entry->last_played_str);
    if (entry->subsystem_roms)
       string_list_free(entry->subsystem_roms);
+   if (entry->path_id)
+      playlist_path_id_free(entry->path_id);
 
    entry->path               = NULL;
    entry->label              = NULL;
@@ -431,6 +624,7 @@ static void playlist_free_entry(struct playlist_entry *entry)
    entry->runtime_str        = NULL;
    entry->last_played_str    = NULL;
    entry->subsystem_roms     = NULL;
+   entry->path_id            = NULL;
    entry->runtime_status     = PLAYLIST_RUNTIME_UNKNOWN;
    entry->runtime_hours      = 0;
    entry->runtime_minutes    = 0;
@@ -488,22 +682,20 @@ void playlist_delete_index(playlist_t *playlist,
 void playlist_delete_by_path(playlist_t *playlist,
       const char *search_path)
 {
-   size_t i = 0;
-   char real_search_path[PATH_MAX_LENGTH];
-
-   real_search_path[0] = '\0';
+   playlist_path_id_t *path_id = NULL;
+   size_t i                    = 0;
 
    if (!playlist || string_is_empty(search_path))
       return;
 
-   /* Get 'real' search path */
-   strlcpy(real_search_path, search_path, sizeof(real_search_path));
-   path_resolve_realpath(real_search_path, sizeof(real_search_path), true);
+   path_id = playlist_path_id_init(search_path);
+   if (!path_id)
+      return;
 
    while (i < RBUF_LEN(playlist->entries))
    {
-      if (!playlist_path_equal(real_search_path, playlist->entries[i].path,
-            &playlist->config))
+      if (!playlist_path_matches_entry(path_id,
+            &playlist->entries[i], &playlist->config))
       {
          i++;
          continue;
@@ -515,58 +707,61 @@ void playlist_delete_by_path(playlist_t *playlist,
       /* Entries are shifted up by the delete
        * operation - *do not* increment i */
    }
+
+   playlist_path_id_free(path_id);
 }
 
 void playlist_get_index_by_path(playlist_t *playlist,
       const char *search_path,
       const struct playlist_entry **entry)
 {
+   playlist_path_id_t *path_id = NULL;
    size_t i, len;
-   char real_search_path[PATH_MAX_LENGTH];
-
-   real_search_path[0] = '\0';
 
    if (!playlist || !entry || string_is_empty(search_path))
       return;
 
-   /* Get 'real' search path */
-   strlcpy(real_search_path, search_path, sizeof(real_search_path));
-   path_resolve_realpath(real_search_path, sizeof(real_search_path), true);
+   path_id = playlist_path_id_init(search_path);
+   if (!path_id)
+      return;
 
    for (i = 0, len = RBUF_LEN(playlist->entries); i < len; i++)
    {
-      if (!playlist_path_equal(real_search_path, playlist->entries[i].path,
-            &playlist->config))
+      if (!playlist_path_matches_entry(path_id,
+            &playlist->entries[i], &playlist->config))
          continue;
 
       *entry = &playlist->entries[i];
-
       break;
    }
+
+   playlist_path_id_free(path_id);
 }
 
 bool playlist_entry_exists(playlist_t *playlist,
       const char *path)
 {
+   playlist_path_id_t *path_id = NULL;
    size_t i, len;
-   char real_search_path[PATH_MAX_LENGTH];
-
-   real_search_path[0] = '\0';
 
    if (!playlist || string_is_empty(path))
       return false;
 
-   /* Get 'real' search path */
-   strlcpy(real_search_path, path, sizeof(real_search_path));
-   path_resolve_realpath(real_search_path, sizeof(real_search_path), true);
+   path_id = playlist_path_id_init(path);
+   if (!path_id)
+      return false;
 
    for (i = 0, len = RBUF_LEN(playlist->entries); i < len; i++)
    {
-      if (playlist_path_equal(real_search_path, playlist->entries[i].path,
-            &playlist->config))
+      if (playlist_path_matches_entry(path_id,
+            &playlist->entries[i], &playlist->config))
+      {
+         playlist_path_id_free(path_id);
          return true;
+      }
    }
 
+   playlist_path_id_free(path_id);
    return false;
 }
 
@@ -585,6 +780,13 @@ void playlist_update(playlist_t *playlist, size_t idx,
       if (entry->path)
          free(entry->path);
       entry->path        = strdup(update_entry->path);
+
+      if (entry->path_id)
+      {
+         playlist_path_id_free(entry->path_id);
+         entry->path_id  = NULL;
+      }
+
       playlist->modified = true;
    }
 
@@ -645,8 +847,14 @@ void playlist_update_runtime(playlist_t *playlist, size_t idx,
    {
       if (entry->path)
          free(entry->path);
-      entry->path        = NULL;
       entry->path        = strdup(update_entry->path);
+
+      if (entry->path_id)
+      {
+         playlist_path_id_free(entry->path_id);
+         entry->path_id  = NULL;
+      }
+
       playlist->modified = playlist->modified || register_update;
    }
 
@@ -741,28 +949,25 @@ void playlist_update_runtime(playlist_t *playlist, size_t idx,
 bool playlist_push_runtime(playlist_t *playlist,
       const struct playlist_entry *entry)
 {
+   playlist_path_id_t *path_id = NULL;
    size_t i, len;
-   char real_path[PATH_MAX_LENGTH];
    char real_core_path[PATH_MAX_LENGTH];
 
    if (!playlist || !entry)
-      return false;
+      goto error;
 
    if (string_is_empty(entry->core_path))
    {
       RARCH_ERR("cannot push NULL or empty core path into the playlist.\n");
-      return false;
+      goto error;
    }
 
-   real_path[0]      = '\0';
    real_core_path[0] = '\0';
 
-   /* Get 'real' path */
-   if (!string_is_empty(entry->path))
-   {
-      strlcpy(real_path, entry->path, sizeof(real_path));
-      playlist_resolve_path(PLAYLIST_SAVE, false, real_path, sizeof(real_path));
-   }
+   /* Get path ID */
+   path_id = playlist_path_id_init(entry->path);
+   if (!path_id)
+      goto error;
 
    /* Get 'real' core path */
    strlcpy(real_core_path, entry->core_path, sizeof(real_core_path));
@@ -774,30 +979,31 @@ bool playlist_push_runtime(playlist_t *playlist,
    if (string_is_empty(real_core_path))
    {
       RARCH_ERR("cannot push NULL or empty core path into the playlist.\n");
-      return false;
+      goto error;
    }
 
    len = RBUF_LEN(playlist->entries);
    for (i = 0; i < len; i++)
    {
       struct playlist_entry tmp;
-      const char *entry_path = playlist->entries[i].path;
-      bool equal_path        =
-         (string_is_empty(real_path) && string_is_empty(entry_path)) ||
-         playlist_path_equal(real_path, entry_path, &playlist->config);
+      bool equal_path = (string_is_empty(path_id->real_path) &&
+            string_is_empty(playlist->entries[i].path));
+
+      equal_path = equal_path || playlist_path_matches_entry(
+            path_id, &playlist->entries[i], &playlist->config);
 
-      /* Core name can have changed while still being the same core.
-       * Differentiate based on the core path only. */
       if (!equal_path)
          continue;
 
+      /* Core name can have changed while still being the same core.
+       * Differentiate based on the core path only. */
       if (!playlist_core_path_equal(real_core_path, playlist->entries[i].core_path, &playlist->config))
          continue;
 
       /* If top entry, we don't want to push a new entry since
        * the top and the entry to be pushed are the same. */
       if (i == 0)
-         return false;
+         goto error;
 
       /* Seen it before, bump to top. */
       tmp = playlist->entries[i];
@@ -809,7 +1015,7 @@ bool playlist_push_runtime(playlist_t *playlist,
    }
 
    if (playlist->config.capacity == 0)
-      return false;
+      goto error;
 
    if (len == playlist->config.capacity)
    {
@@ -821,7 +1027,7 @@ bool playlist_push_runtime(playlist_t *playlist,
    {
       /* Allocate memory to fit one more item and resize the buffer */
       if (!RBUF_TRYFIT(playlist->entries, len + 1))
-         return false; /* out of memory */
+         goto error; /* out of memory */
       RBUF_RESIZE(playlist->entries, len + 1);
    }
 
@@ -833,10 +1039,13 @@ bool playlist_push_runtime(playlist_t *playlist,
       playlist->entries[0].path      = NULL;
       playlist->entries[0].core_path = NULL;
 
-      if (!string_is_empty(real_path))
-         playlist->entries[0].path      = strdup(real_path);
+      if (!string_is_empty(path_id->real_path))
+         playlist->entries[0].path      = strdup(path_id->real_path);
+      playlist->entries[0].path_id      = path_id;
+      path_id                           = NULL;
+
       if (!string_is_empty(real_core_path))
-         playlist->entries[0].core_path = strdup(real_core_path);
+         playlist->entries[0].core_path    = strdup(real_core_path);
 
       playlist->entries[0].runtime_status = entry->runtime_status;
       playlist->entries[0].runtime_hours  = entry->runtime_hours;
@@ -859,9 +1068,15 @@ bool playlist_push_runtime(playlist_t *playlist,
    }
 
 success:
+   if (path_id)
+      playlist_path_id_free(path_id);
    playlist->modified = true;
-
    return true;
+
+error:
+   if (path_id)
+      playlist_path_id_free(path_id);
+   return false;
 }
 
 /**
@@ -931,29 +1146,26 @@ bool playlist_push(playlist_t *playlist,
       const struct playlist_entry *entry)
 {
    size_t i, len;
-   char real_path[PATH_MAX_LENGTH];
    char real_core_path[PATH_MAX_LENGTH];
-   const char *core_name = entry->core_name;
-   bool entry_updated    = false;
+   playlist_path_id_t *path_id = NULL;
+   const char *core_name       = entry->core_name;
+   bool entry_updated          = false;
 
-   real_path[0] = '\0';
    real_core_path[0] = '\0';
 
    if (!playlist || !entry)
-      return false;
+      goto error;
 
    if (string_is_empty(entry->core_path))
    {
       RARCH_ERR("cannot push NULL or empty core path into the playlist.\n");
-      return false;
+      goto error;
    }
 
-   /* Get 'real' path */
-   if (!string_is_empty(entry->path))
-   {
-      strlcpy(real_path, entry->path, sizeof(real_path));
-      playlist_resolve_path(PLAYLIST_SAVE, false, real_path, sizeof(real_path));
-   }
+   /* Get path ID */
+   path_id = playlist_path_id_init(entry->path);
+   if (!path_id)
+      goto error;
 
    /* Get 'real' core path */
    strlcpy(real_core_path, entry->core_path, sizeof(real_core_path));
@@ -965,7 +1177,7 @@ bool playlist_push(playlist_t *playlist,
    if (string_is_empty(real_core_path))
    {
       RARCH_ERR("cannot push NULL or empty core path into the playlist.\n");
-      return false;
+      goto error;
    }
 
    if (string_is_empty(core_name))
@@ -977,7 +1189,7 @@ bool playlist_push(playlist_t *playlist,
       if (string_is_empty(core_name))
       {
          RARCH_ERR("cannot push NULL or empty core name into the playlist.\n");
-         return false;
+         goto error;
       }
    }
 
@@ -985,16 +1197,17 @@ bool playlist_push(playlist_t *playlist,
    for (i = 0; i < len; i++)
    {
       struct playlist_entry tmp;
-      const char *entry_path = playlist->entries[i].path;
-      bool equal_path        =
-         (string_is_empty(real_path) && string_is_empty(entry_path)) ||
-         playlist_path_equal(real_path, entry_path, &playlist->config);
+      bool equal_path = (string_is_empty(path_id->real_path) &&
+            string_is_empty(playlist->entries[i].path));
+
+      equal_path = equal_path || playlist_path_matches_entry(
+            path_id, &playlist->entries[i], &playlist->config);
 
-      /* Core name can have changed while still being the same core.
-       * Differentiate based on the core path only. */
       if (!equal_path)
          continue;
 
+      /* Core name can have changed while still being the same core.
+       * Differentiate based on the core path only. */
       if (!playlist_core_path_equal(real_core_path, playlist->entries[i].core_path, &playlist->config))
          continue;
 
@@ -1084,7 +1297,7 @@ bool playlist_push(playlist_t *playlist,
          if (entry_updated)
             goto success;
 
-         return false;
+         goto error;
       }
 
       /* Seen it before, bump to top. */
@@ -1097,7 +1310,7 @@ bool playlist_push(playlist_t *playlist,
    }
 
    if (playlist->config.capacity == 0)
-      return false;
+      goto error;
 
    if (len == playlist->config.capacity)
    {
@@ -1109,7 +1322,7 @@ bool playlist_push(playlist_t *playlist,
    {
       /* Allocate memory to fit one more item and resize the buffer */
       if (!RBUF_TRYFIT(playlist->entries, len + 1))
-         return false; /* out of memory */
+         goto error; /* out of memory */
       RBUF_RESIZE(playlist->entries, len + 1);
    }
 
@@ -1129,6 +1342,7 @@ bool playlist_push(playlist_t *playlist,
       playlist->entries[0].runtime_str        = NULL;
       playlist->entries[0].last_played_str    = NULL;
       playlist->entries[0].subsystem_roms     = NULL;
+      playlist->entries[0].path_id            = NULL;
       playlist->entries[0].runtime_status     = PLAYLIST_RUNTIME_UNKNOWN;
       playlist->entries[0].runtime_hours      = 0;
       playlist->entries[0].runtime_minutes    = 0;
@@ -1139,8 +1353,12 @@ bool playlist_push(playlist_t *playlist,
       playlist->entries[0].last_played_hour   = 0;
       playlist->entries[0].last_played_minute = 0;
       playlist->entries[0].last_played_second = 0;
-      if (!string_is_empty(real_path))
-         playlist->entries[0].path            = strdup(real_path);
+
+      if (!string_is_empty(path_id->real_path))
+         playlist->entries[0].path            = strdup(path_id->real_path);
+      playlist->entries[0].path_id            = path_id;
+      path_id                                 = NULL;
+
       if (!string_is_empty(entry->label))
          playlist->entries[0].label           = strdup(entry->label);
       if (!string_is_empty(real_core_path))
@@ -1168,9 +1386,15 @@ bool playlist_push(playlist_t *playlist,
    }
 
 success:
+   if (path_id)
+      playlist_path_id_free(path_id);
    playlist->modified = true;
-
    return true;
+
+error:
+   if (path_id)
+      playlist_path_id_free(path_id);
+   return false;
 }
 
 void playlist_write_runtime_file(playlist_t *playlist)
@@ -2674,6 +2898,52 @@ bool playlist_entries_are_equal(
    return playlist_core_path_equal(real_core_path_a, entry_b->core_path, config);
 }
 
+/* Returns true if entries at specified indices
+ * of specified playlist have identical content
+ * and core paths */
+bool playlist_index_entries_are_equal(
+      playlist_t *playlist, size_t idx_a, size_t idx_b)
+{
+   struct playlist_entry *entry_a = NULL;
+   struct playlist_entry *entry_b = NULL;
+   size_t len;
+
+   if (!playlist)
+      return false;
+
+   len = RBUF_LEN(playlist->entries);
+
+   if ((idx_a >= len) || (idx_b >= len))
+      return false;
+
+   /* Fetch entries */
+   entry_a = &playlist->entries[idx_a];
+   entry_b = &playlist->entries[idx_b];
+
+   if (!entry_a || !entry_b)
+      return false;
+
+   /* Initialise path ID for entry A, if required
+    * (entry B will be handled inside
+    * playlist_path_matches_entry()) */
+   if (!entry_a->path_id)
+      entry_a->path_id = playlist_path_id_init(entry_a->path);
+
+   /* Fast pre-filter: compare cached path IDs.
+    * Entry pairs whose content paths differ are
+    * rejected here */
+   if (!playlist_path_matches_entry(
+         entry_a->path_id, entry_b, &playlist->config))
+      return false;
+
+   /* Content paths match - entries are only equal
+    * if their core paths match as well. This is
+    * reached only for candidate duplicates, so the
+    * uncached comparison is acceptable */
+   return playlist_entries_are_equal(
+         entry_a, entry_b, &playlist->config);
+}
+
 void playlist_get_crc32(playlist_t *playlist, size_t idx,
       const char **crc32)
 {
diff --git a/playlist.h b/playlist.h
index 7750455a02..09799d5eb3 100644
--- a/playlist.h
+++ b/playlist.h
@@ -86,6 +86,18 @@ enum playlist_thumbnail_id
    PLAYLIST_THUMBNAIL_LEFT
 };
 
+/* Holds all parameters required to uniquely
+ * identify a playlist content path */
+typedef struct
+{
+   char *real_path;
+   char *archive_path;
+   uint32_t real_path_hash;
+   uint32_t archive_path_hash;
+   bool is_archive;
+   bool is_in_archive;
+} playlist_path_id_t;
+
 struct playlist_entry
 {
    char *path;
@@ -99,6 +111,7 @@ struct playlist_entry
    char *runtime_str;
    char *last_played_str;
    struct string_list *subsystem_roms;
+   playlist_path_id_t *path_id;
    unsigned runtime_hours;
    unsigned runtime_minutes;
    unsigned runtime_seconds;
@@ -312,6 +325,12 @@ bool playlist_entries_are_equal(
       const struct playlist_entry *entry_b,
       const playlist_config_t *config);
 
+/* Returns true if entries at specified indices
+ * of specified playlist have identical content
+ * and core paths */
+bool playlist_index_entries_are_equal(
+      playlist_t *playlist, size_t idx_a, size_t idx_b);
+
 void playlist_get_crc32(playlist_t *playlist, size_t idx,
       const char **crc32);
 
diff --git a/tasks/task_playlist_manager.c b/tasks/task_playlist_manager.c
index 8163279002..d92b1ec163 100644
--- a/tasks/task_playlist_manager.c
+++ b/tasks/task_playlist_manager.c
@@ -605,43 +605,27 @@ static void task_pl_manager_clean_playlist_handler(retro_task_t *task)
          break;
       case PL_MANAGER_ITERATE_ENTRY_CHECK_DUPLICATE:
          {
-            const struct playlist_entry *entry = NULL;
-            bool entry_deleted                 = false;
+            bool entry_deleted = false;
+            size_t i;
 
             /* Update progress display */
             task_set_progress(task, (pl_manager->list_index * 100) / pl_manager->list_size);
 
-            /* Get current entry */
-            playlist_get_index(
-                  pl_manager->playlist, pl_manager->list_index, &entry);
-
-            if (entry)
+            /* Check whether the content + core paths of the
+             * current entry match those of any subsequent
+             * entry */
+            for (i = pl_manager->list_index + 1; i < pl_manager->list_size; i++)
             {
-               size_t i;
-
-               /* Loop over all subsequent entries, and check
-                * whether content + core paths are the same */
-               for (i = pl_manager->list_index + 1; i < pl_manager->list_size; i++)
+               if (playlist_index_entries_are_equal(pl_manager->playlist,
+                     pl_manager->list_index, i))
                {
-                  const struct playlist_entry *next_entry = NULL;
+                  /* Duplicate found - delete entry */
+                  playlist_delete_index(pl_manager->playlist, pl_manager->list_index);
+                  entry_deleted = true;
 
-                  /* Get next entry */
-                  playlist_get_index(pl_manager->playlist, i, &next_entry);
-
-                  if (!next_entry)
-                     continue;
-
-                  if (playlist_entries_are_equal(
-                        entry, next_entry, &pl_manager->playlist_config))
-                  {
-                     /* Duplicate found - delete entry */
-                     playlist_delete_index(pl_manager->playlist, pl_manager->list_index);
-                     entry_deleted = true;
-
-                     /* Update list_size */
-                     pl_manager->list_size = playlist_size(pl_manager->playlist);
-                     break;
-                  }
+                  /* Update list_size */
+                  pl_manager->list_size = playlist_size(pl_manager->playlist);
+                  break;
                }
             }
 