(Playlist) Optimise scanning of large rom sets

This commit is contained in:
jdgleaver 2021-06-29 14:48:18 +01:00
parent 92a098ee02
commit f7f007519c
3 changed files with 356 additions and 107 deletions

View File

@ -213,6 +213,88 @@ static void path_replace_base_path_and_convert_to_local_file_system(
strlcpy(out_path, in_path, size);
}
/* Computes a 32-bit FNV-1 hash (multiply, then XOR)
 * of the specified NUL-terminated path string.
 * ASCII upper case letters are folded to lower case
 * before being mixed in, so two paths that differ
 * only by ASCII letter case yield the same hash.
 * The result is never 0: a computed hash of 0 is
 * reported as 1.
 * 'path' must not be NULL */
static uint32_t playlist_path_hash(const char *path)
{
   const uint32_t fnv_prime = (uint32_t)0x01000193;
   uint32_t result          = (uint32_t)0x811c9dc5;
   const unsigned char *cur = (const unsigned char*)path;

   for (; *cur != '\0'; cur++)
   {
      unsigned char ch = *cur;

      /* Fold 'A'-'Z' onto 'a'-'z' */
      if ((ch >= 'A') && (ch <= 'Z'))
         ch |= 0x20;

      result = (result * fnv_prime) ^ (uint32_t)ch;
   }

   if (result == 0)
      return 1;

   return result;
}
/* Releases a path ID object together with the
 * strings it owns. Passing NULL is a no-op.
 * 'archive_path' may point at the very same buffer
 * as 'real_path'; in that case the buffer is
 * released exactly once */
static void playlist_path_id_free(playlist_path_id_t *path_id)
{
   char *real_path;
   char *archive_path;

   if (!path_id)
      return;

   real_path    = path_id->real_path;
   archive_path = path_id->archive_path;

   /* Only release archive_path when it is a distinct
    * allocation - otherwise real_path would be
    * freed twice */
   if (archive_path && (archive_path != real_path))
      free(archive_path);

   if (real_path)
      free(real_path);

   free(path_id);
}
/* Allocates and populates a path ID object for the
 * specified content path.
 * - Returns NULL if the object cannot be allocated
 * - If 'path' is NULL/empty, returns an object with
 *   NULL strings, zero hashes and both archive
 *   flags cleared
 * - Otherwise the path is copied, passed through
 *   playlist_resolve_path(), and the result is
 *   stored (with its hash) as the 'real' path.
 *   Archive status and, where applicable, the
 *   parent archive path + hash are recorded too
 * The returned object must be released with
 * playlist_path_id_free() */
static playlist_path_id_t *playlist_path_id_init(const char *path)
{
   char resolved_path[PATH_MAX_LENGTH];
   const char *delim      = NULL;
   playlist_path_id_t *id = (playlist_path_id_t*)malloc(sizeof(*id));

   resolved_path[0] = '\0';

   if (!id)
      return NULL;

   /* Start from a blank identity */
   id->real_path         = NULL;
   id->archive_path      = NULL;
   id->real_path_hash    = 0;
   id->archive_path_hash = 0;
   id->is_archive        = false;
   id->is_in_archive     = false;

   if (string_is_empty(path))
      return id;

   /* Resolve and cache the 'real' path */
   strlcpy(resolved_path, path, sizeof(resolved_path));
   playlist_resolve_path(PLAYLIST_SAVE, false, resolved_path,
         sizeof(resolved_path));

   id->real_path      = strdup(resolved_path);
   id->real_path_hash = playlist_path_hash(resolved_path);

   /* Determine archive status */
   id->is_archive = path_is_compressed_file(resolved_path);
   delim          = path_get_archive_delim(resolved_path);

   if (!delim)
   {
      /* No delimiter: if the path is itself an archive,
       * the archive path is simply the real path. The
       * string is shared, not duplicated -
       * playlist_path_id_free() accounts for this */
      if (id->is_archive)
      {
         id->archive_path      = id->real_path;
         id->archive_path_hash = id->real_path_hash;
      }

      return id;
   }

   /* Path refers to a file inside an archive:
    * extract the path of the parent archive */
   {
      char parent_path[PATH_MAX_LENGTH] = {0};
      size_t copy_size                  = (1 + delim - resolved_path);

      if (copy_size > PATH_MAX_LENGTH)
         copy_size = PATH_MAX_LENGTH;

      /* The size passed to strlcpy() includes the
       * terminator, so only the text preceding the
       * delimiter is copied */
      strlcpy(parent_path, resolved_path, copy_size * sizeof(char));

      id->archive_path      = strdup(parent_path);
      id->archive_path_hash = playlist_path_hash(parent_path);
      id->is_in_archive     = true;
   }

   return id;
}
/**
* playlist_path_equal:
* @real_path : 'Real' search path, generated by path_resolve_realpath()
@ -306,6 +388,102 @@ static bool playlist_path_equal(const char *real_path,
return false;
}
/**
 * playlist_path_matches_entry:
 * @path_id : Path identity of the search path ('real'
 *            path, hashes and archive status flags)
 * @entry   : Playlist entry to test against @path_id
 * @config  : Playlist configuration (supplies the
 *            'fuzzy_archive_match' setting)
 *
 * Returns 'true' if @path_id identifies the same content
 * as @entry. If @entry has no cached path ID, one is
 * generated from entry->path and stored in @entry.
 * Returns 'false' if any argument is NULL, if the entry
 * path ID cannot be created, or if either 'real' path
 * is empty.
 **/
static bool playlist_path_matches_entry(playlist_path_id_t *path_id,
      struct playlist_entry *entry, const playlist_config_t *config)
{
   playlist_path_id_t *entry_id = NULL;
   bool search_is_bare_archive  = false;
   bool entry_is_bare_archive   = false;

   if (!path_id || !entry || !config)
      return false;

   /* Build the entry's path ID cache on first use */
   if (!entry->path_id)
      entry->path_id = playlist_path_id_init(entry->path);

   entry_id = entry->path_id;

   if (!entry_id)
      return false;

   /* Both 'real' paths must be usable */
   if (string_is_empty(path_id->real_path) ||
       string_is_empty(entry_id->real_path))
      return false;

   /* Direct comparison: only perform the string
    * compare when the hashes agree */
   if (path_id->real_path_hash == entry_id->real_path_hash)
   {
#ifdef _WIN32
      /* Case-insensitive operating system */
      if (string_is_equal_noncase(path_id->real_path,
            entry_id->real_path))
         return true;
#else
      if (string_is_equal(path_id->real_path,
            entry_id->real_path))
         return true;
#endif
   }

#ifdef RARCH_INTERNAL
   /* Without fuzzy matching there is nothing
    * further to try */
   if (!config->fuzzy_archive_match)
      return false;
#endif

   /* Awkward archive case: one side is a properly
    * formatted [archive_path][delimiter][rom_file]
    * while the other is just [archive_path].
    * This typically happens when a playlist is
    * generated via scan content (which records the
    * full archive path), but the archive is later
    * loaded via the command line or an external
    * launcher (where the [delimiter][rom_file] part
    * is almost always omitted) */
   search_is_bare_archive = path_id->is_archive  && !path_id->is_in_archive;
   entry_is_bare_archive  = entry_id->is_archive && !entry_id->is_in_archive;

   if (!((search_is_bare_archive && entry_id->is_in_archive) ||
         (entry_is_bare_archive  && path_id->is_in_archive)))
      return false;

   /* Both parent archive paths must be usable */
   if (string_is_empty(path_id->archive_path) ||
       string_is_empty(entry_id->archive_path))
      return false;

   if (path_id->archive_path_hash != entry_id->archive_path_hash)
      return false;

#ifdef _WIN32
   /* Case-insensitive operating system */
   if (string_is_equal_noncase(path_id->archive_path,
         entry_id->archive_path))
      return true;
#else
   if (string_is_equal(path_id->archive_path,
         entry_id->archive_path))
      return true;
#endif

   return false;
}
/**
* playlist_core_path_equal:
* @real_core_path : 'Real' search path, generated by path_resolve_realpath()
@ -419,6 +597,8 @@ static void playlist_free_entry(struct playlist_entry *entry)
free(entry->last_played_str);
if (entry->subsystem_roms)
string_list_free(entry->subsystem_roms);
if (entry->path_id)
playlist_path_id_free(entry->path_id);
entry->path = NULL;
entry->label = NULL;
@ -431,6 +611,7 @@ static void playlist_free_entry(struct playlist_entry *entry)
entry->runtime_str = NULL;
entry->last_played_str = NULL;
entry->subsystem_roms = NULL;
entry->path_id = NULL;
entry->runtime_status = PLAYLIST_RUNTIME_UNKNOWN;
entry->runtime_hours = 0;
entry->runtime_minutes = 0;
@ -488,22 +669,20 @@ void playlist_delete_index(playlist_t *playlist,
void playlist_delete_by_path(playlist_t *playlist,
const char *search_path)
{
size_t i = 0;
char real_search_path[PATH_MAX_LENGTH];
real_search_path[0] = '\0';
playlist_path_id_t *path_id = NULL;
size_t i = 0;
if (!playlist || string_is_empty(search_path))
return;
/* Get 'real' search path */
strlcpy(real_search_path, search_path, sizeof(real_search_path));
path_resolve_realpath(real_search_path, sizeof(real_search_path), true);
path_id = playlist_path_id_init(search_path);
if (!path_id)
return;
while (i < RBUF_LEN(playlist->entries))
{
if (!playlist_path_equal(real_search_path, playlist->entries[i].path,
&playlist->config))
if (!playlist_path_matches_entry(path_id,
&playlist->entries[i], &playlist->config))
{
i++;
continue;
@ -515,58 +694,61 @@ void playlist_delete_by_path(playlist_t *playlist,
/* Entries are shifted up by the delete
* operation - *do not* increment i */
}
playlist_path_id_free(path_id);
}
void playlist_get_index_by_path(playlist_t *playlist,
const char *search_path,
const struct playlist_entry **entry)
{
playlist_path_id_t *path_id = NULL;
size_t i, len;
char real_search_path[PATH_MAX_LENGTH];
real_search_path[0] = '\0';
if (!playlist || !entry || string_is_empty(search_path))
return;
/* Get 'real' search path */
strlcpy(real_search_path, search_path, sizeof(real_search_path));
path_resolve_realpath(real_search_path, sizeof(real_search_path), true);
path_id = playlist_path_id_init(search_path);
if (!path_id)
return;
for (i = 0, len = RBUF_LEN(playlist->entries); i < len; i++)
{
if (!playlist_path_equal(real_search_path, playlist->entries[i].path,
&playlist->config))
if (!playlist_path_matches_entry(path_id,
&playlist->entries[i], &playlist->config))
continue;
*entry = &playlist->entries[i];
break;
}
playlist_path_id_free(path_id);
}
bool playlist_entry_exists(playlist_t *playlist,
const char *path)
{
playlist_path_id_t *path_id = NULL;
size_t i, len;
char real_search_path[PATH_MAX_LENGTH];
real_search_path[0] = '\0';
if (!playlist || string_is_empty(path))
return false;
/* Get 'real' search path */
strlcpy(real_search_path, path, sizeof(real_search_path));
path_resolve_realpath(real_search_path, sizeof(real_search_path), true);
path_id = playlist_path_id_init(path);
if (!path_id)
return false;
for (i = 0, len = RBUF_LEN(playlist->entries); i < len; i++)
{
if (playlist_path_equal(real_search_path, playlist->entries[i].path,
&playlist->config))
if (playlist_path_matches_entry(path_id,
&playlist->entries[i], &playlist->config))
{
playlist_path_id_free(path_id);
return true;
}
}
playlist_path_id_free(path_id);
return false;
}
@ -585,6 +767,13 @@ void playlist_update(playlist_t *playlist, size_t idx,
if (entry->path)
free(entry->path);
entry->path = strdup(update_entry->path);
if (entry->path_id)
{
playlist_path_id_free(entry->path_id);
entry->path_id = NULL;
}
playlist->modified = true;
}
@ -645,8 +834,14 @@ void playlist_update_runtime(playlist_t *playlist, size_t idx,
{
if (entry->path)
free(entry->path);
entry->path = NULL;
entry->path = strdup(update_entry->path);
if (entry->path_id)
{
playlist_path_id_free(entry->path_id);
entry->path_id = NULL;
}
playlist->modified = playlist->modified || register_update;
}
@ -741,28 +936,25 @@ void playlist_update_runtime(playlist_t *playlist, size_t idx,
bool playlist_push_runtime(playlist_t *playlist,
const struct playlist_entry *entry)
{
playlist_path_id_t *path_id = NULL;
size_t i, len;
char real_path[PATH_MAX_LENGTH];
char real_core_path[PATH_MAX_LENGTH];
if (!playlist || !entry)
return false;
goto error;
if (string_is_empty(entry->core_path))
{
RARCH_ERR("cannot push NULL or empty core path into the playlist.\n");
return false;
goto error;
}
real_path[0] = '\0';
real_core_path[0] = '\0';
/* Get 'real' path */
if (!string_is_empty(entry->path))
{
strlcpy(real_path, entry->path, sizeof(real_path));
playlist_resolve_path(PLAYLIST_SAVE, false, real_path, sizeof(real_path));
}
/* Get path ID */
path_id = playlist_path_id_init(entry->path);
if (!path_id)
goto error;
/* Get 'real' core path */
strlcpy(real_core_path, entry->core_path, sizeof(real_core_path));
@ -774,30 +966,31 @@ bool playlist_push_runtime(playlist_t *playlist,
if (string_is_empty(real_core_path))
{
RARCH_ERR("cannot push NULL or empty core path into the playlist.\n");
return false;
goto error;
}
len = RBUF_LEN(playlist->entries);
for (i = 0; i < len; i++)
{
struct playlist_entry tmp;
const char *entry_path = playlist->entries[i].path;
bool equal_path =
(string_is_empty(real_path) && string_is_empty(entry_path)) ||
playlist_path_equal(real_path, entry_path, &playlist->config);
bool equal_path = (string_is_empty(path_id->real_path) &&
string_is_empty(playlist->entries[i].path));
equal_path = equal_path || playlist_path_matches_entry(
path_id, &playlist->entries[i], &playlist->config);
/* Core name can have changed while still being the same core.
* Differentiate based on the core path only. */
if (!equal_path)
continue;
/* Core name can have changed while still being the same core.
* Differentiate based on the core path only. */
if (!playlist_core_path_equal(real_core_path, playlist->entries[i].core_path, &playlist->config))
continue;
/* If top entry, we don't want to push a new entry since
* the top and the entry to be pushed are the same. */
if (i == 0)
return false;
goto error;
/* Seen it before, bump to top. */
tmp = playlist->entries[i];
@ -809,7 +1002,7 @@ bool playlist_push_runtime(playlist_t *playlist,
}
if (playlist->config.capacity == 0)
return false;
goto error;
if (len == playlist->config.capacity)
{
@ -821,7 +1014,7 @@ bool playlist_push_runtime(playlist_t *playlist,
{
/* Allocate memory to fit one more item and resize the buffer */
if (!RBUF_TRYFIT(playlist->entries, len + 1))
return false; /* out of memory */
goto error; /* out of memory */
RBUF_RESIZE(playlist->entries, len + 1);
}
@ -833,10 +1026,13 @@ bool playlist_push_runtime(playlist_t *playlist,
playlist->entries[0].path = NULL;
playlist->entries[0].core_path = NULL;
if (!string_is_empty(real_path))
playlist->entries[0].path = strdup(real_path);
if (!string_is_empty(path_id->real_path))
playlist->entries[0].path = strdup(path_id->real_path);
playlist->entries[0].path_id = path_id;
path_id = NULL;
if (!string_is_empty(real_core_path))
playlist->entries[0].core_path = strdup(real_core_path);
playlist->entries[0].core_path = strdup(real_core_path);
playlist->entries[0].runtime_status = entry->runtime_status;
playlist->entries[0].runtime_hours = entry->runtime_hours;
@ -859,9 +1055,15 @@ bool playlist_push_runtime(playlist_t *playlist,
}
success:
if (path_id)
playlist_path_id_free(path_id);
playlist->modified = true;
return true;
error:
if (path_id)
playlist_path_id_free(path_id);
return false;
}
/**
@ -931,29 +1133,26 @@ bool playlist_push(playlist_t *playlist,
const struct playlist_entry *entry)
{
size_t i, len;
char real_path[PATH_MAX_LENGTH];
char real_core_path[PATH_MAX_LENGTH];
const char *core_name = entry->core_name;
bool entry_updated = false;
playlist_path_id_t *path_id = NULL;
const char *core_name = entry->core_name;
bool entry_updated = false;
real_path[0] = '\0';
real_core_path[0] = '\0';
if (!playlist || !entry)
return false;
goto error;
if (string_is_empty(entry->core_path))
{
RARCH_ERR("cannot push NULL or empty core path into the playlist.\n");
return false;
goto error;
}
/* Get 'real' path */
if (!string_is_empty(entry->path))
{
strlcpy(real_path, entry->path, sizeof(real_path));
playlist_resolve_path(PLAYLIST_SAVE, false, real_path, sizeof(real_path));
}
/* Get path ID */
path_id = playlist_path_id_init(entry->path);
if (!path_id)
goto error;
/* Get 'real' core path */
strlcpy(real_core_path, entry->core_path, sizeof(real_core_path));
@ -965,7 +1164,7 @@ bool playlist_push(playlist_t *playlist,
if (string_is_empty(real_core_path))
{
RARCH_ERR("cannot push NULL or empty core path into the playlist.\n");
return false;
goto error;
}
if (string_is_empty(core_name))
@ -977,7 +1176,7 @@ bool playlist_push(playlist_t *playlist,
if (string_is_empty(core_name))
{
RARCH_ERR("cannot push NULL or empty core name into the playlist.\n");
return false;
goto error;
}
}
@ -985,16 +1184,17 @@ bool playlist_push(playlist_t *playlist,
for (i = 0; i < len; i++)
{
struct playlist_entry tmp;
const char *entry_path = playlist->entries[i].path;
bool equal_path =
(string_is_empty(real_path) && string_is_empty(entry_path)) ||
playlist_path_equal(real_path, entry_path, &playlist->config);
bool equal_path = (string_is_empty(path_id->real_path) &&
string_is_empty(playlist->entries[i].path));
equal_path = equal_path || playlist_path_matches_entry(
path_id, &playlist->entries[i], &playlist->config);
/* Core name can have changed while still being the same core.
* Differentiate based on the core path only. */
if (!equal_path)
continue;
/* Core name can have changed while still being the same core.
* Differentiate based on the core path only. */
if (!playlist_core_path_equal(real_core_path, playlist->entries[i].core_path, &playlist->config))
continue;
@ -1084,7 +1284,7 @@ bool playlist_push(playlist_t *playlist,
if (entry_updated)
goto success;
return false;
goto error;
}
/* Seen it before, bump to top. */
@ -1097,7 +1297,7 @@ bool playlist_push(playlist_t *playlist,
}
if (playlist->config.capacity == 0)
return false;
goto error;
if (len == playlist->config.capacity)
{
@ -1109,7 +1309,7 @@ bool playlist_push(playlist_t *playlist,
{
/* Allocate memory to fit one more item and resize the buffer */
if (!RBUF_TRYFIT(playlist->entries, len + 1))
return false; /* out of memory */
goto error; /* out of memory */
RBUF_RESIZE(playlist->entries, len + 1);
}
@ -1129,6 +1329,7 @@ bool playlist_push(playlist_t *playlist,
playlist->entries[0].runtime_str = NULL;
playlist->entries[0].last_played_str = NULL;
playlist->entries[0].subsystem_roms = NULL;
playlist->entries[0].path_id = NULL;
playlist->entries[0].runtime_status = PLAYLIST_RUNTIME_UNKNOWN;
playlist->entries[0].runtime_hours = 0;
playlist->entries[0].runtime_minutes = 0;
@ -1139,8 +1340,12 @@ bool playlist_push(playlist_t *playlist,
playlist->entries[0].last_played_hour = 0;
playlist->entries[0].last_played_minute = 0;
playlist->entries[0].last_played_second = 0;
if (!string_is_empty(real_path))
playlist->entries[0].path = strdup(real_path);
if (!string_is_empty(path_id->real_path))
playlist->entries[0].path = strdup(path_id->real_path);
playlist->entries[0].path_id = path_id;
path_id = NULL;
if (!string_is_empty(entry->label))
playlist->entries[0].label = strdup(entry->label);
if (!string_is_empty(real_core_path))
@ -1168,9 +1373,15 @@ bool playlist_push(playlist_t *playlist,
}
success:
if (path_id)
playlist_path_id_free(path_id);
playlist->modified = true;
return true;
error:
if (path_id)
playlist_path_id_free(path_id);
return false;
}
void playlist_write_runtime_file(playlist_t *playlist)
@ -2674,6 +2885,41 @@ bool playlist_entries_are_equal(
return playlist_core_path_equal(real_core_path_a, entry_b->core_path, config);
}
/* Returns true if the entries at the specified
 * indices of the specified playlist refer to the
 * same content path, as determined by
 * playlist_path_matches_entry().
 * Returns false if 'playlist' is NULL or if either
 * index is out of range.
 * Side effect: a path ID cache is generated for
 * either entry if it does not already have one.
 * NOTE(review): core paths are *not* compared here,
 * although this function is described (and used by
 * the playlist clean-up task) as a content + core
 * path check - confirm whether a core path
 * comparison should be added */
bool playlist_index_entries_are_equal(
      playlist_t *playlist, size_t idx_a, size_t idx_b)
{
   struct playlist_entry *entry_a = NULL;
   struct playlist_entry *entry_b = NULL;
   size_t len;

   if (!playlist)
      return false;

   len = RBUF_LEN(playlist->entries);

   if ((idx_a >= len) || (idx_b >= len))
      return false;

   /* Both indices are in range, so these are
    * addresses of valid array elements - no
    * NULL check is required */
   entry_a = &playlist->entries[idx_a];
   entry_b = &playlist->entries[idx_b];

   /* Initialise path ID for entry A, if required
    * (entry B is handled inside
    * playlist_path_matches_entry()). If this fails,
    * playlist_path_matches_entry() rejects the NULL
    * path ID and returns false */
   if (!entry_a->path_id)
      entry_a->path_id = playlist_path_id_init(entry_a->path);

   return playlist_path_matches_entry(
         entry_a->path_id, entry_b, &playlist->config);
}
void playlist_get_crc32(playlist_t *playlist, size_t idx,
const char **crc32)
{

View File

@ -86,6 +86,18 @@ enum playlist_thumbnail_id
PLAYLIST_THUMBNAIL_LEFT
};
/* Holds all parameters required to uniquely
 * identify a playlist content path.
 * Objects are created by playlist_path_id_init()
 * and released by playlist_path_id_free() */
typedef struct
{
/* Heap-allocated copy of the content path after
 * it has been passed through playlist_resolve_path().
 * NULL when the source path was NULL/empty */
char *real_path;
/* Path of the parent archive when the content is a
 * file inside an archive (separate allocation), or
 * the *same pointer* as real_path when the content
 * path is itself an archive. NULL otherwise */
char *archive_path;
/* playlist_path_hash() of real_path (0 when unset) */
uint32_t real_path_hash;
/* playlist_path_hash() of archive_path (0 when unset) */
uint32_t archive_path_hash;
/* Result of path_is_compressed_file() for real_path */
bool is_archive;
/* true when real_path contains an archive delimiter,
 * i.e. it refers to a file inside an archive */
bool is_in_archive;
} playlist_path_id_t;
struct playlist_entry
{
char *path;
@ -99,6 +111,7 @@ struct playlist_entry
char *runtime_str;
char *last_played_str;
struct string_list *subsystem_roms;
playlist_path_id_t *path_id;
unsigned runtime_hours;
unsigned runtime_minutes;
unsigned runtime_seconds;
@ -312,6 +325,12 @@ bool playlist_entries_are_equal(
const struct playlist_entry *entry_b,
const playlist_config_t *config);
/* Returns true if entries at specified indices
 * of specified playlist refer to the same content
 * path. NOTE: core paths are not compared by the
 * current implementation */
bool playlist_index_entries_are_equal(
playlist_t *playlist, size_t idx_a, size_t idx_b);
void playlist_get_crc32(playlist_t *playlist, size_t idx,
const char **crc32);

View File

@ -605,43 +605,27 @@ static void task_pl_manager_clean_playlist_handler(retro_task_t *task)
break;
case PL_MANAGER_ITERATE_ENTRY_CHECK_DUPLICATE:
{
const struct playlist_entry *entry = NULL;
bool entry_deleted = false;
bool entry_deleted = false;
size_t i;
/* Update progress display */
task_set_progress(task, (pl_manager->list_index * 100) / pl_manager->list_size);
/* Get current entry */
playlist_get_index(
pl_manager->playlist, pl_manager->list_index, &entry);
if (entry)
/* Check whether the content + core paths of the
* current entry match those of any subsequent
* entry */
for (i = pl_manager->list_index + 1; i < pl_manager->list_size; i++)
{
size_t i;
/* Loop over all subsequent entries, and check
* whether content + core paths are the same */
for (i = pl_manager->list_index + 1; i < pl_manager->list_size; i++)
if (playlist_index_entries_are_equal(pl_manager->playlist,
pl_manager->list_index, i))
{
const struct playlist_entry *next_entry = NULL;
/* Duplicate found - delete entry */
playlist_delete_index(pl_manager->playlist, pl_manager->list_index);
entry_deleted = true;
/* Get next entry */
playlist_get_index(pl_manager->playlist, i, &next_entry);
if (!next_entry)
continue;
if (playlist_entries_are_equal(
entry, next_entry, &pl_manager->playlist_config))
{
/* Duplicate found - delete entry */
playlist_delete_index(pl_manager->playlist, pl_manager->list_index);
entry_deleted = true;
/* Update list_size */
pl_manager->list_size = playlist_size(pl_manager->playlist);
break;
}
/* Update list_size */
pl_manager->list_size = playlist_size(pl_manager->playlist);
break;
}
}