Various improvements/fixes to metadata scanning and indexing:

* Read rating value from metadata while indexing
  * Parse ID3V2 tags from AAC files if present
  * Minor cleanup to file extension detection in TaglibMetadataReader
  * Fix bug that may cause the track's "title" field to get populated
    with the filename/uri instead of the track title.
This commit is contained in:
casey langen 2023-07-28 13:31:18 -07:00
parent c8ff55e533
commit c58c4c5fab
3 changed files with 177 additions and 136 deletions

View File

@ -156,7 +156,7 @@ double IndexerTrack::GetDouble(const char* key, double defaultValue) {
}
void IndexerTrack::SetValue(const char* metakey, const char* value) {
if (metakey && value) {
if (metakey && value && strlen(value)) {
this->internalMetadata->metadata.insert(
std::pair<std::string, std::string>(metakey,value));
}
@ -345,35 +345,35 @@ static int64_t writeToTracksTable(
query =
"UPDATE tracks "
"SET track=?, disc=?, bpm=?, duration=?, filesize=?, "
" title=?, filename=?, filetime=?, path_id=?, "
" title=?, rating=?, filename=?, filetime=?, path_id=?, "
" date_updated=julianday('now'), external_id=? "
"WHERE id=?";
}
else {
query =
"INSERT INTO tracks "
"(track, disc, bpm, duration, filesize, title, filename, "
"(track, disc, bpm, duration, filesize, title, rating, filename, "
" filetime, path_id, external_id, date_added, date_updated) "
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, julianday('now'), julianday('now'))";
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, julianday('now'), julianday('now'))";
}
db::Statement stmt(query.c_str(), dbConnection);
auto time = track.GetInt64("filetime");
stmt.BindInt32(0, stringToInt(track.GetString("track"), 1));
stmt.BindInt32(1, stringToInt(track.GetString("disc"), 1));
stmt.BindText(2, track.GetString("bpm"));
stmt.BindInt32(3, track.GetInt32("duration"));
stmt.BindInt32(4, track.GetInt32("filesize"));
stmt.BindText(5, track.GetString("title"));
stmt.BindText(6, track.GetString("filename"));
stmt.BindInt64(7, track.GetInt64("filetime"));
stmt.BindInt64(8, track.GetInt64("path_id"));
stmt.BindText(9, track.GetString("external_id"));
int bindPos = 0;
stmt.BindInt32(bindPos++, stringToInt(track.GetString("track"), 1));
stmt.BindInt32(bindPos++, stringToInt(track.GetString("disc"), 1));
stmt.BindText(bindPos++, track.GetString("bpm"));
stmt.BindInt32(bindPos++, track.GetInt32("duration"));
stmt.BindInt32(bindPos++, track.GetInt32("filesize"));
stmt.BindText(bindPos++, track.GetString("title"));
stmt.BindInt32(bindPos++, stringToInt(track.GetString("rating"), 0));
stmt.BindText(bindPos++, track.GetString("filename"));
stmt.BindInt64(bindPos++, track.GetInt64("filetime"));
stmt.BindInt64(bindPos++, track.GetInt64("path_id"));
stmt.BindText(bindPos++, track.GetString("external_id"));
if (id != 0) {
stmt.BindInt64(10, id);
stmt.BindInt64(bindPos++, id);
}
if (stmt.Step() == db::Done) {
@ -397,23 +397,24 @@ static void removeRelation(
}
/* erases a fixed list of keys from the specified metadata map, leaving only
the entries whose keys are not listed below. NOTE(review): presumably these
are the fields persisted through dedicated columns/relations rather than as
generic metadata -- confirm against the callers. some keys are erased more
than once here; this assumes Track::MetadataMap::erase(key) is a no-op for
an absent key, as with the standard associative containers -- TODO confirm. */
static void removeKnownFields(Track::MetadataMap& metadata) {
metadata.erase("track");
metadata.erase("disc");
metadata.erase("bpm");
metadata.erase("duration");
metadata.erase("title");
metadata.erase("filename");
metadata.erase("filetime");
metadata.erase("filesize");
metadata.erase("title");
metadata.erase("path");
metadata.erase("extension");
metadata.erase("genre");
metadata.erase("artist");
metadata.erase("album_artist");
metadata.erase("album");
metadata.erase("source_id");
metadata.erase("artist");
metadata.erase("bpm");
metadata.erase("disc");
metadata.erase("duration");
metadata.erase("extension");
metadata.erase("external_id");
metadata.erase("filename");
metadata.erase("filesize");
metadata.erase("filetime");
metadata.erase("genre");
metadata.erase("path");
metadata.erase("rating");
metadata.erase("source_id");
metadata.erase("title");
metadata.erase("title");
metadata.erase("track");
metadata.erase("visible");
}

View File

@ -62,6 +62,7 @@
#include <vector>
#include <string>
#include <set>
#include <iostream>
#include <functional>
#include <cctype>
@ -70,6 +71,29 @@
using namespace musik::core::sdk;
/* lower-case file extensions (without the leading dot) that this reader
accepts. the table is only ever queried, so it is declared const to prevent
accidental mutation at runtime. */
static const std::set<std::string> SUPPORTED_FORMATS = {
    "aac",
    "aif",
    "aiff",
    "alac",
    "ape",
    "flac",
    "m4a",
    "mp3",
    "mpc",
    "ogg",
    "opus",
    "wav",
    "wave",
    "wma",
    "wv",
};
/* lower-case file extensions (without the leading dot) whose files get the
additional ID3v2 pass. declared const because the table is only ever
queried, never modified. */
static const std::set<std::string> ID3V2_FORMATS = {
    "mp3",
    "aac"
};
#ifdef WIN32
static inline std::wstring utf8to16(const char* utf8) {
int size = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, 0, 0);
@ -184,24 +208,8 @@ bool TaglibMetadataReader::CanRead(const char *extension) {
if (extension && strlen(extension)) {
std::string withoutLeadingDot = std::string(extension[0] == '.' ? &extension[1] : extension);
std::string ext = str::ToLowerCopy(withoutLeadingDot);
return
ext.compare("opus") == 0 ||
ext.compare("wv") == 0 ||
ext.compare("wma") == 0 ||
ext.compare("ape") == 0 ||
ext.compare("mpc") == 0 ||
ext.compare("aac") == 0 ||
ext.compare("alac") == 0 ||
ext.compare("wav") == 0 ||
ext.compare("wave") == 0 ||
ext.compare("aif") == 0 ||
ext.compare("aiff") == 0 ||
ext.compare("mp3") == 0 ||
ext.compare("ogg") == 0 ||
ext.compare("m4a") == 0 ||
ext.compare("flac") == 0;
return SUPPORTED_FORMATS.find(ext) != SUPPORTED_FORMATS.end();
}
return false;
}
@ -225,11 +233,16 @@ bool TaglibMetadataReader::Read(const char* uri, ITagStore *track) {
/* ID3v2 is a trainwreck, so it requires special processing */
if (extension.size()) {
if (str::ToLowerCopy(extension) == "mp3") {
if (ID3V2_FORMATS.find(str::ToLowerCopy(extension)) != ID3V2_FORMATS.end()) {
this->ReadID3V2(uri, track);
}
}
/* always use the filename as the title if we can't resolve one */
if (!track->Contains("title")) {
this->SetTagValue("title", uri, track);
}
return true;
}
@ -250,88 +263,83 @@ bool TaglibMetadataReader::ReadGeneric(
file = resolveOggType(uri);
}
if (file.isNull()) {
this->SetTagValue("title", uri, target);
}
else {
TagLib::Tag *tag = file.tag();
if (tag) {
this->ReadBasicData(file.tag(), uri, target);
TagLib::Tag *tag = file.tag();
if (tag) {
this->ReadBasicData(file.tag(), uri, target);
/* wav files can have metadata in the RIFF header, or, in some cases,
with an embedded id3v2 tag */
auto wavFile = dynamic_cast<TagLib::RIFF::WAV::File*>(file.file());
if (wavFile) {
if (wavFile->hasInfoTag()) {
this->ReadBasicData(wavFile->InfoTag(), uri, target);
}
if (wavFile->hasID3v2Tag()) {
this->ReadID3V2(wavFile->ID3v2Tag(), target);
}
/* wav files can have metadata in the RIFF header, or, in some cases,
with an embedded id3v2 tag */
auto wavFile = dynamic_cast<TagLib::RIFF::WAV::File*>(file.file());
if (wavFile) {
if (wavFile->hasInfoTag()) {
this->ReadBasicData(wavFile->InfoTag(), uri, target);
}
/* aif files are similar to wav files, but for some reason taglib
doesn't seem to expose non-id3v2 tags */
const auto aifFile = dynamic_cast<TagLib::RIFF::AIFF::File*>(file.file());
if (aifFile) {
if (aifFile->hasID3v2Tag()) {
this->ReadID3V2(aifFile->tag(), target);
}
if (wavFile->hasID3v2Tag()) {
this->ReadID3V2(wavFile->ID3v2Tag(), target);
}
/* taglib hides certain properties (like album artist) in the XiphComment's
field list. if we're dealing with a straight-up Xiph tag, process it now */
const auto xiphTag = dynamic_cast<TagLib::Ogg::XiphComment*>(tag);
if (xiphTag) {
processAlbumArt(xiphTag->pictureList(), target);
this->ReadFromMap(xiphTag->fieldListMap(), target);
this->ExtractReplayGain(xiphTag->fieldListMap(), target);
}
/* if this isn't a xiph tag, the file format may have some other custom
properties. let's see if we can pull them out here... */
if (!xiphTag) {
bool handled = false;
/* flac files may have more than one type of tag embedded. see if there's
a xiph comment buried deep. */
auto flacFile = dynamic_cast<TagLib::FLAC::File*>(file.file());
if (flacFile) {
processAlbumArt(flacFile->pictureList(), target);
if (flacFile->hasXiphComment()) {
this->ReadFromMap(flacFile->xiphComment()->fieldListMap(), target);
this->ExtractReplayGain(flacFile->xiphComment()->fieldListMap(), target);
handled = true;
}
}
/* similarly, mp4 buries disc number and album artist. however, taglib does
NOT expose a map with normalized keys, so we have to do special property
handling here... */
if (!handled) {
const auto mp4File = dynamic_cast<TagLib::MP4::File*>(file.file());
if (mp4File && mp4File->hasMP4Tag()) {
auto mp4TagMap = static_cast<TagLib::MP4::Tag*>(tag)->itemListMap();
this->ExtractValueForKey(mp4TagMap, "aART", "album_artist", target);
this->ExtractValueForKey(mp4TagMap, "disk", "disc", target);
this->ExtractReplayGain(mp4TagMap, target);
handled = true;
}
}
if (!handled) {
const auto wvFile = dynamic_cast<TagLib::WavPack::File*>(file.file());
if (wvFile && wvFile->hasAPETag()) {
this->ReadFromMap(wvFile->properties(), target);
this->ExtractReplayGain(wvFile->properties(), target);
handled = true;
}
}
}
TagLib::AudioProperties *audio = file.audioProperties();
this->SetAudioProperties(audio, target);
}
/* aif files are similar to wav files, but for some reason taglib
doesn't seem to expose non-id3v2 tags */
const auto aifFile = dynamic_cast<TagLib::RIFF::AIFF::File*>(file.file());
if (aifFile) {
if (aifFile->hasID3v2Tag()) {
this->ReadID3V2(aifFile->tag(), target);
}
}
/* taglib hides certain properties (like album artist) in the XiphComment's
field list. if we're dealing with a straight-up Xiph tag, process it now */
const auto xiphTag = dynamic_cast<TagLib::Ogg::XiphComment*>(tag);
if (xiphTag) {
processAlbumArt(xiphTag->pictureList(), target);
this->ReadFromMap(xiphTag->fieldListMap(), target);
this->ExtractReplayGain(xiphTag->fieldListMap(), target);
}
/* if this isn't a xiph tag, the file format may have some other custom
properties. let's see if we can pull them out here... */
if (!xiphTag) {
bool handled = false;
/* flac files may have more than one type of tag embedded. see if there's
a xiph comment buried deep. */
auto flacFile = dynamic_cast<TagLib::FLAC::File*>(file.file());
if (flacFile) {
processAlbumArt(flacFile->pictureList(), target);
if (flacFile->hasXiphComment()) {
this->ReadFromMap(flacFile->xiphComment()->fieldListMap(), target);
this->ExtractReplayGain(flacFile->xiphComment()->fieldListMap(), target);
handled = true;
}
}
/* similarly, mp4 buries disc number and album artist. however, taglib does
NOT expose a map with normalized keys, so we have to do special property
handling here... */
if (!handled) {
const auto mp4File = dynamic_cast<TagLib::MP4::File*>(file.file());
if (mp4File && mp4File->hasMP4Tag()) {
auto mp4TagMap = static_cast<TagLib::MP4::Tag*>(tag)->itemListMap();
this->ExtractValueForKey(mp4TagMap, "aART", "album_artist", target);
this->ExtractValueForKey(mp4TagMap, "disk", "disc", target);
this->ExtractReplayGain(mp4TagMap, target);
handled = true;
}
}
if (!handled) {
const auto wvFile = dynamic_cast<TagLib::WavPack::File*>(file.file());
if (wvFile && wvFile->hasAPETag()) {
this->ReadFromMap(wvFile->properties(), target);
this->ExtractReplayGain(wvFile->properties(), target);
handled = true;
}
}
}
TagLib::AudioProperties *audio = file.audioProperties();
this->SetAudioProperties(audio, target);
}
return true;
@ -385,18 +393,13 @@ void TaglibMetadataReader::ReadFromMap(const T& map, ITagStore *target) {
ExtractValueForKey(map, "DISCNUMBER", "disc", target);
ExtractValueForKey(map, "ALBUM ARTIST", "album_artist", target);
ExtractValueForKey(map, "ALBUMARTIST", "album_artist", target);
ExtractValueForKey(map, "RATING", "rating", target);
}
template<typename T>
void TaglibMetadataReader::ReadBasicData(const T* tag, const char* uri, ITagStore *target) {
if (tag) {
if (!tag->title().isEmpty()) {
this->SetTagValue("title", tag->title(), target);
}
else {
this->SetTagValue("title", uri, target);
}
this->SetTagValue("title", tag->title(), target);
this->SetTagValue("album", tag->album(), target);
this->SetTagValue("artist", tag->artist(), target);
this->SetTagValue("genre", tag->genre(), target);
@ -526,7 +529,7 @@ bool TaglibMetadataReader::ReadID3V2(TagLib::ID3v2::Tag *id3v2, ITagStore *track
for (auto current : txxx) {
using UTIF = TagLib::ID3v2::UserTextIdentificationFrame;
UTIF* utif = dynamic_cast<UTIF*>(current);
const UTIF* utif = dynamic_cast<UTIF*>(current);
if (utif) {
auto name = utif->description().upper();
auto values = utif->fieldList();
@ -568,6 +571,9 @@ bool TaglibMetadataReader::ReadID3V2(TagLib::ID3v2::Tag *id3v2, ITagStore *track
this->SetTagValue("totaldiscs", "1", track);
}
const int rating = this->ExtractRatingFromPopularimeter(allTags["POPM"]);
this->SetTagValue("rating", rating, track);
this->SetTagValues("bpm", allTags["TBPM"], track);
this->SetSlashSeparatedValues("composer", allTags["TCOM"], track);
this->SetTagValues("copyright", allTags["TCOP"], track);
@ -724,7 +730,6 @@ void TaglibMetadataReader::SetTagValues(
{
if (!frame.isEmpty()) {
TagLib::ID3v2::FrameList::ConstIterator value = frame.begin();
for ( ; value != frame.end(); ++value) {
TagLib::String tagString = (*value)->toString();
if(!tagString.isEmpty()) {
@ -781,3 +786,35 @@ void TaglibMetadataReader::SetAudioProperties(
}
}
}
/* derives a 0-5 rating from a list of ID3v2 "POPM" (popularimeter) frames.

each frame is rendered to text with toString(); the value of this tag is
expected to be: 'some_str_identifier rating=[0-255] counter=[n]'. the first
frame that yields a non-zero rating wins, and its 1-255 value is bucketed
into 1-5. returns 0 if no frame carries a usable rating. */
int TaglibMetadataReader::ExtractRatingFromPopularimeter(const TagLib::ID3v2::FrameList& frame) {
    static const std::string ratingPrefix = "rating=";

    for (auto current : frame) {
        const TagLib::String rawTagValue = current->toString();
        if (rawTagValue.isEmpty()) {
            continue;
        }

        const std::string utf8TagValue(rawTagValue.to8Bit(true));
        const auto utf8TagValueParts = str::Split(utf8TagValue, " ");
        if (utf8TagValueParts.size() <= 2) {
            continue; /* expected: identifier, rating and counter */
        }

        /* the second token must literally start with "rating=". note that
        find_first_of() would match ANY single character from that set, so
        an explicit prefix comparison is used instead. */
        const auto& ratingToken = utf8TagValueParts.at(1);
        if (ratingToken.compare(0, ratingPrefix.size(), ratingPrefix) != 0) {
            continue;
        }

        /* strtol never throws; an unparsable value is detected by checking
        whether any characters were consumed. */
        const std::string digits = ratingToken.substr(ratingPrefix.size());
        char* parseEnd = nullptr;
        const long intRating = std::strtol(digits.c_str(), &parseEnd, 10);
        if (parseEnd == digits.c_str()) {
            continue; /* invalid rating, couldn't be parsed as an int. */
        }

        /* per the ID3v2 spec 1 is the worst rating, 255 the best and 0
        means "unknown", so every value >= 1 maps to at least 1. */
        if (intRating > 205) { return 5; }
        if (intRating > 154) { return 4; }
        if (intRating > 103) { return 3; }
        if (intRating > 52) { return 2; }
        if (intRating > 0) { return 1; }
    }

    return 0;
}

View File

@ -95,6 +95,9 @@ class TaglibMetadataReader : public musik::core::sdk::ITagReader {
const std::string& inputKey,
const std::string& defaultValue);
/* converts the specified ID3v2 "POPM" (popularimeter) frame list into an
integer rating; returns 0 when no rating could be extracted. */
int ExtractRatingFromPopularimeter(
const TagLib::ID3v2::FrameList& frame);
void SetTagValueWithPossibleTotal(
const std::string& value,
const std::string& valueKey,