KFileMetaData

taglibextractor.cpp
1 /*
2  SPDX-FileCopyrightText: 2012 Vishesh Handa <[email protected]>
3 
4  SPDX-License-Identifier: LGPL-2.1-or-later
5 */
6 
7 
8 #include "taglibextractor.h"
9 #include "kfilemetadata_debug.h"
10 
11 // Taglib includes
12 #include <taglib.h>
13 #include <tag.h>
14 #include <tfilestream.h>
15 #include <tpropertymap.h>
16 #include <aifffile.h>
17 #include <apefile.h>
18 #include <asffile.h>
19 #include <flacfile.h>
20 #include <mp4file.h>
21 #include <mpcfile.h>
22 #include <mpegfile.h>
23 #include <oggfile.h>
24 #include <opusfile.h>
25 #include <speexfile.h>
26 #include <vorbisfile.h>
27 #include <wavfile.h>
28 #include <wavpackfile.h>
29 #include <asftag.h>
30 #include <asfattribute.h>
31 #include <id3v2tag.h>
32 #include <mp4tag.h>
33 #include <popularimeterframe.h>
34 
35 using namespace KFileMetaData;
36 
37 namespace {
38 
// MIME types handled by this extractor; returned as-is by
// TagLibExtractor::mimetypes(). TagLibExtractor::extract() compares the
// input MIME type against these strings to pick a TagLib file class.
39 const QStringList supportedMimeTypes = {
40  QStringLiteral("audio/flac"),
41  QStringLiteral("audio/mp4"),
42  QStringLiteral("audio/mpeg"),
43  QStringLiteral("audio/mpeg3"),
44  QStringLiteral("audio/ogg"),
45  QStringLiteral("audio/opus"),
46  QStringLiteral("audio/speex"),
47  QStringLiteral("audio/wav"),
48  QStringLiteral("audio/vnd.audible.aax"),
49  QStringLiteral("audio/x-aiff"),
50  QStringLiteral("audio/x-aifc"),
51  QStringLiteral("audio/x-ape"),
52  QStringLiteral("audio/x-mpeg"),
53  QStringLiteral("audio/x-ms-wma"),
54  QStringLiteral("audio/x-musepack"),
55  QStringLiteral("audio/x-opus+ogg"),
56  QStringLiteral("audio/x-speex+ogg"),
57  QStringLiteral("audio/x-vorbis+ogg"),
58  QStringLiteral("audio/x-wav"),
59  QStringLiteral("audio/x-wavpack"),
60 };
61 
62 void extractAudioProperties(TagLib::File* file, ExtractionResult* result)
63 {
64  TagLib::AudioProperties* audioProp = file->audioProperties();
65  if (audioProp && (result->inputFlags() & ExtractionResult::ExtractMetaData)) {
66  if (audioProp->length()) {
67  // What about the xml duration?
68  result->add(Property::Duration, audioProp->length());
69  }
70 
71  if (audioProp->bitrate()) {
72  result->add(Property::BitRate, audioProp->bitrate() * 1000);
73  }
74 
75  if (audioProp->channels()) {
76  result->add(Property::Channels, audioProp->channels());
77  }
78 
79  if (audioProp->sampleRate()) {
80  result->add(Property::SampleRate, audioProp->sampleRate());
81  }
82  }
83 }
84 
85 void readGenericProperties(const TagLib::PropertyMap &savedProperties, ExtractionResult* result)
86 {
87  if (!(result->inputFlags() & ExtractionResult::ExtractMetaData) || savedProperties.isEmpty()) {
88  return;
89  }
90 
91  if (savedProperties.contains("TITLE")) {
92  result->add(Property::Title, TStringToQString(savedProperties["TITLE"].toString()).trimmed());
93  }
94  if (savedProperties.contains("ALBUM")) {
95  result->add(Property::Album, TStringToQString(savedProperties["ALBUM"].toString()).trimmed());
96  }
97  if (savedProperties.contains("COMMENT")) {
98  result->add(Property::Comment, TStringToQString(savedProperties["COMMENT"].toString()).trimmed());
99  }
100  if (savedProperties.contains("TRACKNUMBER")) {
101  result->add(Property::TrackNumber, savedProperties["TRACKNUMBER"].toString().toInt());
102  }
103  if (savedProperties.contains("DATE")) {
104  result->add(Property::ReleaseYear, savedProperties["DATE"].toString().toInt());
105  }
106  if (savedProperties.contains("OPUS")) {
107  result->add(Property::Opus, savedProperties["OPUS"].toString().toInt());
108  }
109  if (savedProperties.contains("DISCNUMBER")) {
110  result->add(Property::DiscNumber, savedProperties["DISCNUMBER"].toString().toInt());
111  }
112  if (savedProperties.contains("RATING")) {
113  /*
114  * There is no standard regarding ratings. Mimic MediaMonkey's behavior
115  * with a range of 0 to 100 (stored in steps of 10) and make it compatible
116  * with baloo rating with a range from 0 to 10
117  */
118  result->add(Property::Rating, savedProperties["RATING"].toString().toInt() / 10);
119  }
120  if (savedProperties.contains("LOCATION")) {
121  result->add(Property::Location, TStringToQString(savedProperties["LOCATION"].toString()).trimmed());
122  }
123  if (savedProperties.contains("LANGUAGE")) {
124  result->add(Property::Language, TStringToQString(savedProperties["LANGUAGE"].toString()).trimmed());
125  }
126  if (savedProperties.contains("LICENSE")) {
127  result->add(Property::License, TStringToQString(savedProperties["LICENSE"].toString()).trimmed());
128  }
129  if (savedProperties.contains("PUBLISHER")) {
130  result->add(Property::Publisher, TStringToQString(savedProperties["PUBLISHER"].toString()).trimmed());
131  }
132  if (savedProperties.contains("COPYRIGHT")) {
133  result->add(Property::Copyright, TStringToQString(savedProperties["COPYRIGHT"].toString()).trimmed());
134  }
135  if (savedProperties.contains("LABEL")) {
136  result->add(Property::Label, TStringToQString(savedProperties["LABEL"].toString()).trimmed());
137  }
138  if (savedProperties.contains("ENSEMBLE")) {
139  result->add(Property::Ensemble, TStringToQString(savedProperties["ENSEMBLE"].toString()).trimmed());
140  }
141  if (savedProperties.contains("COMPILATION")) {
142  result->add(Property::Compilation, TStringToQString(savedProperties["COMPILATION"].toString()).trimmed());
143  }
144  if (savedProperties.contains("LYRICS")) {
145  result->add(Property::Lyrics, TStringToQString(savedProperties["LYRICS"].toString()).trimmed());
146  }
147  if (savedProperties.contains("ARTIST")) {
148  const auto artists = savedProperties["ARTIST"];
149  for (const auto& artist : artists) {
150  result->add(Property::Artist, TStringToQString(artist).trimmed());
151  }
152  }
153  if (savedProperties.contains("GENRE")) {
154  const auto genres = savedProperties["GENRE"];
155  for (const auto& genre : genres) {
156  result->add(Property::Genre, TStringToQString(genre).trimmed());
157  }
158  }
159  if (savedProperties.contains("ALBUMARTIST")) {
160  const auto albumArtists = savedProperties["ALBUMARTIST"];
161  for (const auto& albumArtist : albumArtists) {
162  result->add(Property::AlbumArtist, TStringToQString(albumArtist).trimmed());
163  }
164  }
165  if (savedProperties.contains("COMPOSER")) {
166  const auto composers = savedProperties["COMPOSER"];
167  for (const auto& composer : composers) {
168  result->add(Property::Composer, TStringToQString(composer).trimmed());
169  }
170  }
171  if (savedProperties.contains("LYRICIST")) {
172  const auto lyricists = savedProperties["LYRICIST"];
173  for (const auto& lyricist : lyricists) {
174  result->add(Property::Lyricist, TStringToQString(lyricist).trimmed());
175  }
176  }
177  if (savedProperties.contains("CONDUCTOR")) {
178  const auto conductors = savedProperties["CONDUCTOR"];
179  for (const auto& conductor : conductors) {
180  result->add(Property::Conductor, TStringToQString(conductor).trimmed());
181  }
182  }
183  if (savedProperties.contains("ARRANGER")) {
184  const auto arrangers = savedProperties["ARRANGER"];
185  for (const auto& arranger : arrangers) {
186  result->add(Property::Arranger, TStringToQString(arranger).trimmed());
187  }
188  }
189  if (savedProperties.contains("PERFORMER")) {
190  const auto performers = savedProperties["PERFORMER"];
191  for (const auto& performer : performers) {
192  result->add(Property::Performer, TStringToQString(performer).trimmed());
193  }
194  }
195  if (savedProperties.contains("AUTHOR")) {
196  const auto authors = savedProperties["AUTHOR"];
197  for (const auto& author: authors) {
198  result->add(Property::Author, TStringToQString(author).trimmed());
199  }
200  }
201 
202  if (savedProperties.contains("REPLAYGAIN_TRACK_GAIN")) {
203  auto trackGainString = TStringToQString(savedProperties["REPLAYGAIN_TRACK_GAIN"].toString(";")).trimmed();
204  // remove " dB" suffix
205  if (trackGainString.endsWith(QStringLiteral(" dB"), Qt::CaseInsensitive)) {
206  trackGainString.chop(3);
207  }
208  bool success = false;
209  double replayGainTrackGain = trackGainString.toDouble(&success);
210  if (success) {
211  result->add(Property::ReplayGainTrackGain, replayGainTrackGain);
212  }
213  }
214  if (savedProperties.contains("REPLAYGAIN_ALBUM_GAIN")) {
215  auto albumGainString = TStringToQString(savedProperties["REPLAYGAIN_ALBUM_GAIN"].toString(";")).trimmed();
216  // remove " dB" suffix
217  if (albumGainString.endsWith(QStringLiteral(" dB"), Qt::CaseInsensitive)) {
218  albumGainString.chop(3);
219  }
220  bool success = false;
221  double replayGainAlbumGain = albumGainString.toDouble(&success);
222  if (success) {
223  result->add(Property::ReplayGainAlbumGain, replayGainAlbumGain);
224  }
225  }
226  if (savedProperties.contains("REPLAYGAIN_TRACK_PEAK")) {
227  auto trackPeakString = TStringToQString(savedProperties["REPLAYGAIN_TRACK_PEAK"].toString(";")).trimmed();
228  bool success = false;
229  double replayGainTrackPeak = trackPeakString.toDouble(&success);
230  if (success) {
231  result->add(Property::ReplayGainTrackPeak, replayGainTrackPeak);
232  }
233  }
234  if (savedProperties.contains("REPLAYGAIN_ALBUM_PEAK")) {
235  auto albumPeakString = TStringToQString(savedProperties["REPLAYGAIN_ALBUM_PEAK"].toString(";")).trimmed();
236  bool success = false;
237  double replayGainAlbumPeak = albumPeakString.toDouble(&success);
238  if (success) {
239  result->add(Property::ReplayGainAlbumPeak, replayGainAlbumPeak);
240  }
241  }
242 }
243 
244 void extractId3Tags(TagLib::ID3v2::Tag* Id3Tags, ExtractionResult* result)
245 {
246  if (!(result->inputFlags() & ExtractionResult::ExtractMetaData) || Id3Tags->isEmpty()) {
247  return;
248  }
249 
250  TagLib::ID3v2::FrameList lstID3v2;
251 
252  /*
253  * Publisher.
254  * Special handling because TagLib::PropertyMap maps "TPUB" to "LABEL"
255  * Insert manually for Publisher.
256  */
257  lstID3v2 = Id3Tags->frameListMap()["TPUB"];
258  if (!lstID3v2.isEmpty()) {
259  result->add(Property::Publisher, TStringToQString(lstID3v2.front()->toString()));
260  }
261 
262  // Compilation.
263  lstID3v2 = Id3Tags->frameListMap()["TCMP"];
264  if (!lstID3v2.isEmpty()) {
265  result->add(Property::Compilation, TStringToQString(lstID3v2.front()->toString()));
266  }
267 
268  /*
269  * Rating.
270  * There is no standard regarding ratings. Most of the implementations match
271  * a 5 stars rating to a range of 0-255 for MP3.
272  * Map it to baloo rating with a range of 0 - 10.
273  */
274  lstID3v2 = Id3Tags->frameListMap()["POPM"];
275  if (!lstID3v2.isEmpty()) {
276  TagLib::ID3v2::PopularimeterFrame *ratingFrame = static_cast<TagLib::ID3v2::PopularimeterFrame *>(lstID3v2.front());
277  int rating = ratingFrame->rating();
278  if (rating == 0) {
279  rating = 0;
280  } else if (rating == 1) {
281  TagLib::String ratingProvider = ratingFrame->email();
282  if (ratingProvider == "[email protected]" || ratingProvider == "org.kde.kfilemetadata") {
283  rating = 1;
284  } else {
285  rating = 2;
286  }
287  } else if (rating >= 1 && rating <= 255) {
288  rating = static_cast<int>(0.032 * rating + 2);
289  }
290  result->add(Property::Rating, rating);
291  }
292 }
293 
294 void extractMp4Tags(TagLib::MP4::Tag* mp4Tags, ExtractionResult* result)
295 {
296  if (!(result->inputFlags() & ExtractionResult::ExtractMetaData) || mp4Tags->isEmpty()) {
297  return;
298  }
299 
300  TagLib::MP4::ItemListMap allTags = mp4Tags->itemListMap();
301 
302  /*
303  * There is no standard regarding ratings. Mimic MediaMonkey's behavior
304  * with a range of 0 to 100 (stored in steps of 10) and make it compatible
305  * with baloo rating with a range from 0 to 10.
306  */
307  TagLib::MP4::ItemListMap::Iterator itRating = allTags.find("rate");
308  if (itRating != allTags.end()) {
309  result->add(Property::Rating, itRating->second.toStringList().toString().toInt() / 10);
310  }
311 }
312 
313 void extractAsfTags(TagLib::ASF::Tag* asfTags, ExtractionResult* result)
314 {
315  if (!(result->inputFlags() & ExtractionResult::ExtractMetaData) || asfTags->isEmpty()) {
316  return;
317  }
318 
319  TagLib::ASF::AttributeList lstASF = asfTags->attribute("WM/SharedUserRating");
320  if (!lstASF.isEmpty()) {
321  int rating = lstASF.front().toString().toInt();
322  /*
323  * Map the rating values of WMP to Baloo rating.
324  * 0->0, 1->2, 25->4, 50->6, 75->8, 99->10
325  */
326  if (rating == 0) {
327  rating = 0;
328  } else if (rating == 1) {
329  rating = 2;
330  } else {
331  rating = static_cast<int>(0.09 * rating + 2);
332  }
333  result->add(Property::Rating, rating);
334  }
335 
336  lstASF = asfTags->attribute("Author");
337  if (!lstASF.isEmpty()) {
338  const auto attribute = lstASF.front();
339  result->add(Property::Author, TStringToQString(attribute.toString()).trimmed());
340  }
341 
342  // Lyricist is called "WRITER" for wma/asf files
343  lstASF = asfTags->attribute("WM/Writer");
344  if (!lstASF.isEmpty()) {
345  const auto attribute = lstASF.front();
346  result->add(Property::Lyricist, TStringToQString(attribute.toString()).trimmed());
347  }
348 
349  /*
350  * TagLib exports "WM/PUBLISHER" as "LABEL" in the PropertyMap,
351  * add it manually to Publisher.
352  */
353  lstASF = asfTags->attribute("WM/Publisher");
354  if (!lstASF.isEmpty()) {
355  const auto attribute = lstASF.front();
356  result->add(Property::Publisher, TStringToQString(attribute.toString()).trimmed());
357  }
358 }
359 
360 } // anonymous namespace
361 
// Constructs the extractor; parent is handed on to the ExtractorPlugin base
// class. No other initialisation is done here.
362 TagLibExtractor::TagLibExtractor(QObject* parent)
363  : ExtractorPlugin(parent)
364 {
365 }
366 
// Returns the MIME types this extractor handles (the file-local
// supportedMimeTypes list).
367 QStringList TagLibExtractor::mimetypes() const
368 {
369  return supportedMimeTypes;
370 }
371 
372 void TagLibExtractor::extract(ExtractionResult* result)
373 {
374  const QString fileUrl = result->inputUrl();
375  const QString mimeType = getSupportedMimeType(result->inputMimetype());
376 
377  // Open the file readonly. Important if we're sandboxed.
378 #if defined Q_OS_WINDOWS
379  TagLib::FileStream stream(fileUrl.toLocal8Bit().constData(), true);
380 #else
381  TagLib::FileStream stream(fileUrl.toUtf8().constData(), true);
382 #endif
383  if (!stream.isOpen()) {
384  qCWarning(KFILEMETADATA_LOG) << "Unable to open file readonly: " << fileUrl;
385  return;
386  }
387 
388  if (mimeType == QLatin1String("audio/mpeg") || mimeType == QLatin1String("audio/mpeg3")
389  || mimeType == QLatin1String("audio/x-mpeg")) {
390  TagLib::MPEG::File file(&stream, TagLib::ID3v2::FrameFactory::instance(), true);
391  if (file.isValid()) {
392  extractAudioProperties(&file, result);
393  readGenericProperties(file.properties(), result);
394  if (file.hasID3v2Tag()) {
395  extractId3Tags(file.ID3v2Tag(), result);
396  }
397  }
398  } else if (mimeType == QLatin1String("audio/x-aiff") || mimeType == QLatin1String("audio/x-aifc")) {
399  TagLib::RIFF::AIFF::File file(&stream, true);
400  if (file.isValid()) {
401  extractAudioProperties(&file, result);
402  readGenericProperties(file.properties(), result);
403  if (file.hasID3v2Tag()) {
404  extractId3Tags(file.tag(), result);
405  }
406  }
407  } else if (mimeType == QLatin1String("audio/wav") || mimeType == QLatin1String("audio/x-wav")) {
408  TagLib::RIFF::WAV::File file(&stream, true);
409  if (file.isValid()) {
410  extractAudioProperties(&file, result);
411  readGenericProperties(file.properties(), result);
412  if (file.hasID3v2Tag()) {
413  extractId3Tags(file.tag(), result);
414  }
415  }
416  } else if (mimeType == QLatin1String("audio/x-musepack")) {
417  TagLib::MPC::File file(&stream, true);
418  if (file.isValid()) {
419  extractAudioProperties(&file, result);
420  readGenericProperties(file.properties(), result);
421  }
422  } else if (mimeType == QLatin1String("audio/x-ape")) {
423  TagLib::APE::File file(&stream, true);
424  if (file.isValid()) {
425  extractAudioProperties(&file, result);
426  readGenericProperties(file.properties(), result);
427  }
428  } else if (mimeType == QLatin1String("audio/x-wavpack")) {
429  TagLib::WavPack::File file(&stream, true);
430  if (file.isValid()) {
431  extractAudioProperties(&file, result);
432  readGenericProperties(file.properties(), result);
433  }
434  } else if ((mimeType == QLatin1String("audio/mp4")) ||
435  (mimeType == QLatin1String("audio/vnd.audible.aax"))) {
436  TagLib::MP4::File file(&stream, true);
437  if (file.isValid()) {
438  extractAudioProperties(&file, result);
439  readGenericProperties(file.properties(), result);
440  extractMp4Tags(file.tag(), result);
441  }
442  } else if (mimeType == QLatin1String("audio/flac")) {
443  TagLib::FLAC::File file(&stream, TagLib::ID3v2::FrameFactory::instance(), true);
444  if (file.isValid()) {
445  extractAudioProperties(&file, result);
446  readGenericProperties(file.properties(), result);
447  }
448  } else if (mimeType == QLatin1String("audio/ogg") || mimeType == QLatin1String("audio/x-vorbis+ogg")) {
449  TagLib::Ogg::Vorbis::File file(&stream, true);
450  if (file.isValid()) {
451  extractAudioProperties(&file, result);
452  readGenericProperties(file.properties(), result);
453  }
454  } else if (mimeType == QLatin1String("audio/opus") || mimeType == QLatin1String("audio/x-opus+ogg")) {
455  TagLib::Ogg::Opus::File file(&stream, true);
456  if (file.isValid()) {
457  extractAudioProperties(&file, result);
458  readGenericProperties(file.properties(), result);
459  }
460  } else if (mimeType == QLatin1String("audio/speex") || mimeType == QLatin1String("audio/x-speex+ogg")) {
461  TagLib::Ogg::Speex::File file(&stream, true);
462  // Workaround for buggy taglib:
463  // isValid() returns true for invalid files, but XiphComment* tag() returns a nullptr
464  if (file.isValid() && file.tag()) {
465  extractAudioProperties(&file, result);
466  readGenericProperties(file.properties(), result);
467  }
468  } else if (mimeType == QLatin1String("audio/x-ms-wma")) {
469  TagLib::ASF::File file(&stream, true);
470  if (file.isValid()) {
471  extractAudioProperties(&file, result);
472  readGenericProperties(file.properties(), result);
473  extractAsfTags(file.tag(), result);
474  }
475  }
476 
477  result->addType(Type::Audio);
478 }
479 
480 // TAG information (incomplete).
481 // https://xiph.org/vorbis/doc/v-comment.html
482 // https://help.mp3tag.de/main_tags.html
483 // http://id3.org/
484 // https://www.legroom.net/2009/05/09/ogg-vorbis-and-flac-comment-field-recommendations
485 // https://kodi.wiki/view/Music_tagging#Tags_Kodi_reads
486 // https://wiki.hydrogenaud.io/index.php?title=Tag_Mapping
487 // https://picard.musicbrainz.org/docs/mappings/
488 // -- FLAC/OGG --
489 // Artist: ARTIST, PERFORMER
490 // Album artist: ALBUMARTIST
491 // Composer: COMPOSER
492 // Lyricist: LYRICIST
493 // Conductor: CONDUCTOR
494 // Disc number: DISCNUMBER
495 // Total discs: TOTALDISCS, DISCTOTAL
496 // Track number: TRACKNUMBER
497 // Total tracks: TOTALTRACKS, TRACKTOTAL
498 // Genre: GENRE
499 // -- ID3v2 --
500 // Artist: TPE1
501 // Album artist: TPE2
502 // Composer: TCOM
503 // Lyricist: TEXT
504 // Conductor: TPE3
505 // Disc number[/total discs]: TPOS
506 // Track number[/total tracks]: TRCK
507 // Genre: TCON
virtual void add(Property::Property property, const QVariant &value)=0
This function is called by the plugins when they wish to add a key value pair which should be indexed...
The ExtractorPlugin is the base class for all file metadata extractors.
virtual void addType(Type::Type type)=0
This function is called by the plugins.
CaseInsensitive
const char * constData() const const
QString inputMimetype() const
The input mimetype.
QByteArray toLocal8Bit() const const
char * toString(const T &value)
QString mimeType(Type)
The ExtractionResult class is where all the data extracted by the indexer is saved.
Flags inputFlags() const
The flags which the extraction plugin should consider following when extracting metadata from the ...
QString inputUrl() const
The input url which the plugins will use to locate the file.
QByteArray toUtf8() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Sun Jul 12 2020 22:54:18 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.