KFileMetaData

poextractor.cpp
1 /*
2  Gettext translation file analyzer
3 
4  SPDX-FileCopyrightText: 2007 Montel Laurent <[email protected]>
5  SPDX-FileCopyrightText: 2009 Jos van den Oever <[email protected]>
6  SPDX-FileCopyrightText: 2014 Nick Shaforostoff <[email protected]>
7 
8  SPDX-License-Identifier: LGPL-2.1-or-later
9 */
10 
11 
12 #include "poextractor.h"
13 #include <QFile>
14 #include <fstream>
15 
16 using namespace KFileMetaData;
17 
18 POExtractor::POExtractor(QObject* parent)
19  : ExtractorPlugin(parent)
20 {
21 
22 }
23 
24 const QStringList supportedMimeTypes = {
25  QStringLiteral("text/x-gettext-translation"),
26 };
27 
28 QStringList POExtractor::mimetypes() const
29 {
30  return supportedMimeTypes;
31 }
32 
33 void POExtractor::endMessage()
34 {
35  messages++;
36  fuzzy+=isFuzzy;
37  untranslated+=(!isTranslated);
38 
39  isFuzzy = false;
40  isTranslated = false;
41  state = WHITESPACE;
42 }
43 
44 void POExtractor::handleComment(const char* data, quint32 length)
45 {
46  state = COMMENT;
47  if (length >= 8 && strncmp(data, "#, fuzzy", 8) == 0) { // could be better
48  isFuzzy = true;
49  }
50 }
51 
52 void POExtractor::handleLine(const char* data, quint32 length)
53 {
54  if (state == ERROR) {
55  return;
56  }
57  if (state == WHITESPACE) {
58  if (length == 0) {
59  return;
60  }
61  if (data[0] != '#') {
62  state = COMMENT; //this allows PO files w/o comments
63  } else {
64  handleComment(data, length);
65  return;
66  }
67  }
68  if (state == COMMENT) {
69  if (length == 0) {
70  state = WHITESPACE;
71  } else if (data[0] == '#') {
72  handleComment(data, length);
73  } else if (length > 7 && strncmp("msgctxt", data, 7) == 0) {
74  state = MSGCTXT;
75  } else if (length > 7 && strncmp("msgid \"", data, 7) == 0) {
76  state = MSGID;
77  } else {
78  state = ERROR;
79  }
80  return;
81  } else if (length > 1 && data[0] == '"' && data[length-1] == '"'
82  && (state == MSGCTXT || state == MSGID || state == MSGSTR
83  || state == MSGID_PLURAL)) {
84  // continued text field
85  isTranslated = state == MSGSTR && length > 2;
86  } else if (state == MSGCTXT
87  && length > 7 && strncmp("msgid \"", data, 7) == 0) {
88  state = MSGID;
89  } else if (state == MSGID
90  && length > 14 && strncmp("msgid_plural \"", data, 14) == 0) {
91  state = MSGID_PLURAL;
92  } else if ((state == MSGID || state == MSGID_PLURAL || state == MSGSTR)
93  && length > 8 && strncmp("msgstr", data, 6) == 0) {
94  state = MSGSTR;
95  isTranslated = strncmp(data+length-3, " \"\"", 3) != 0;
96  } else if (state == MSGSTR) {
97  if (length == 0) {
98  endMessage();
99  } else if (data[0]=='#' || data[0]=='m') { //allow PO without empty line between entries
100  endMessage();
101  state = COMMENT;
102  handleLine(data, length);
103  } else {
104  state = ERROR;
105  }
106  } else {
107  state = ERROR;
108  }
109 #if 0
110  if (messages > 1 || state != MSGSTR) return;
111 
112  // handle special values in the first message
113  // assumption is that value takes up only one line
114  if (strncmp("\"POT-Creation-Date: ", data, 20) == 0) {
115  result->add(Property::TranslationTemplateDate, QByteArray(data + 20, length - 21));
116  } else if (strncmp("\"PO-Revision-Date: ", data, 19) == 0) {
117  result->add(Property::TranslationLastUpDate, QByteArray(data + 19, length - 20));
118  } else if (strncmp("\"Last-Translator: ", data, 18) == 0) {
119  result->add(Property::TranslationLastAuthor, QByteArray(data + 18, length - 19));
120  }
121 #endif
122 }
123 
124 void POExtractor::extract(ExtractionResult* result)
125 {
126  std::ifstream fstream(QFile::encodeName(result->inputUrl()).constData());
127  if (!fstream.is_open()) {
128  return;
129  }
130 
131  result->addType(Type::Text);
132  if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
133  return;
134  }
135 
136  state = WHITESPACE;
137  messages = 0;
138  untranslated = 0;
139  fuzzy = 0;
140  isFuzzy = false;
141  isTranslated = false;
142 
143  std::string line;
144  int lines = 0;
145  while (std::getline(fstream, line)) {
146  //TODO add a parsed text of translation units
147  //QByteArray arr = QByteArray::fromRawData(line.c_str(), line.size());
148  //result->append(QString::fromUtf8(arr));
149 
150  handleLine(line.c_str(), line.size());
151  lines++;
152 
153 
154  if (messages <= 1 && state == MSGSTR)
155  {
156  // handle special values in the first message
157  // assumption is that value takes up only one line
158  if (strncmp("\"POT-Creation-Date: ", line.c_str(), 20) == 0) {
159  result->add(Property::TranslationTemplateDate, QByteArray(line.c_str() + 20, line.size() - 21));
160  } else if (strncmp("\"PO-Revision-Date: ", line.c_str(), 19) == 0) {
161  result->add(Property::TranslationLastUpDate, QByteArray(line.c_str() + 19, line.size() - 20));
162  } else if (strncmp("\"Last-Translator: ", line.c_str(), 18) == 0) {
163  result->add(Property::TranslationLastAuthor, QString::fromUtf8(QByteArray::fromRawData(line.c_str() + 18, line.size() - 19)));
164  }
165  }
166  }
167  handleLine("", 0); //for files with non-empty last line
168  messages--;//cause header does not count
169 
170  result->add(Property::TranslationUnitsTotal, messages);
171  result->add(Property::TranslationUnitsWithTranslation, messages-untranslated);
172  result->add(Property::TranslationUnitsWithDraftTranslation, fuzzy);
173  result->add(Property::LineCount, lines);
174  //TODO WordCount
175 }
virtual void addType(Type::Type type)=0
This function is called by the plugins.
The ExtractionResult class is where all the data extracted by the indexer is saved....
QString fromUtf8(const char *str, int size)
QByteArray fromRawData(const char *data, int size)
QByteArray encodeName(const QString &fileName)
QString inputUrl() const
The input url which the plugins will use to locate the file.
Flags inputFlags() const
The flags which the extraction plugin should consider following when extracting metadata from the ...
const char * constData() const const
virtual void add(Property::Property property, const QVariant &value)=0
This function is called by the plugins when they wish to add a key value pair which should be indexed...
The ExtractorPlugin is the base class for all file metadata extractors. It is responsible for extract...
This file is part of the KDE documentation.
Documentation copyright © 1996-2022 The KDE developers.
Generated on Fri May 27 2022 03:47:54 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.