KFileMetaData

poextractor.cpp
1 /*
2  Gettext translation file analyzer
3 
4  Copyright (C) 2007 Montel Laurent <[email protected]>
5  Copyright (C) 2009 Jos van den Oever <[email protected]>
6  Copyright (C) 2014 Nick Shaforostoff <[email protected]>
7 
8  This library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU Lesser General Public
10  License as published by the Free Software Foundation; either
11  version 2.1 of the License, or (at your option) any later version.
12 
13  This library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  Lesser General Public License for more details.
17 
18  You should have received a copy of the GNU Lesser General Public
19  License along with this library; if not, write to the Free Software
20  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22 
23 
#include "poextractor.h"

#include <QFile>

#include <cstring>
#include <fstream>
27 
28 using namespace KFileMetaData;
29 
30 POExtractor::POExtractor(QObject* parent)
31  : ExtractorPlugin(parent)
32 {
33 
34 }
35 
36 const QStringList supportedMimeTypes = {
37  QStringLiteral("text/x-gettext-translation"),
38 };
39 
40 QStringList POExtractor::mimetypes() const
41 {
42  return supportedMimeTypes;
43 }
44 
45 void POExtractor::endMessage()
46 {
47  messages++;
48  fuzzy+=isFuzzy;
49  untranslated+=(!isTranslated);
50 
51  isFuzzy = false;
52  isTranslated = false;
53  state = WHITESPACE;
54 }
55 
56 void POExtractor::handleComment(const char* data, quint32 length)
57 {
58  state = COMMENT;
59  if (length >= 8 && strncmp(data, "#, fuzzy", 8) == 0) { // could be better
60  isFuzzy = true;
61  }
62 }
63 
64 void POExtractor::handleLine(const char* data, quint32 length)
65 {
66  if (state == ERROR) return;
67  if (state == WHITESPACE) {
68  if (length == 0) return;
69  if (data[0] != '#') {
70  state = COMMENT; //this allows PO files w/o comments
71  } else {
72  handleComment(data, length);
73  return;
74  }
75  }
76  if (state == COMMENT) {
77  if (length == 0) {
78  state = WHITESPACE;
79  } else if (data[0] == '#') {
80  handleComment(data, length);
81  } else if (length > 7 && strncmp("msgctxt", data, 7) == 0) {
82  state = MSGCTXT;
83  } else if (length > 7 && strncmp("msgid \"", data, 7) == 0) {
84  state = MSGID;
85  } else {
86  state = ERROR;
87  }
88  return;
89  } else if (length > 1 && data[0] == '"' && data[length-1] == '"'
90  && (state == MSGCTXT || state == MSGID || state == MSGSTR
91  || state == MSGID_PLURAL)) {
92  // continued text field
93  isTranslated = state == MSGSTR && length > 2;
94  } else if (state == MSGCTXT
95  && length > 7 && strncmp("msgid \"", data, 7) == 0) {
96  state = MSGID;
97  } else if (state == MSGID
98  && length > 14 && strncmp("msgid_plural \"", data, 14) == 0) {
99  state = MSGID_PLURAL;
100  } else if ((state == MSGID || state == MSGID_PLURAL || state == MSGSTR)
101  && length > 8 && strncmp("msgstr", data, 6) == 0) {
102  state = MSGSTR;
103  isTranslated = strncmp(data+length-3, " \"\"", 3) != 0;
104  } else if (state == MSGSTR) {
105  if (length == 0) {
106  endMessage();
107  } else if (data[0]=='#' || data[0]=='m') { //allow PO without empty line between entries
108  endMessage();
109  state = COMMENT;
110  handleLine(data, length);
111  } else {
112  state = ERROR;
113  }
114  } else {
115  state = ERROR;
116  }
117 #if 0
118  if (messages > 1 || state != MSGSTR) return;
119 
120  // handle special values in the first message
121  // assumption is that value takes up only one line
122  if (strncmp("\"POT-Creation-Date: ", data, 20) == 0) {
123  result->add(Property::TranslationTemplateDate, QByteArray(data + 20, length - 21));
124  } else if (strncmp("\"PO-Revision-Date: ", data, 19) == 0) {
125  result->add(Property::TranslationLastUpDate, QByteArray(data + 19, length - 20));
126  } else if (strncmp("\"Last-Translator: ", data, 18) == 0) {
127  result->add(Property::TranslationLastAuthor, QByteArray(data + 18, length - 19));
128  }
129 #endif
130 }
131 
132 void POExtractor::extract(ExtractionResult* result)
133 {
134  std::ifstream fstream(QFile::encodeName(result->inputUrl()).constData());
135  if (!fstream.is_open()) {
136  return;
137  }
138 
139  result->addType(Type::Text);
140  if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
141  return;
142  }
143 
144  state = WHITESPACE;
145  messages = 0;
146  untranslated = 0;
147  fuzzy = 0;
148  isFuzzy = false;
149  isTranslated = false;
150 
151  std::string line;
152  int lines = 0;
153  while (std::getline(fstream, line)) {
154  //TODO add a parsed text of translation units
155  //QByteArray arr = QByteArray::fromRawData(line.c_str(), line.size());
156  //result->append(QString::fromUtf8(arr));
157 
158  handleLine(line.c_str(), line.size());
159  lines++;
160 
161 
162  if (messages <= 1 && state == MSGSTR)
163  {
164  // handle special values in the first message
165  // assumption is that value takes up only one line
166  if (strncmp("\"POT-Creation-Date: ", line.c_str(), 20) == 0) {
167  result->add(Property::TranslationTemplateDate, QByteArray(line.c_str() + 20, line.size() - 21));
168  } else if (strncmp("\"PO-Revision-Date: ", line.c_str(), 19) == 0) {
169  result->add(Property::TranslationLastUpDate, QByteArray(line.c_str() + 19, line.size() - 20));
170  } else if (strncmp("\"Last-Translator: ", line.c_str(), 18) == 0) {
171  result->add(Property::TranslationLastAuthor, QString::fromUtf8(QByteArray::fromRawData(line.c_str() + 18, line.size() - 19)));
172  }
173  }
174  }
175  handleLine("", 0); //for files with non-empty last line
176  messages--;//cause header does not count
177 
178  result->add(Property::TranslationUnitsTotal, messages);
179  result->add(Property::TranslationUnitsWithTranslation, messages-untranslated);
180  result->add(Property::TranslationUnitsWithDraftTranslation, fuzzy);
181  result->add(Property::LineCount, lines);
182  //TODO WordCount
183 }
virtual void add(Property::Property property, const QVariant &value)=0
This function is called by the plugins when they wish to add a key value pair which should be indexed...
The ExtractorPlugin is the base class for all file metadata extractors.
QByteArray fromRawData(const char *data, int size)
virtual void addType(Type::Type type)=0
This function is called by the plugins.
QString fromUtf8(const char *str, int size)
The ExtractionResult class is where all the data extracted by the indexer is saved.
Flags inputFlags() const
The flags which the extraction plugin should consider following when extracting metadata from the ...
QByteArray encodeName(const QString &fileName)
QString inputUrl() const
The input url which the plugins will use to locate the file.
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Mon May 25 2020 23:11:16 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.