KFileMetaData

poextractor.cpp
1/*
2 Gettext translation file analyzer
3
4 SPDX-FileCopyrightText: 2007 Montel Laurent <montel@kde.org>
5 SPDX-FileCopyrightText: 2009 Jos van den Oever <jos@vandenoever.info>
6 SPDX-FileCopyrightText: 2014 Nick Shaforostoff <shaforostoff@gmail.com>
7
8 SPDX-License-Identifier: LGPL-2.1-or-later
9*/
10
11
12#include "poextractor.h"
13#include <QFile>
14#include <fstream>
15
16using namespace KFileMetaData;
17
18POExtractor::POExtractor(QObject* parent)
19 : ExtractorPlugin(parent)
20{
21
22}
23
24const QStringList supportedMimeTypes = {
25 QStringLiteral("text/x-gettext-translation"),
26};
27
28QStringList POExtractor::mimetypes() const
29{
30 return supportedMimeTypes;
31}
32
33void POExtractor::endMessage()
34{
35 messages++;
36 fuzzy+=isFuzzy;
37 untranslated+=(!isTranslated);
38
39 isFuzzy = false;
40 isTranslated = false;
41 state = WHITESPACE;
42}
43
44void POExtractor::handleComment(const char* data, quint32 length)
45{
46 state = COMMENT;
47 if (length >= 8 && strncmp(data, "#, fuzzy", 8) == 0) { // could be better
48 isFuzzy = true;
49 }
50}
51
52void POExtractor::handleLine(const char* data, quint32 length)
53{
54 if (state == ERROR) {
55 return;
56 }
57 if (state == WHITESPACE) {
58 if (length == 0) {
59 return;
60 }
61 if (data[0] != '#') {
62 state = COMMENT; //this allows PO files w/o comments
63 } else {
64 handleComment(data, length);
65 return;
66 }
67 }
68 if (state == COMMENT) {
69 if (length == 0) {
70 state = WHITESPACE;
71 } else if (data[0] == '#') {
72 handleComment(data, length);
73 } else if (length > 7 && strncmp("msgctxt", data, 7) == 0) {
74 state = MSGCTXT;
75 } else if (length > 7 && strncmp("msgid \"", data, 7) == 0) {
76 state = MSGID;
77 } else {
78 state = ERROR;
79 }
80 return;
81 } else if (length > 1 && data[0] == '"' && data[length-1] == '"'
82 && (state == MSGCTXT || state == MSGID || state == MSGSTR
83 || state == MSGID_PLURAL)) {
84 // continued text field
85 isTranslated = state == MSGSTR && length > 2;
86 } else if (state == MSGCTXT
87 && length > 7 && strncmp("msgid \"", data, 7) == 0) {
88 state = MSGID;
89 } else if (state == MSGID
90 && length > 14 && strncmp("msgid_plural \"", data, 14) == 0) {
91 state = MSGID_PLURAL;
92 } else if ((state == MSGID || state == MSGID_PLURAL || state == MSGSTR)
93 && length > 8 && strncmp("msgstr", data, 6) == 0) {
94 state = MSGSTR;
95 isTranslated = strncmp(data+length-3, " \"\"", 3) != 0;
96 } else if (state == MSGSTR) {
97 if (length == 0) {
98 endMessage();
99 } else if (data[0]=='#' || data[0]=='m') { //allow PO without empty line between entries
100 endMessage();
101 state = COMMENT;
102 handleLine(data, length);
103 } else {
104 state = ERROR;
105 }
106 } else {
107 state = ERROR;
108 }
109#if 0
110 if (messages > 1 || state != MSGSTR) return;
111
112 // handle special values in the first message
113 // assumption is that value takes up only one line
114 if (strncmp("\"POT-Creation-Date: ", data, 20) == 0) {
115 result->add(Property::TranslationTemplateDate, QByteArray(data + 20, length - 21));
116 } else if (strncmp("\"PO-Revision-Date: ", data, 19) == 0) {
117 result->add(Property::TranslationLastUpDate, QByteArray(data + 19, length - 20));
118 } else if (strncmp("\"Last-Translator: ", data, 18) == 0) {
119 result->add(Property::TranslationLastAuthor, QByteArray(data + 18, length - 19));
120 }
121#endif
122}
123
124void POExtractor::extract(ExtractionResult* result)
125{
126 std::ifstream fstream(QFile::encodeName(result->inputUrl()).constData());
127 if (!fstream.is_open()) {
128 return;
129 }
130
131 result->addType(Type::Text);
132 if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
133 return;
134 }
135
136 state = WHITESPACE;
137 messages = 0;
138 untranslated = 0;
139 fuzzy = 0;
140 isFuzzy = false;
141 isTranslated = false;
142
143 std::string line;
144 int lines = 0;
145 while (std::getline(fstream, line)) {
146 //TODO add a parsed text of translation units
147 //QByteArray arr = QByteArray::fromRawData(line.c_str(), line.size());
148 //result->append(QString::fromUtf8(arr));
149
150 handleLine(line.c_str(), line.size());
151 lines++;
152
153
154 if (messages <= 1 && state == MSGSTR)
155 {
156 // handle special values in the first message
157 // assumption is that value takes up only one line
158 if (strncmp("\"POT-Creation-Date: ", line.c_str(), 20) == 0) {
159 result->add(Property::TranslationTemplateDate, QByteArray(line.c_str() + 20, line.size() - 21));
160 } else if (strncmp("\"PO-Revision-Date: ", line.c_str(), 19) == 0) {
161 result->add(Property::TranslationLastUpDate, QByteArray(line.c_str() + 19, line.size() - 20));
162 } else if (strncmp("\"Last-Translator: ", line.c_str(), 18) == 0) {
163 result->add(Property::TranslationLastAuthor, QString::fromUtf8(QByteArray::fromRawData(line.c_str() + 18, line.size() - 19)));
164 }
165 }
166 }
167 handleLine("", 0); //for files with non-empty last line
168 messages--;//cause header does not count
169
170 result->add(Property::TranslationUnitsTotal, messages);
171 result->add(Property::TranslationUnitsWithTranslation, messages-untranslated);
172 result->add(Property::TranslationUnitsWithDraftTranslation, fuzzy);
173 result->add(Property::LineCount, lines);
174 //TODO WordCount
175}
176
177#include "moc_poextractor.cpp"
The ExtractionResult class is where all the data extracted by the indexer is saved.
QString inputUrl() const
The input URL which the plugins will use to locate the file.
virtual void addType(Type::Type type)=0
This function is called by the plugins.
virtual void add(Property::Property property, const QVariant &value)=0
This function is called by the plugins when they wish to add a key value pair which should be indexed...
Flags inputFlags() const
The flags which the extraction plugin should consider following when extracting metadata from the ...
The ExtractorPlugin is the base class for all file metadata extractors.
@ LineCount
The number of lines in a document.
Definition properties.h:151
@ Text
Any file which contains text data (i.e.
Definition types.h:83
The KFileMetaData namespace.
const char * constData() const
QByteArray fromRawData(const char *data, qsizetype size)
QByteArray encodeName(const QString &fileName)
QString fromUtf8(QByteArrayView str)
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:48:11 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.