37 #define KDE_NO_DEBUG_OUTPUT
48 #include <QTextStream>
51 #include <kapplication.h>
53 #include <kgenericfactory.h>
58 using namespace GettextCatalog;
65 , _rxMsgCtxt (
"^msgctxt\\s*\".*\"$")
66 , _rxMsgId (
"^msgid\\s*\".*\"$")
67 , _rxMsgIdPlural (
"^msgid_plural\\s*\".*\"$")
68 , _rxMsgIdPluralBorked(
"^msgid_plural\\s*\"?.*\"?$")
69 , _rxMsgIdBorked (
"^msgid\\s*\"?.*\"?$")
70 , _rxMsgIdRemQuotes (
"^msgid\\s*\"")
71 , _rxMsgLineRemEndQuote (
"\"$")
72 , _rxMsgLineRemStartQuote (
"^\"")
73 , _rxMsgLine (
"^\".*\\n?\"$")
74 , _rxMsgLineBorked (
"^\"?.+\\n?\"?$")
75 , _rxMsgStr (
"^msgstr\\s*\".*\\n?\"$")
76 , _rxMsgStrOther (
"^msgstr\\s*\"?.*\\n?\"?$")
77 , _rxMsgStrPluralStart(
"^msgstr\\[0\\]\\s*\".*\\n?\"$")
78 , _rxMsgStrPluralStartBorked (
"^msgstr\\[0\\]\\s*\"?.*\\n?\"?$")
79 , _rxMsgStrPlural (
"^msgstr\\[[0-9]+\\]\\s*\".*\\n?\"$")
80 , _rxMsgStrPluralBorked (
"^msgstr\\[[0-9]\\]\\s*\"?.*\\n?\"?$")
81 , _rxMsgStrRemQuotes (
"^msgstr\\s*\"?")
83 , _obsoleteStart(
"#~")
84 , _msgctxtStart(
"msgctxt")
96 QTextCodec* codec=codecForDevice(device );
97 QTextStream stream(device);
99 stream.setCodec(codec);
107 kDebug() <<
"start parsing...";
113 bool recoveredErrorInHeader =
false;
116 kDebug() <<
"Recovered error in header entry";
117 recoveredErrorInHeader =
true;
119 else if (KDE_ISUNLIKELY( status !=
OK ))
121 kWarning() <<
"Parse error in header entry";
125 bool reconstructedHeader=!_msgid.isEmpty() && !_msgid.first().isEmpty();
128 if (KDE_ISUNLIKELY( reconstructedHeader ))
131 kWarning() <<
"Header entry has non-empty msgid. Creating a temporary header! " << _msgid;
134 "Content-Type: text/plain; charset=UTF-8\\n"
135 "Content-Transfer-Encoding: 8bit\\n"
136 "Mime-Version: 1.0" );
139 const QString comment(
"# Header entry was created by Lokalize.\n#\n" + _comment );
141 recoveredErrorInHeader =
true;
155 const bool docbookContent = tempHeader.
msgstr().contains(
"application/x-xml2pot" );
165 QList<int> errorIndex;
166 bool recoveredError=
false;
167 bool docbookFile=
false;
171 while( !stream.atEnd() )
173 if (reconstructedHeader)
174 reconstructedHeader=
false;
176 success=readEntry(stream);
178 if(KDE_ISLIKELY(success==
OK))
181 _extraDataSaver(_comment);
185 tempCatItem.
setPlural(_gettextPluralForm);
186 tempCatItem.
setMsgid( _msgid, _msgidMultiline );
187 tempCatItem.
setMsgstr( _msgstr, _msgstrMultiline );
188 if (_msgctxtPresent) tempCatItem.
setMsgctxt( _msgctxt );
195 docbookFile = tempCatItem.
comment().contains(
".docbook" );
200 kDebug() <<
"Recovered parse error in entry: " << counter;
202 errorIndex.append(counter);
205 tempCatItem.
setPlural(_gettextPluralForm);
208 if (_msgctxtPresent) tempCatItem.
setMsgctxt( _msgctxt );
217 kDebug() <<
"Parse error in entry: " << counter;
222 kDebug() <<
"Unknown success status, assumig parse error " << success;
230 if (KDE_ISUNLIKELY( !counter && !recoveredErrorInHeader ))
233 kDebug() <<
" Empty file?";
237 kDebug() <<
" ready";
250 if (KDE_ISUNLIKELY( recoveredErrorInHeader ))
252 kDebug() <<
" Returning: header error";
255 else if (KDE_ISUNLIKELY( recoveredError ))
257 kDebug() <<
" Returning: recovered parse error";
263 kDebug() <<
" Returning: OK! :-)";
268 QTextCodec* GettextImportPlugin::codecForDevice(QIODevice* device)
270 QTextStream stream( device );
273 stream.setCodec(
"UTF-8" );
274 stream.setAutoDetectUnicode(
true);
275 QTextCodec* codec=stream.codec();
280 kDebug() <<
"wasn't able to read header";
284 QRegExp regexp(
"Content-Type:\\s*\\w+/[-\\w]+;?\\s*charset\\s*=\\s*(\\S+)\\s*\\\\n");
285 if ( regexp.indexIn( _msgstr.first() ) == -1 )
287 kDebug() <<
"no charset entry found";
291 const QString charset = regexp.cap(1);
292 kDebug() <<
"charset: " << charset;
294 if (charset.isEmpty())
296 kWarning() <<
"No charset defined! Assuming UTF-8!";
303 if ( charset.contains(
"CHARSET"))
305 kDebug() << QString(
"file seems to be a template: using utf-8 encoding.");
306 return QTextCodec::codecForName(
"utf8");;
310 t = QTextCodec::codecForName(charset.toLatin1());
315 kWarning() <<
"charset found, but no codec available, using UTF-8 instead";
323 _msgstr.replaceInStrings(
"\\\"",
"\"");
324 _msgid.replaceInStrings(
"\\\"",
"\"");
325 _msgctxt.replace(
"\\\"",
"\"");
332 enum {Begin,Comment,Msgctxt,Msgid,Msgstr} part=Begin;
336 bool recoverableError=
false;
337 bool seenMsgctxt=
false;
339 _msgstr.append(QString());
341 _msgid.append(QString());
343 _msgctxtPresent=
false;
345 _gettextPluralForm=
false;
348 QStringList::Iterator msgstrIt=_msgstr.begin();
351 while( !stream.atEnd() )
355 if (!_bufferedLine.isEmpty())
358 _bufferedLine.clear();
361 line=stream.readLine();
363 kDebug() <<
"Parsing line: " << line;
365 static const QString lesslessless=
"<<<<<<<";
366 static const QString isisis=
"=======";
367 static const QString moremoremore=
">>>>>>>";
368 if (KDE_ISUNLIKELY( line.startsWith( lesslessless ) || line.startsWith( isisis ) || line.startsWith( moremoremore ) ))
372 kError() <<
"CVS/SVN conflict marker found! Aborting!" << endl << line << endl;
377 line = line.trimmed();
380 int len=line.length();
397 if(line.startsWith(_obsoleteStart))
403 else if(line.startsWith(
'#'))
408 else if( line.startsWith(_msgctxtStart) && line.contains( _rxMsgCtxt ) )
413 line.remove(QRegExp(
"^msgctxt\\s*\""));
414 line.remove(_rxMsgLineRemEndQuote);
416 _msgctxtPresent=
true;
419 else if( line.contains( _rxMsgId ) )
424 line.remove(_rxMsgIdRemQuotes);
425 line.remove(_rxMsgLineRemEndQuote);
427 _msgidMultiline=line.isEmpty();
428 (*(_msgid).begin())=line;
432 else if(KDE_ISUNLIKELY( line.contains( _rxMsgIdBorked ) ))
437 line.remove(QRegExp(
"^msgid\\s*\"?"));
438 line.remove(_rxMsgLineRemEndQuote);
440 _msgidMultiline=line.isEmpty();
441 (*(_msgid).begin())=line;
444 recoverableError=
true;
448 kDebug() <<
"no comment, msgctxt or msgid found after a comment: " << line;
453 else if(part==Comment)
455 if(!len && _obsolete )
return OK;
457 else if(line.startsWith(_obsoleteStart))
459 _comment+=(
'\n'+line);
462 else if(line.startsWith(
'#'))
464 _comment+=(
'\n'+line);
466 else if( line.startsWith(_msgctxtStart) &&line.contains( _rxMsgCtxt ) )
471 line.remove(QRegExp(
"^msgctxt\\s*\""));
472 line.remove(_rxMsgLineRemEndQuote);
474 _msgctxtPresent=
true;
477 else if( line.contains( _rxMsgId ) )
482 line.remove(_rxMsgIdRemQuotes);
483 line.remove(_rxMsgLineRemEndQuote);
485 _msgidMultiline=line.isEmpty();
486 (*(_msgid).begin())=line;
489 else if(KDE_ISUNLIKELY( line.contains( _rxMsgIdBorked ) ))
494 line.remove(QRegExp(
"^msgid\\s*\"?"));
495 line.remove(_rxMsgLineRemEndQuote);
497 _msgidMultiline=line.isEmpty();
498 (*(_msgid).begin())=line;
501 recoverableError=
true;
505 kDebug() <<
"no comment or msgid found after a comment while parsing: " << _comment;
510 else if(part==Msgctxt)
514 else if( line.contains( _rxMsgLine ) )
517 line.remove(_rxMsgLineRemStartQuote);
518 line.remove(_rxMsgLineRemEndQuote);
521 if(_msgctxt.isEmpty())
524 _msgctxt+=(
'\n'+line);
525 _msgctxtPresent=
true;
527 else if( line.contains( _rxMsgId ) )
532 line.remove(_rxMsgIdRemQuotes);
533 line.remove(_rxMsgLineRemEndQuote);
535 _msgidMultiline=line.isEmpty();
536 (*(_msgid).begin())=line;
539 else if(KDE_ISUNLIKELY( line.contains ( _rxMsgIdBorked ) ))
544 line.remove(QRegExp(
"^msgid\\s*\"?"));
545 line.remove(_rxMsgLineRemEndQuote);
547 _msgidMultiline=line.isEmpty();
548 (*(_msgid).begin())=line;
551 recoverableError=
true;
555 kDebug() <<
"no msgid found after a msgctxt while parsing: " << _msgctxt;
564 else if( line.contains( _rxMsgLine ) )
567 line.remove(_rxMsgLineRemStartQuote);
568 line.remove(_rxMsgLineRemEndQuote);
570 QStringList::Iterator it;
571 if(_gettextPluralForm)
585 else if( line.contains( _rxMsgIdPlural) )
588 _gettextPluralForm =
true;
591 line.remove(QRegExp(
"^msgid_plural\\s*\""));
592 line.remove(_rxMsgLineRemEndQuote);
597 else if(KDE_ISUNLIKELY( line.contains( _rxMsgIdPluralBorked ) ))
600 _gettextPluralForm =
true;
603 line.remove(QRegExp(
"^msgid_plural\\s*\"?"));
604 line.remove(_rxMsgLineRemEndQuote);
609 recoverableError=
true;
611 else if( !_gettextPluralForm && ( line.contains( _rxMsgStr ) ) )
616 line.remove(_rxMsgStrRemQuotes);
617 line.remove(_rxMsgLineRemEndQuote);
619 _msgstrMultiline=line.isEmpty();
622 else if( !_gettextPluralForm && ( line.contains( _rxMsgStrOther )) )
627 line.remove(_rxMsgStrRemQuotes);
628 line.remove(_rxMsgLineRemEndQuote);
630 _msgstrMultiline=line.isEmpty();
634 recoverableError=
true;
636 else if( _gettextPluralForm && ( line.contains( _rxMsgStrPluralStart ) ) )
641 line.remove(QRegExp(
"^msgstr\\[0\\]\\s*\"?"));
642 line.remove(_rxMsgLineRemEndQuote);
644 _msgstrMultiline=line.isEmpty();
647 else if(KDE_ISUNLIKELY( _gettextPluralForm && line.contains( _rxMsgStrPluralStartBorked ) ))
652 line.remove(QRegExp(
"^msgstr\\[0\\]\\s*\"?"));
653 line.remove(_rxMsgLineRemEndQuote);
655 _msgstrMultiline=line.isEmpty();
659 recoverableError=
true;
661 else if ( line.startsWith(
'#' ) )
664 kDebug() <<
"comment found after a msgid while parsing: " << _msgid.first();
668 else if ( line.startsWith(
"msgid" ) )
670 kDebug() <<
"Another msgid found after a msgid while parsing: " << _msgid.first();
675 else if(KDE_ISUNLIKELY( line.contains( _rxMsgLineBorked ) ))
677 recoverableError=
true;
680 line.remove(_rxMsgLineRemStartQuote);
681 line.remove(_rxMsgLineRemEndQuote);
683 QStringList::Iterator it;
684 if( _gettextPluralForm )
700 kDebug() <<
"no msgstr found after a msgid while parsing: " << _msgid.first();
705 else if(part==Msgstr)
710 else if( line.contains( _rxMsgLine ) )
713 line.remove(_rxMsgLineRemStartQuote);
714 line.remove(_rxMsgLineRemEndQuote);
716 if(!(*msgstrIt).isEmpty())
720 else if( _gettextPluralForm && ( line.contains( _rxMsgStrPlural ) ) )
723 line.remove(QRegExp(
"^msgstr\\[[0-9]+\\]\\s*\"?"));
724 line.remove(_rxMsgLineRemEndQuote);
726 _msgstr.append(line);
727 msgstrIt=_msgstr.end();
730 else if ( line.startsWith(
'#' ) || line.startsWith(
"msgid" ) )
736 else if(KDE_ISUNLIKELY( _gettextPluralForm && ( line.contains( _rxMsgStrPluralBorked ) ) ))
739 line.remove(QRegExp(
"^msgstr\\[[0-9]\\]\\s*\"?"));
740 line.remove(_rxMsgLineRemEndQuote);
742 _msgstr.append(line);
743 msgstrIt=_msgstr.end();
747 recoverableError=
true;
749 else if(line.startsWith(
"msgstr"))
751 kDebug() <<
"Another msgstr found after a msgstr while parsing: " << line << _msgstr.last();
756 else if(KDE_ISUNLIKELY( line.contains( _rxMsgLineBorked ) ))
758 recoverableError=
true;
761 line.remove(_rxMsgLineRemStartQuote);
762 line.remove(_rxMsgLineRemEndQuote);
764 if(!(*msgstrIt).isEmpty())
770 kDebug() <<
"no msgid or comment found after a msgstr while parsing: " << _msgstr.last();
794 if(KDE_ISUNLIKELY(error))
796 else if(KDE_ISUNLIKELY(recoverableError))
void setCatalogExtraData(const QStringList &data)
set extra data for the catalog, which can't be stored in CatalogItem.
void setMsgid(const QString &msg, const int form=0)
This class represents an entry in a catalog.
void setPlural(bool plural=true)
void setMsgctxt(const QString &msg)
Header error that could be recovered.
void setErrorIndex(const QList< int > &errors)
set the list of parse error indexes
void appendCatalogItem(const CatalogItem &item, const bool obsolete=false)
Append a new catalog item, either as normal or as an obsolete one.
ConversionStatus
Result of the conversion.
void setCodec(QTextCodec *codec)
Set the character encoding used in the catalog file.
HISTORY: this was a base class for Catalog import plugins in KBabel, but this architecture isn't ...
ConversionStatus load(QIODevice *)
Reimplement this method to load the local file passed as an argument.
void setGeneratedFromDocbook(const bool fromDocbook)
set flag that the file is generated from DocBook
void setComment(const QString &com)
const QString & msgstr(const int form=0) const
void setHeader(const CatalogItem &header)
set the header catalog item
void setMsgstr(const QString &msg, const int form=0)