• Skip to content
  • Skip to link menu
KDE 4.4 API Reference
  • KDE API Reference
  • KDE Support
  • Sitemap
  • Contact Us
 

strigi/src/streams

kmpsearcher.cpp

Go to the documentation of this file.
00001 /* This file is part of Strigi Desktop Search
00002  *
00003  * Copyright (C) 2007 Jos van den Oever <jos@vandenoever.info>
00004  *
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Library General Public
00007  * License as published by the Free Software Foundation; either
00008  * version 2 of the License, or (at your option) any later version.
00009  *
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Library General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU Library General Public License
00016  * along with this library; see the file COPYING.LIB.  If not, write to
00017  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018  * Boston, MA 02110-1301, USA.
00019  */
00020 #include "kmpsearcher.h"
00021 #include <strigi/strigiconfig.h>
00022 #include <algorithm>
00023 
00024 /* This is not the KMP algorigthm. We're now using the faster (turbo)
00025    Boyer-Moore algorithm:
00026      http://www-igm.univ-mlv.fr/~lecroq/string/node15.html
00027 */
00028 
00029 using namespace std;
00030 using namespace Strigi;
00031 
00032 void
00033 preBmBc(const char* x, int32_t m, int32_t* bmBc) {
00034     int32_t i;
00035  
00036     for (i = 0; i < 256; ++i) {
00037         bmBc[i] = 1;
00038     }
00039     for (i = 0; i < m - 1; ++i) {
00040         bmBc[(unsigned char)x[i]] = -i;
00041     }
00042 }
00043 void
00044 suffixes(const char* x, int32_t m, int32_t* suff) {
00045     int32_t f, g, i;
00046 
00047     f = 0; 
00048     suff[m - 1] = m;
00049     g = m - 1;
00050     for (i = m - 2; i >= 0; --i) {
00051         if (i > g && suff[i + m - 1 - f] < i - g) {
00052             suff[i] = suff[i + m - 1 - f];
00053         } else {
00054             if (i < g) {
00055                 g = i;
00056             }
00057             f = i;
00058             while (g >= 0 && x[g] == x[g + m - 1 - f]) {
00059                 --g;
00060             }
00061             suff[i] = f - g;
00062         }
00063     }
00064 }
00065 void
00066 preBmGs(const char* x, int32_t m, int32_t* bmGs) {
00067     int32_t i, j;
00068     int32_t* suff;
00069 
00070     suff = new int32_t[m];
00071  
00072     suffixes(x, m, suff);
00073  
00074     for (i = 0; i < m; ++i) {
00075         bmGs[i] = m;
00076     }
00077     j = 0;
00078     for (i = m - 1; i >= 0; --i) {
00079         if (suff[i] == i + 1) {
00080             for (; j < m - 1 - i; ++j) {
00081                 if (bmGs[j] == m) {
00082                     bmGs[j] = m - 1 - i;
00083                 }
00084             }
00085         }
00086     }
00087     for (i = 0; i <= m - 2; ++i) {
00088         bmGs[m - 1 - suff[i]] = m - 1 - i;
00089     }
00090 
00091     delete [] suff;
00092 }
00093 KmpSearcher::KmpSearcher(const std::string& query) :table(0) {
00094     setQuery(query);
00095 }
00096 void
00097 KmpSearcher::setQuery(const string& query) {
00098     m_query = query;
00099     len = query.length();
00100     const char* p = query.c_str();
00101     int32_t tablesize = 4 * (257+len);
00102     if (table) {
00103         if (len > maxlen) {
00104             table = (int32_t*)realloc(table, tablesize);
00105             maxlen = len;
00106         }
00107     } else {
00108         table = (int32_t*)malloc(tablesize);
00109         maxlen = len;
00110     }
00111 
00112     preBmGs(p, len, table+256);
00113     preBmBc(p, len, table);
00114 }
00115 const char*
00116 KmpSearcher::search(const char* haystack, int32_t haylen) const {
00117     if (table == 0) return 0;
00118     const char* needle = m_query.c_str();
00119 
00120     int32_t i = 0;
00121     int32_t *bmGs = table + 256;
00122 
00123     const char* hayend = haystack + haylen - len;
00124     const char* jp = haystack;
00125 
00126 #ifndef TURBOBM
00127     int32_t bcShift, shift, u, v, turboShift;
00128     u = 0;
00129     shift = len;
00130     while (jp <= hayend) {
00131         i = len - 1;
00132         while (i >= 0 && needle[i] == jp[i]) {
00133             --i;
00134             if (u != 0 && i == len - 1 - shift) {
00135                 i -= u;
00136             }
00137         }
00138         if (i < 0) {
00139             break;
00140         } else {
00141             v = len - 1 - i;
00142             turboShift = u - v;
00143             bcShift = table[(unsigned char)jp[i]] + i;
00144             shift = max(turboShift, bcShift);
00145             shift = max(shift, bmGs[i]);
00146             if (shift == bmGs[i]) {
00147                 u = min(len - shift, v);
00148             } else {
00149                 if (turboShift < bcShift) {
00150                     shift = max(shift, u + 1);
00151                 }
00152                 u = 0;
00153             }
00154         }
00155         jp += shift;
00156     }
00157 #else
00158     while (jp <= hayend) {
00159         for (i = len - 1; i >= 0 && needle[i] == jp[i]; --i);
00160         if (i < 0) {
00161             break;
00162         } else {
00163             jp += max(bmGs[i], table[(unsigned char)jp[i]] + i);
00164         }
00165     }
00166 #endif
00167 
00168     if (i == -1) {
00169         return jp;
00170     } else {
00171         return 0;
00172     }
00173 }
00174 

strigi/src/streams

Skip menu "strigi/src/streams"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members

KDE Support

Skip menu "KDE Support"
  • akonadi
  • Decibel
  • grantlee
  • kdewin
  • phonon
  •     Backend
  • polkit-qt
  • qca
  • qimageblitz
  • soprano
  • strigi
  •     searchclient
  •     streamanalyzer
  •     streams
Generated for KDE Support by doxygen 1.5.9-20090814
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal