kio

kmimemagic.cpp

00001 /* This file is part of the KDE libraries
00002    Copyright (C) 2000 Fritz Elfert <fritz@kde.org>
00003    Copyright (C) 2004 Allan Sandfeld Jensen <kde@carewolf.com>
00004 
00005    This library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Library General Public
00007    License version 2 as published by the Free Software Foundation.
00008 
00009    This library is distributed in the hope that it will be useful,
00010    but WITHOUT ANY WARRANTY; without even the implied warranty of
00011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012    Library General Public License for more details.
00013 
00014    You should have received a copy of the GNU Library General Public License
00015    along with this library; see the file COPYING.LIB.  If not, write to
00016    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00017    Boston, MA 02110-1301, USA.
00018 */
00019 #include "kmimemagic.h"
00020 #include <kdebug.h>
00021 #include <kapplication.h>
00022 #include <qfile.h>
00023 #include <ksimpleconfig.h>
00024 #include <kstandarddirs.h>
00025 #include <kstaticdeleter.h>
00026 #include <klargefile.h>
00027 #include <assert.h>
00028 
00029 static int fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb);
00030 static void process(struct config_rec* conf,  const QString &);
00031 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes);
00032 static int tagmagic(unsigned char *buf, int nbytes);
00033 static int textmagic(struct config_rec* conf, unsigned char *, int);
00034 
00035 static void tryit(struct config_rec* conf, unsigned char *buf, int nb);
00036 static int match(struct config_rec* conf, unsigned char *, int);
00037 
00038 KMimeMagic* KMimeMagic::s_pSelf;
00039 static KStaticDeleter<KMimeMagic> kmimemagicsd;
00040 
00041 KMimeMagic* KMimeMagic::self()
00042 {
00043   if( !s_pSelf )
00044     initStatic();
00045   return s_pSelf;
00046 }
00047 
00048 void KMimeMagic::initStatic()
00049 {
00050   s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() );
00051   s_pSelf->setFollowLinks( true );
00052 }
00053 
00054 #include <stdio.h>
00055 #include <unistd.h>
00056 #include <stdlib.h>
00057 #include <sys/wait.h>
00058 #include <sys/types.h>
00059 #include <sys/stat.h>
00060 #include <fcntl.h>
00061 #include <errno.h>
00062 #include <ctype.h>
00063 #include <time.h>
00064 #include <utime.h>
00065 #include <stdarg.h>
00066 #include <qregexp.h>
00067 #include <qstring.h>
00068 
00069 //#define MIME_MAGIC_DEBUG_TABLE // untested
00070 
00071 // Uncomment to debug the config-file parsing phase
00072 //#define DEBUG_APPRENTICE
00073 // Uncomment to debug the matching phase
00074 //#define DEBUG_MIMEMAGIC
00075 
00076 #if (defined DEBUG_MIMEMAGIC || defined DEBUG_APPRENTICE)
00077 #define DEBUG_LINENUMBERS
00078 #endif
00079 
00080 /*
00081  * Built-in MIME types
00082  */
00083 #define MIME_BINARY_UNKNOWN    "application/octet-stream"
00084 #define MIME_BINARY_UNREADABLE "application/x-unreadable"
00085 #define MIME_BINARY_ZEROSIZE   "application/x-zerosize"
00086 #define MIME_TEXT_UNKNOWN      "text/plain"
00087 #define MIME_TEXT_PLAIN        "text/plain"
00088 #define MIME_INODE_DIR         "inode/directory"
00089 #define MIME_INODE_CDEV        "inode/chardevice"
00090 #define MIME_INODE_BDEV        "inode/blockdevice"
00091 #define MIME_INODE_FIFO        "inode/fifo"
00092 #define MIME_INODE_LINK        "inode/link"
00093 #define MIME_INODE_SOCK        "inode/socket"
00094 // Following should go in magic-file - Fritz
00095 #define MIME_APPL_TROFF        "application/x-troff"
00096 #define MIME_APPL_TAR          "application/x-tar"
00097 #define MIME_TEXT_FORTRAN      "text/x-fortran"
00098 
00099 #define MAXMIMESTRING        256
00100 
00101 #define HOWMANY 4000            /* big enough to recognize most WWW files, and skip GPL-headers */
00102 #define MAXDESC   50            /* max leng of text description */
00103 #define MAXstring 64            /* max leng of "string" types */
00104 
/*
 * Value storage shared by all rule types: struct magic keeps its comparison
 * value in one of these, and the mget/mcheck/mconvert prototypes take a
 * pointer to one.  Which member is meaningful depends on the rule's type.
 */
00105 typedef union VALUETYPE {
00106     unsigned char b;
00107     unsigned short h;
00108     unsigned long l;
00109     char s[MAXstring];
00110     unsigned char hs[2];    /* 2 bytes of a fixed-endian "short" */
00111     unsigned char hl[4];    /* 4 bytes of a fixed-endian "long" */
00112 } VALUETYPE;
00113 
00114 struct magic {
00115     struct magic *next;     /* link to next entry */
00116 #ifdef DEBUG_LINENUMBERS
00117     int lineno;             /* line number from magic file - doesn't say from which one ;) */
00118 #endif
00119 
00120     short flag;
00121 #define INDIR    1              /* if '>(...)' appears,  */
00122 #define UNSIGNED 2              /* comparison is unsigned */
00123     short cont_level;       /* level of ">" */
00124     struct {
00125         char type;      /* byte short long */
00126         long offset;    /* offset from indirection */
00127     } in;
00128     long offset;            /* offset to magic number */
00129     unsigned char reln;     /* relation (0=eq, '>'=gt, etc) */
00130     char type;              /* int, short, long or string. */
00131     char vallen;            /* length of string value, if any */
00132 #define BYTE       1
00133 #define SHORT      2
00134 #define LONG       4
00135 #define STRING     5
00136 #define DATE       6
00137 #define BESHORT    7
00138 #define BELONG     8
00139 #define BEDATE     9
00140 #define LESHORT   10
00141 #define LELONG    11
00142 #define LEDATE    12
00143     VALUETYPE value;        /* either number or string */
00144     unsigned long mask;     /* mask before comparison with value */
00145     char nospflag;          /* suppress space character */
00146 
00147     /* NOTE: this string is suspected of overrunning - find it! */
00148     char desc[MAXDESC];     /* description */
00149 };
00150 
00151 /*
00152  * data structures for tar file recognition
00153  * --------------------------------------------------------------------------
00154  * Header file for public domain tar (tape archive) program.
00155  *
00156  * @(#)tar.h 1.20 86/10/29    Public Domain. Created 25 August 1985 by John
00157  * Gilmore, ihnp4!hoptoad!gnu.
00158  *
00159  * Header block on tape.
00160  *
00161  * I'm going to use traditional DP naming conventions here. A "block" is a big
00162  * chunk of stuff that we do I/O on. A "record" is a piece of info that we
00163  * care about. Typically many "record"s fit into a "block".
00164  */
00165 #define RECORDSIZE    512
00166 #define NAMSIZ    100
00167 #define TUNMLEN    32
00168 #define TGNMLEN    32
00169 
/*
 * One RECORDSIZE-byte tar record, viewable either as raw bytes (charptr)
 * or as the header fields below.  All header fields are plain char arrays;
 * NOTE(review): the from_oct() prototype suggests the numeric ones hold
 * octal text - confirm against is_tar().
 */
00170 union record {
00171     char charptr[RECORDSIZE];
00172     struct header {
00173         char name[NAMSIZ];
00174         char mode[8];
00175         char uid[8];
00176         char gid[8];
00177         char size[12];
00178         char mtime[12];
00179         char chksum[8];
00180         char linkflag;
00181         char linkname[NAMSIZ];
00182         char magic[8];      /* compared against TMAGIC - presumably in is_tar(); verify */
00183         char uname[TUNMLEN];
00184         char gname[TGNMLEN];
00185         char devmajor[8];
00186         char devminor[8];
00187     } header;
00188 };
00189 
00190 /* The magic field is filled with this if uname and gname are valid. */
00191 #define    TMAGIC        "ustar  "  /* 7 chars and a null */
00192 
00193 /*
00194  * file-function prototypes
00195  */
00196 static int is_tar(unsigned char *, int);
00197 static unsigned long signextend(struct magic *, unsigned long);
00198 static int getvalue(struct magic *, char **);
00199 static int hextoint(int);
00200 static char *getstr(char *, char *, int, int *);
00201 static int mget(union VALUETYPE *, unsigned char *, struct magic *, int);
00202 static int mcheck(union VALUETYPE *, struct magic *);
00203 static int mconvert(union VALUETYPE *, struct magic *);
00204 static long from_oct(int, char *);
00205 
00206 /*
00207  * includes for ASCII substring recognition formerly "names.h" in file
00208  * command
00209  *
00210  * Original notes: names and types used by ascmagic in file(1).
00211  * These tokens are
00212  * here because they can appear anywhere in the first HOWMANY bytes, while
00213  * tokens in /etc/magic must appear at fixed offsets into the file. Don't
00214  * make HOWMANY too high unless you have a very fast CPU.
00215  */
00216 
00217 /* these types are used to calculate the index into 'types': keep them in sync! */
00218 /* HTML inserted in first because this is a web server module now */
00219 /* ENG removed because stupid */
00220 #define L_HTML   0x001          /* HTML */
00221 #define L_C      0x002          /* first and foremost on UNIX */
00222 #define L_MAKE   0x004          /* Makefiles */
00223 #define L_PLI    0x008          /* PL/1 */
00224 #define L_MACH   0x010          /* some kinda assembler */
00225 #define L_PAS    0x020          /* Pascal */
00226 #define L_JAVA   0x040          /* Java source */
00227 #define L_CPP    0x080          /* C++ */
00228 #define L_MAIL   0x100          /* Electronic mail */
00229 #define L_NEWS   0x200          /* Usenet Netnews */
00230 #define L_DIFF   0x400          /* Output of diff */
00231 #define L_OBJC   0x800          /* Objective C */
00232 
00233 // Note: this is not a type, it's just used to mark items that should count more
00234 #define FLAG_STRONG 0x1000
00235 
00236 #define P_HTML   0          /* HTML */
00237 #define P_C      1          /* first and foremost on UNIX */
00238 #define P_MAKE   2          /* Makefiles */
00239 #define P_PLI    3          /* PL/1 */
00240 #define P_MACH   4          /* some kinda assembler */
00241 #define P_PAS    5          /* Pascal */
00242 #define P_JAVA   6          /* Java source */
00243 #define P_CPP    7          /* C++ */
00244 #define P_MAIL   8          /* Electronic mail */
00245 #define P_NEWS   9          /* Usenet Netnews */
00246 #define P_DIFF  10          /* Output of diff */
00247 #define P_OBJC  11          /* Objective C */
00248 
/*
 * Per-language entry for the ASCII keyword heuristic: the MIME type to
 * report, a keyword count and a weighting factor.
 */
struct asc_type {
    const char *type;
    int  kwords;
    double  weight;
};

/* Indexed by the P_* constants; keep the order in sync with them. */
static const asc_type types[] = {
    /* type                kwords weight */
    // original note on text/html: "10 items but 10 different words only"
    // NOTE(review): that note does not obviously match kwords=19 - confirm how it was chosen
    { "text/html",          19,   2   },
    { "text/x-c",           13,   1   },
    { "text/x-makefile",     4,   1.9 },
    { "text/x-pli",          1,   3   },
    { "text/x-assembler",    6,   2.1 },
    { "text/x-pascal",       1,   1   },
    { "text/x-java",        12,   1   },
    { "text/x-c++",         19,   1   },
    { "message/rfc822",      4,   1.9 },
    { "message/news",        3,   2   },
    { "text/x-diff",         4,   2   },
    { "text/x-objc",        10,   1   }
};

/* number of entries in types[] */
#define NTYPES (sizeof(types)/sizeof(types[0]))
00271 
00272 static struct names {
00273     const char *name;
00274     short type;
00275 } const names[] = {
00276     {
00277         "<html", L_HTML | FLAG_STRONG
00278     },
00279     {
00280         "<HTML", L_HTML | FLAG_STRONG
00281     },
00282     {
00283         "<head", L_HTML
00284     },
00285     {
00286         "<HEAD", L_HTML
00287     },
00288     {
00289         "<body", L_HTML
00290     },
00291     {
00292         "<BODY", L_HTML
00293     },
00294     {
00295         "<title", L_HTML
00296     },
00297     {
00298         "<TITLE", L_HTML
00299     },
00300     {
00301         "<h1", L_HTML
00302     },
00303     {
00304         "<H1", L_HTML
00305     },
00306     {
00307         "<a", L_HTML
00308     },
00309     {
00310         "<A", L_HTML
00311     },
00312     {
00313         "<img", L_HTML
00314     },
00315     {
00316         "<IMG", L_HTML
00317     },
00318     {
00319         "<!--", L_HTML
00320     },
00321     {
00322         "<!doctype", L_HTML
00323     },
00324     {
00325         "<!DOCTYPE", L_HTML
00326     },
00327     {
00328         "<div", L_HTML
00329     },
00330     {
00331         "<DIV", L_HTML
00332     },
00333     {
00334         "<frame", L_HTML
00335     },
00336     {
00337         "<FRAME", L_HTML
00338     },
00339     {
00340         "<frameset", L_HTML
00341     },
00342     {
00343         "<FRAMESET", L_HTML
00344     },
00345         {
00346                 "<script", L_HTML | FLAG_STRONG
00347         },
00348         {
00349                 "<SCRIPT", L_HTML | FLAG_STRONG
00350         },
00351     {
00352         "/*", L_C|L_CPP|L_JAVA|L_OBJC
00353     },
00354     {
00355         "//", L_C|L_CPP|L_JAVA|L_OBJC
00356     },
00357     {
00358         "#include", L_C|L_CPP
00359     },
00360     {
00361         "#ifdef", L_C|L_CPP
00362     },
00363     {
00364         "#ifndef", L_C|L_CPP
00365     },
00366     {
00367         "bool", L_C|L_CPP
00368     },
00369     {
00370         "char", L_C|L_CPP|L_JAVA|L_OBJC
00371     },
00372     {
00373         "int", L_C|L_CPP|L_JAVA|L_OBJC
00374     },
00375     {
00376         "float", L_C|L_CPP|L_JAVA|L_OBJC
00377     },
00378     {
00379         "void", L_C|L_CPP|L_JAVA|L_OBJC
00380     },
00381     {
00382         "extern", L_C|L_CPP
00383     },
00384     {
00385         "struct", L_C|L_CPP
00386     },
00387     {
00388         "union", L_C|L_CPP
00389     },
00390     {
00391         "implements", L_JAVA
00392     },
00393     {
00394         "super", L_JAVA
00395     },
00396     {
00397         "import", L_JAVA
00398     },
00399     {
00400         "class", L_CPP|L_JAVA
00401     },
00402     {
00403         "public", L_CPP|L_JAVA
00404     },
00405     {
00406         "private", L_CPP|L_JAVA
00407     },
00408     {
00409         "explicit", L_CPP
00410     },
00411     {
00412         "virtual", L_CPP
00413     },
00414     {
00415         "namespace", L_CPP
00416     },
00417     {
00418         "#import", L_OBJC
00419     },
00420     {
00421         "@interface", L_OBJC
00422     },
00423     {
00424         "@implementation", L_OBJC
00425     },
00426     {
00427         "@protocol", L_OBJC
00428     },
00429     {
00430         "CFLAGS", L_MAKE
00431     },
00432     {
00433         "LDFLAGS", L_MAKE
00434     },
00435     {
00436         "all:", L_MAKE
00437     },
00438     {
00439         ".PHONY:", L_MAKE
00440     },
00441     {
00442         "srcdir", L_MAKE
00443     },
00444     {
00445         "exec_prefix", L_MAKE
00446     },
00447     /*
00448      * Too many files of text have these words in them.  Find another way
00449      * to recognize Fortrash.
00450      */
00451     {
00452         ".ascii", L_MACH
00453     },
00454     {
00455         ".asciiz", L_MACH
00456     },
00457     {
00458         ".byte", L_MACH
00459     },
00460     {
00461         ".even", L_MACH
00462     },
00463     {
00464         ".globl", L_MACH
00465     },
00466     {
00467         "clr", L_MACH
00468     },
00469     {
00470         "(input", L_PAS
00471     },
00472     {
00473         "dcl", L_PLI
00474     },
00475     {
00476         "Received:", L_MAIL
00477     },
00478     /* we now stop at '>' for tokens, so this one won't work {
00479         ">From", L_MAIL
00480         },*/
00481     {
00482         "Return-Path:", L_MAIL
00483     },
00484     {
00485         "Cc:", L_MAIL
00486     },
00487     {
00488         "Newsgroups:", L_NEWS
00489     },
00490     {
00491         "Path:", L_NEWS
00492     },
00493     {
00494         "Organization:", L_NEWS
00495     },
00496     {
00497         "---", L_DIFF
00498     },
00499     {
00500         "+++", L_DIFF
00501     },
00502     {
00503         "***", L_DIFF
00504     },
00505     {
00506         "@@", L_DIFF
00507     },
00508     {
00509         NULL, 0
00510     }
00511 };
00512 
00523 class KMimeMagicUtimeConf
00524 {
00525 public:
00526     KMimeMagicUtimeConf()
00527     {
00528         tmpDirs << QString::fromLatin1("/tmp"); // default value
00529 
00530         // The trick is that we also don't want the user to override globally set
00531         // directories. So we have to misuse KStandardDirs :}
00532         QStringList confDirs = KGlobal::dirs()->resourceDirs( "config" );
00533         if ( !confDirs.isEmpty() )
00534         {
00535             QString globalConf = confDirs.last() + "kmimemagicrc";
00536             if ( QFile::exists( globalConf ) )
00537             {
00538                 KSimpleConfig cfg( globalConf );
00539                 cfg.setGroup( "Settings" );
00540                 tmpDirs = cfg.readListEntry( "atimeDirs" );
00541             }
00542             if ( confDirs.count() > 1 )
00543             {
00544                 QString localConf = confDirs.first() + "kmimemagicrc";
00545                 if ( QFile::exists( localConf ) )
00546                 {
00547                     KSimpleConfig cfg( localConf );
00548                     cfg.setGroup( "Settings" );
00549                     tmpDirs += cfg.readListEntry( "atimeDirs" );
00550                 }
00551             }
00552             for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00553             {
00554                 QString dir = *it;
00555                 if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' )
00556                     (*it) += '/';
00557             }
00558         }
00559 #if 0
00560         // debug code
00561         for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00562             kdDebug(7018) << " atimeDir: " << *it << endl;
00563 #endif
00564     }
00565 
00566     bool restoreAccessTime( const QString & file ) const
00567     {
00568         QString dir = file.left( file.findRev( '/' ) );
00569         bool res = tmpDirs.contains( dir );
00570         //kdDebug(7018) << "restoreAccessTime " << file << " dir=" << dir << " result=" << res << endl;
00571         return res;
00572     }
00573     QStringList tmpDirs;
00574 };
00575 
00576 /* current config */
00577 struct config_rec {
00578     bool followLinks;
00579     QString resultBuf;
00580     int accuracy;
00581 
00582     struct magic *magic,    /* head of magic config list */
00583     *last;
00584     KMimeMagicUtimeConf * utimeConf;
00585 };
00586 
/*
 * Debug helper, only built when MIME_MAGIC_DEBUG_TABLE is defined: walks the
 * rule list and stops at the first entry whose pointer value consists of four
 * printable bytes, which is taken as a sign that a "next" pointer was
 * overwritten with text.
 *
 * NOTE(review): as written this cannot compile when the macro is enabled -
 * the debug() call below is commented out but its argument lines are still
 * present as a dangling expression ending in ");".  It also uses "conf"
 * although this is a static free function without parameters, and "lineno"
 * only exists when DEBUG_LINENUMBERS is defined - TODO confirm and repair
 * together with the callers in apprentice()/buff_apprentice().
 */
00587 #ifdef MIME_MAGIC_DEBUG_TABLE
00588 static void
00589 test_table()
00590 {
00591     struct magic *m;
00592     struct magic *prevm = NULL;
00593 
00594     kdDebug(7018) << "test_table : started" << endl;
00595     for (m = conf->magic; m; m = m->next) {
            /* test the four low bytes of the pointer value for printable characters */
00596         if (isprint((((unsigned long) m) >> 24) & 255) &&
00597             isprint((((unsigned long) m) >> 16) & 255) &&
00598             isprint((((unsigned long) m) >> 8) & 255) &&
00599             isprint(((unsigned long) m) & 255)) {
00600             //debug("test_table: POINTER CLOBBERED! "
00601             //"m=\"%c%c%c%c\" line=%d",
00602                   (((unsigned long) m) >> 24) & 255,
00603                   (((unsigned long) m) >> 16) & 255,
00604                   (((unsigned long) m) >> 8) & 255,
00605                   ((unsigned long) m) & 255,
00606                   prevm ? prevm->lineno : -1);
00607             break;
00608         }
00609         prevm = m;
00610     }
00611 }
00612 #endif
00613 
00614 #define    EATAB {while (isascii((unsigned char) *l) && \
00615           isspace((unsigned char) *l))  ++l;}
00616 
00617 int KMimeMagic::parse_line(char *line, int *rule, int lineno)
00618 {
00619     int ws_offset;
00620 
00621     /* delete newline */
00622     if (line[0]) {
00623         line[strlen(line) - 1] = '\0';
00624     }
00625     /* skip leading whitespace */
00626     ws_offset = 0;
00627     while (line[ws_offset] && isspace(line[ws_offset])) {
00628         ws_offset++;
00629     }
00630 
00631     /* skip blank lines */
00632     if (line[ws_offset] == 0) {
00633         return 0;
00634     }
00635     /* comment, do not parse */
00636     if (line[ws_offset] == '#')
00637         return 0;
00638 
00639     /* if we get here, we're going to use it so count it */
00640     (*rule)++;
00641 
00642     /* parse it */
00643     return (parse(line + ws_offset, lineno) != 0);
00644 }
00645 
00646 /*
00647  * apprentice - load configuration from the magic file.
00648  */
00649 int KMimeMagic::apprentice( const QString& magicfile )
00650 {
00651     FILE *f;
00652     char line[BUFSIZ + 1];
00653     int errs = 0;
00654     int lineno;
00655     int rule = 0;
00656     QCString fname;
00657 
00658     if (magicfile.isEmpty())
00659         return -1;
00660     fname = QFile::encodeName(magicfile);
00661     f = fopen(fname, "r");
00662     if (f == NULL) {
00663         kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl;
00664         return -1;
00665     }
00666 
00667     /* parse it */
00668     for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
00669         if (parse_line(line, &rule, lineno))
00670             errs++;
00671 
00672     fclose(f);
00673 
00674 #ifdef DEBUG_APPRENTICE
00675     kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00676     kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00677 #endif
00678 
00679 #ifdef MIME_MAGIC_DEBUG_TABLE
00680     test_table();
00681 #endif
00682 
00683     return (errs ? -1 : 0);
00684 }
00685 
00686 int KMimeMagic::buff_apprentice(char *buff)
00687 {
00688     char line[BUFSIZ + 2];
00689     int errs = 0;
00690     int lineno = 1;
00691     char *start = buff;
00692     char *end;
00693     int count = 0;
00694     int rule = 0;
00695     int len = strlen(buff) + 1;
00696 
00697     /* parse it */
00698     do {
00699         count = (len > BUFSIZ-1)?BUFSIZ-1:len;
00700         strncpy(line, start, count);
00701         line[count] = '\0';
00702         if ((end = strchr(line, '\n'))) {
00703             *(++end) = '\0';
00704             count = strlen(line);
00705         } else
00706           strcat(line, "\n");
00707         start += count;
00708         len -= count;
00709         if (parse_line(line, &rule, lineno))
00710             errs++;
00711         lineno++;
00712     } while (len > 0);
00713 
00714 #ifdef DEBUG_APPRENTICE
00715     kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00716     kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00717 #endif
00718 
00719 #ifdef MIME_MAGIC_DEBUG_TABLE
00720     test_table();
00721 #endif
00722 
00723     return (errs ? -1 : 0);
00724 }
00725 
00726 /*
00727  * extend the sign bit if the comparison is to be signed
00728  */
00729 static unsigned long
00730 signextend(struct magic *m, unsigned long v)
00731 {
00732     if (!(m->flag & UNSIGNED))
00733         switch (m->type) {
00734                 /*
00735                  * Do not remove the casts below.  They are vital.
00736                  * When later compared with the data, the sign
00737                  * extension must have happened.
00738                  */
00739             case BYTE:
00740                 v = (char) v;
00741                 break;
00742             case SHORT:
00743             case BESHORT:
00744             case LESHORT:
00745                 v = (short) v;
00746                 break;
00747             case DATE:
00748             case BEDATE:
00749             case LEDATE:
00750             case LONG:
00751             case BELONG:
00752             case LELONG:
00753                 v = (long) v;
00754                 break;
00755             case STRING:
00756                 break;
00757             default:
00758                 kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl;
00759                 return 998; //good value
00760         }
00761     return v;
00762 }
00763 
00764 /*
00765  * parse one line from magic file, append the entry to the conf->magic list
00766  */
/*
 * Parse one rule line "l" (leading whitespace already skipped by the caller)
 * into a freshly calloc()ed struct magic.
 *
 * Expected fields, in order: optional '>' continuation markers, offset
 * (optionally indirect, "(offset[.lsb][+-]n)"), type keyword (optionally
 * prefixed with 'u' and suffixed with "&mask"), optional relation character,
 * value, description.
 *
 * The second parameter is the line number; it is only named - and stored in
 * the entry - when DEBUG_LINENUMBERS is defined.
 *
 * Returns 0 on success and -1 if memory is exhausted, the type keyword is
 * unknown, or getvalue() fails.  Note that the entry is linked into the list
 * before parsing starts, so on the latter two failures it remains in the
 * list partially filled in.  An invalid offset or malformed indirect offset
 * is only logged; parsing continues.
 */
00767 int KMimeMagic::parse(char *l, int
00768 #ifdef DEBUG_LINENUMBERS
00769     lineno
00770 #endif
00771         )
00772 {
00773     int i = 0;
00774     struct magic *m;
00775     char *t,
00776     *s;
00777     /* allocate magic structure entry */
00778     if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) {
00779         kdError(7018) << "parse: Out of memory." << endl;
00780         return -1;
00781     }
00782     /* append to linked list */
00783     m->next = NULL;
00784     if (!conf->magic || !conf->last) {
00785         conf->magic = conf->last = m;
00786     } else {
00787         conf->last->next = m;
00788         conf->last = m;
00789     }
00790 
00791     /* set values in magic structure */
00792     m->flag = 0;
00793     m->cont_level = 0;
00794 #ifdef DEBUG_LINENUMBERS
00795     m->lineno = lineno;
00796 #endif
00797 
    /* each leading '>' raises the continuation level by one */
00798     while (*l == '>') {
00799         ++l;            /* step over */
00800         m->cont_level++;
00801     }
00802 
    /* '(' directly after the '>' markers starts an indirect offset */
00803     if (m->cont_level != 0 && *l == '(') {
00804         ++l;            /* step over */
00805         m->flag |= INDIR;
00806     }
00807     /* get offset, then skip over it */
00808     m->offset = (int) strtol(l, &t, 0);
00809     if (l == t) {
00810             kdError(7018) << "parse: offset " << l << " invalid" << endl;
00811     }
00812     l = t;
00813 
00814     if (m->flag & INDIR) {
        /* defaults: read a long at the indirect location, add nothing */
00815         m->in.type = LONG;
00816         m->in.offset = 0;
00817         /*
00818          * read [.lbs][+-]nnnnn)
00819          */
00820         if (*l == '.') {
00821             switch (*++l) {
00822                 case 'l':
00823                     m->in.type = LONG;
00824                     break;
00825                 case 's':
00826                     m->in.type = SHORT;
00827                     break;
00828                 case 'b':
00829                     m->in.type = BYTE;
00830                     break;
00831                 default:
00832                     kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl;
00833                     break;
00834             }
00835             l++;
00836         }
        /* s remembers where the optional sign is, so it can be applied after strtol() */
00837         s = l;
00838         if (*l == '+' || *l == '-')
00839             l++;
00840         if (isdigit((unsigned char) *l)) {
00841             m->in.offset = strtol(l, &t, 0);
00842             if (*s == '-')
00843                 m->in.offset = -m->in.offset;
00844         } else
00845             t = l;
        /* t is advanced past the expected ')' whether or not it was there */
00846         if (*t++ != ')') {
00847             kdError(7018) << "parse: missing ')' in indirect offset" << endl;
00848         }
00849         l = t;
00850     }
    /* skip any remaining digits, then the whitespace before the type keyword */
00851     while (isascii((unsigned char) *l) && isdigit((unsigned char) *l))
00852         ++l;
00853     EATAB;
00854 
/* lengths of the type keywords compared below */
00855 #define NBYTE       4
00856 #define NSHORT      5
00857 #define NLONG       4
00858 #define NSTRING     6
00859 #define NDATE       4
00860 #define NBESHORT    7
00861 #define NBELONG     6
00862 #define NBEDATE     6
00863 #define NLESHORT    7
00864 #define NLELONG     6
00865 #define NLEDATE     6
00866 
    /* a 'u' prefix on the type keyword requests an unsigned comparison */
00867     if (*l == 'u') {
00868         ++l;
00869         m->flag |= UNSIGNED;
00870     }
00871     /* get type, skip it */
00872     if (strncmp(l, "byte", NBYTE) == 0) {
00873         m->type = BYTE;
00874         l += NBYTE;
00875     } else if (strncmp(l, "short", NSHORT) == 0) {
00876         m->type = SHORT;
00877         l += NSHORT;
00878     } else if (strncmp(l, "long", NLONG) == 0) {
00879         m->type = LONG;
00880         l += NLONG;
00881     } else if (strncmp(l, "string", NSTRING) == 0) {
00882         m->type = STRING;
00883         l += NSTRING;
00884     } else if (strncmp(l, "date", NDATE) == 0) {
00885         m->type = DATE;
00886         l += NDATE;
00887     } else if (strncmp(l, "beshort", NBESHORT) == 0) {
00888         m->type = BESHORT;
00889         l += NBESHORT;
00890     } else if (strncmp(l, "belong", NBELONG) == 0) {
00891         m->type = BELONG;
00892         l += NBELONG;
00893     } else if (strncmp(l, "bedate", NBEDATE) == 0) {
00894         m->type = BEDATE;
00895         l += NBEDATE;
00896     } else if (strncmp(l, "leshort", NLESHORT) == 0) {
00897         m->type = LESHORT;
00898         l += NLESHORT;
00899     } else if (strncmp(l, "lelong", NLELONG) == 0) {
00900         m->type = LELONG;
00901         l += NLELONG;
00902     } else if (strncmp(l, "ledate", NLEDATE) == 0) {
00903         m->type = LEDATE;
00904         l += NLEDATE;
00905     } else {
        /* m stays in the list here; it is neither unlinked nor freed */
00906         kdError(7018) << "parse: type " << l << " invalid" << endl;
00907         return -1;
00908     }
00909     /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
00910     if (*l == '&') {
00911         ++l;
00912         m->mask = signextend(m, strtol(l, &l, 0));
00913     } else
00914         m->mask = (unsigned long) ~0L;
00915     EATAB;
00916 
    /* relation character; anything unrecognised means '=' and is left for getvalue() */
00917     switch (*l) {
00918         case '>':
00919         case '<':
00920             /* Old-style anding: "0 byte &0x80 dynamically linked" */
00921         case '&':
00922         case '^':
00923         case '=':
00924             m->reln = *l;
00925             ++l;
00926             break;
00927         case '!':
            /* for string rules a leading '!' is treated as part of the value */
00928             if (m->type != STRING) {
00929                 m->reln = *l;
00930                 ++l;
00931                 break;
00932             }
00933             /* FALL THROUGH */
00934         default:
            /* a lone 'x' followed by whitespace: no value to read, go straight to the description */
00935             if (*l == 'x' && isascii((unsigned char) l[1]) &&
00936                 isspace((unsigned char) l[1])) {
00937                 m->reln = *l;
00938                 ++l;
00939                 goto GetDesc;   /* Bill The Cat */
00940             }
00941             m->reln = '=';
00942             break;
00943     }
00944     EATAB;
00945 
00946     if (getvalue(m, &l))
00947         return -1;
00948     /*
00949      * now get last part - the description
00950      */
00951       GetDesc:
00952     EATAB;
    /* a leading backspace character, or the two characters "\b", sets nospflag */
00953     if (l[0] == '\b') {
00954         ++l;
00955         m->nospflag = 1;
00956     } else if ((l[0] == '\\') && (l[1] == 'b')) {
00957         ++l;
00958         ++l;
00959         m->nospflag = 1;
00960     } else
00961         m->nospflag = 0;
00962         // Copy description - until EOL or '#' (for comments)
        // at most MAXDESC-1 characters are copied; longer descriptions are cut off
00963         while (*l != '\0' && *l != '#' && i < MAXDESC-1)
00964             m->desc[i++] = *l++;
00965         m->desc[i] = '\0';
00966         // Remove trailing spaces
        // the loop stops before index 0, so the first character is never removed
00967         while (--i>0 && isspace( m->desc[i] ))
00968             m->desc[i] = '\0';
00969 
00970         // old code
00971     //while ((m->desc[i++] = *l++) != '\0' && i < MAXDESC) /* NULLBODY */ ;
00972 
00973 #ifdef DEBUG_APPRENTICE
00974     kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl;
00975 #endif
00976     return 0;
00977 }
00978 
00979 /*
00980  * Read a numeric value from a pointer, into the value union of a magic
00981  * pointer, according to the magic type.  Update the string pointer to point
00982  * just after the number read.  Return 0 for success, non-zero for failure.
00983  */
00984 static int
00985 getvalue(struct magic *m, char **p)
00986 {
00987     int slen;
00988 
00989     if (m->type == STRING) {
00990         *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
00991         m->vallen = slen;
00992     } else if (m->reln != 'x')
00993         m->value.l = signextend(m, strtol(*p, p, 0));
00994     return 0;
00995 }
00996 
00997 /*
00998  * Convert a string containing C character escapes.  Stop at an unescaped
00999  * space or tab. Copy the converted version to "p", returning its length in
01000  * *slen. Return updated scan pointer as function result.
01001  */
01002 static char *
01003 getstr(register char *s, register char *p, int plen, int *slen)
01004 {
01005     char *origs = s,
01006     *origp = p;
01007     char *pmax = p + plen - 1;
01008     register int c;
01009     register int val;
01010 
01011     while ((c = *s++) != '\0') {
01012         if (isspace((unsigned char) c))
01013             break;
01014         if (p >= pmax) {
01015             kdError(7018) << "String too long: " << origs << endl;
01016             break;
01017         }
01018         if (c == '\\') {
01019             switch (c = *s++) {
01020 
01021                 case '\0':
01022                     goto out;
01023 
01024                 default:
01025                     *p++ = (char) c;
01026                     break;
01027 
01028                 case 'n':
01029                     *p++ = '\n';
01030                     break;
01031 
01032                 case 'r':
01033                     *p++ = '\r';
01034                     break;
01035 
01036                 case 'b':
01037                     *p++ = '\b';
01038                     break;
01039 
01040                 case 't':
01041                     *p++ = '\t';
01042                     break;
01043 
01044                 case 'f':
01045                     *p++ = '\f';
01046                     break;
01047 
01048                 case 'v':
01049                     *p++ = '\v';
01050                     break;
01051 
01052                     /* \ and up to 3 octal digits */
01053                 case '0':
01054                 case '1':
01055                 case '2':
01056                 case '3':
01057                 case '4':
01058                 case '5':
01059                 case '6':
01060                 case '7':
01061                     val = c - '0';
01062                     c = *s++;   /* try for 2 */
01063                     if (c >= '0' && c <= '7') {
01064                         val = (val << 3) | (c - '0');
01065                         c = *s++;   /* try for 3 */
01066                         if (c >= '0' && c <= '7')
01067                             val = (val << 3) | (c - '0');
01068                         else
01069                             --s;
01070                     } else
01071                         --s;
01072                     *p++ = (char) val;
01073                     break;
01074 
01075                     /* \x and up to 3 hex digits */
01076                 case 'x':
01077                     val = 'x';  /* Default if no digits */
01078                     c = hextoint(*s++); /* Get next char */
01079                     if (c >= 0) {
01080                         val = c;
01081                         c = hextoint(*s++);
01082                         if (c >= 0) {
01083                             val = (val << 4) + c;
01084                             c = hextoint(*s++);
01085                             if (c >= 0) {
01086                                 val = (val << 4) + c;
01087                             } else
01088                                 --s;
01089                         } else
01090                             --s;
01091                     } else
01092                         --s;
01093                     *p++ = (char) val;
01094                     break;
01095             }
01096         } else
01097             *p++ = (char) c;
01098     }
01099       out:
01100     *p = '\0';
01101     *slen = p - origp;
01102     //for ( char* foo = origp; foo < p ; ++foo )
01103     //  kdDebug(7018) << "  " << *foo << endl;
01104     return s;
01105 }
01106 
01107 
/*
 * Map one hexadecimal digit character to its numeric value.
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'; anything else yields -1.
 */
static int
hextoint(int c)
{
    const unsigned char ch = (unsigned char) c;

    if (!isascii(ch))
        return -1;

    if (isdigit(ch))
        return c - '0';

    if (c >= 'a' && c <= 'f')
        return 10 + (c - 'a');

    if (c >= 'A' && c <= 'F')
        return 10 + (c - 'A');

    return -1;
}
01122 
01123 /*
01124  * Convert the byte order of the data we are looking at
01125  */
01126 static int
01127 mconvert(union VALUETYPE *p, struct magic *m)
01128 {
01129     switch (m->type) {
01130         case BYTE:
01131             return 1;
01132         case STRING:
01133             /* Null terminate */
01134             p->s[sizeof(p->s) - 1] = '\0';
01135             return 1;
01136 #ifndef WORDS_BIGENDIAN
01137         case SHORT:
01138 #endif
01139         case BESHORT:
01140             p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
01141             return 1;
01142 #ifndef WORDS_BIGENDIAN
01143         case LONG:
01144         case DATE:
01145 #endif
01146         case BELONG:
01147         case BEDATE:
01148             p->l = (long)
01149                 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
01150             return 1;
01151 #ifdef WORDS_BIGENDIAN
01152         case SHORT:
01153 #endif
01154         case LESHORT:
01155             p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
01156             return 1;
01157 #ifdef WORDS_BIGENDIAN
01158         case LONG:
01159         case DATE:
01160 #endif
01161         case LELONG:
01162         case LEDATE:
01163             p->l = (long)
01164                 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
01165             return 1;
01166         default:
01167             kdError(7018) << "mconvert: invalid type " << m->type << endl;
01168             return 0;
01169     }
01170 }
01171 
01172 
01173 static int
01174 mget(union VALUETYPE *p, unsigned char *s, struct magic *m,
01175      int nbytes)
01176 {
01177     long offset = m->offset;
01178         switch ( m->type )
01179     {
01180         case BYTE:
01181         if ( offset + 1 > nbytes-1 ) // nbytes = (size of file) + 1
01182             return 0;
01183         break;
01184         case SHORT:
01185         case BESHORT:
01186         case LESHORT:
01187             if ( offset + 2 > nbytes-1 )
01188             return 0;
01189         break;
01190         case LONG:
01191         case BELONG:
01192         case LELONG:
01193         case DATE:
01194         case BEDATE:
01195         case LEDATE:
01196             if ( offset + 4 > nbytes-1 )
01197             return 0;
01198         break;
01199         case STRING:
01200         break;
01201     }
01202 
01203 // The file length might be < sizeof(union VALUETYPE) (David)
01204 // -> pad with zeros (the 'file' command does it this way)
01205 // Thanks to Stan Covington <stan@calderasystems.com> for detailed report
01206     if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01207     {
01208       int have = nbytes - offset;
01209       memset(p, 0, sizeof(union VALUETYPE));
01210       if (have > 0)
01211         memcpy(p, s + offset, have);
01212     } else
01213       memcpy(p, s + offset, sizeof(union VALUETYPE));
01214 
01215     if (!mconvert(p, m))
01216         return 0;
01217 
01218     if (m->flag & INDIR) {
01219 
01220         switch (m->in.type) {
01221             case BYTE:
01222                 offset = p->b + m->in.offset;
01223                 break;
01224             case SHORT:
01225                 offset = p->h + m->in.offset;
01226                 break;
01227             case LONG:
01228                 offset = p->l + m->in.offset;
01229                 break;
01230         }
01231 
01232         if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01233              return 0;
01234 
01235         memcpy(p, s + offset, sizeof(union VALUETYPE));
01236 
01237         if (!mconvert(p, m))
01238             return 0;
01239     }
01240     return 1;
01241 }
01242 
01243 static int
01244 mcheck(union VALUETYPE *p, struct magic *m)
01245 {
01246     register unsigned long l = m->value.l;
01247     register unsigned long v;
01248     int matched;
01249 
01250     if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
01251         kdError(7018) << "BOINK" << endl;
01252         return 1;
01253     }
01254     switch (m->type) {
01255         case BYTE:
01256             v = p->b;
01257             break;
01258 
01259         case SHORT:
01260         case BESHORT:
01261         case LESHORT:
01262             v = p->h;
01263             break;
01264 
01265         case LONG:
01266         case BELONG:
01267         case LELONG:
01268         case DATE:
01269         case BEDATE:
01270         case LEDATE:
01271             v = p->l;
01272             break;
01273 
01274         case STRING:
01275             l = 0;
01276             /*
01277              * What we want here is: v = strncmp(m->value.s, p->s,
01278              * m->vallen); but ignoring any nulls.  bcmp doesn't give
01279              * -/+/0 and isn't universally available anyway.
01280              */
01281             v = 0;
01282             {
01283                 register unsigned char *a = (unsigned char *) m->value.s;
01284                 register unsigned char *b = (unsigned char *) p->s;
01285                 register int len = m->vallen;
01286                 Q_ASSERT(len);
01287 
01288                 while (--len >= 0)
01289                     if ((v = *b++ - *a++) != 0)
01290                         break;
01291             }
01292             break;
01293         default:
01294             kdError(7018) << "mcheck: invalid type " << m->type << endl;
01295             return 0;   /* NOTREACHED */
01296     }
01297 #if 0
01298     qDebug("Before signextend %08x", v);
01299 #endif
01300     v = signextend(m, v) & m->mask;
01301 #if 0
01302     qDebug("After signextend %08x", v);
01303 #endif
01304 
01305     switch (m->reln) {
01306         case 'x':
01307             matched = 1;
01308             break;
01309 
01310         case '!':
01311             matched = v != l;
01312             break;
01313 
01314         case '=':
01315             matched = v == l;
01316             break;
01317 
01318         case '>':
01319             if (m->flag & UNSIGNED)
01320                 matched = v > l;
01321             else
01322                 matched = (long) v > (long) l;
01323             break;
01324 
01325         case '<':
01326             if (m->flag & UNSIGNED)
01327                 matched = v < l;
01328             else
01329                 matched = (long) v < (long) l;
01330             break;
01331 
01332         case '&':
01333             matched = (v & l) == l;
01334             break;
01335 
01336         case '^':
01337             matched = (v & l) != l;
01338             break;
01339 
01340         default:
01341             matched = 0;
01342             kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl;
01343             break;  /* NOTREACHED */
01344     }
01345 
01346     return matched;
01347 }
01348 
01349 /*
01350  * magic_process - process input file fn. Opens the file and reads a
01351  * fixed-size buffer to begin processing the contents.
01352  */
01353 
01354 void process(struct config_rec* conf, const QString & fn)
01355 {
01356     int fd = 0;
01357     unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */
01358     KDE_struct_stat sb;
01359     int nbytes = 0;         /* number of bytes read from a datafile */
01360         int tagbytes = 0;       /* size of prefixed tag */
01361         QCString fileName = QFile::encodeName( fn );
01362 
01363     /*
01364      * first try judging the file based on its filesystem status
01365      */
01366     if (fsmagic(conf, fileName, &sb) != 0) {
01367         //resultBuf += "\n";
01368         return;
01369     }
01370     if ((fd = KDE_open(fileName, O_RDONLY)) < 0) {
01371         /* We can't open it, but we were able to stat it. */
01372         /*
01373          * if (sb.st_mode & 0002) addResult("writable, ");
01374          * if (sb.st_mode & 0111) addResult("executable, ");
01375          */
01376         //kdDebug(7018) << "can't read `" << fn << "' (" << strerror(errno) << ")." << endl;
01377         conf->resultBuf = MIME_BINARY_UNREADABLE;
01378         return;
01379     }
01380     /*
01381      * try looking at the first HOWMANY bytes
01382      */
01383     if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) {
01384         kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl;
01385         conf->resultBuf = MIME_BINARY_UNREADABLE;
01386         (void)close(fd);
01387         return;
01388     }
01389         if ((tagbytes = tagmagic(buf, nbytes))) {
01390         // Read buffer at new position
01391         lseek(fd, tagbytes, SEEK_SET);
01392         nbytes = read(fd, (char*)buf, HOWMANY);
01393         if (nbytes < 0) {
01394             conf->resultBuf = MIME_BINARY_UNREADABLE;
01395             (void)close(fd);
01396             return;
01397         }
01398         }
01399     if (nbytes == 0) {
01400         conf->resultBuf = MIME_BINARY_ZEROSIZE;
01401     } else {
01402         buf[nbytes++] = '\0';   /* null-terminate it */
01403         tryit(conf, buf, nbytes);
01404     }
01405 
01406         if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
01407         {
01408             /*
01409              * Try to restore access, modification times if read it.
01410              * This changes the "change" time (ctime), but we can't do anything
01411              * about that.
01412              */
01413             struct utimbuf utbuf;
01414             utbuf.actime = sb.st_atime;
01415             utbuf.modtime = sb.st_mtime;
01416             (void) utime(fileName, &utbuf);
01417         }
01418     (void) close(fd);
01419 }
01420 
01421 
01422 static void tryit(struct config_rec* conf, unsigned char *buf, int nb)
01423 {
01424     /* try tests in /etc/magic (or surrogate magic file) */
01425     if (match(conf, buf, nb))
01426         return;
01427 
01428     /* try known keywords, check for ascii-ness too. */
01429     if (ascmagic(conf, buf, nb) == 1)
01430         return;
01431 
01432         /* see if it's plain text */
01433         if (textmagic(conf, buf, nb))
01434                 return;
01435 
01436     /* abandon hope, all ye who remain here */
01437     conf->resultBuf = MIME_BINARY_UNKNOWN;
01438     conf->accuracy = 0;
01439 }
01440 
01441 static int
01442 fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb)
01443 {
01444     int ret = 0;
01445 
01446     /*
01447      * Fstat is cheaper but fails for files you don't have read perms on.
01448      * On 4.2BSD and similar systems, use lstat() to identify symlinks.
01449      */
01450     ret = KDE_lstat(fn, sb);  /* don't merge into if; see "ret =" above */
01451 
01452     if (ret) {
01453         return 1;
01454 
01455     }
01456     /*
01457      * if (sb->st_mode & S_ISUID) resultBuf += "setuid ";
01458      * if (sb->st_mode & S_ISGID) resultBuf += "setgid ";
01459      * if (sb->st_mode & S_ISVTX) resultBuf += "sticky ";
01460      */
01461 
01462     switch (sb->st_mode & S_IFMT) {
01463     case S_IFDIR:
01464         conf->resultBuf = MIME_INODE_DIR;
01465         return 1;
01466     case S_IFCHR:
01467         conf->resultBuf = MIME_INODE_CDEV;
01468         return 1;
01469     case S_IFBLK:
01470         conf->resultBuf = MIME_INODE_BDEV;
01471         return 1;
01472         /* TODO add code to handle V7 MUX and Blit MUX files */
01473 #ifdef    S_IFIFO
01474     case S_IFIFO:
01475         conf->resultBuf = MIME_INODE_FIFO;
01476         return 1;
01477 #endif
01478 #ifdef    S_IFLNK
01479     case S_IFLNK:
01480     {
01481         char buf[BUFSIZ + BUFSIZ + 4];
01482         register int nch;
01483         KDE_struct_stat tstatbuf;
01484 
01485         if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
01486             conf->resultBuf = MIME_INODE_LINK;
01487             //conf->resultBuf += "\nunreadable";
01488             return 1;
01489         }
01490         buf[nch] = '\0'; /* readlink(2) forgets this */
01491         /* If broken symlink, say so and quit early. */
01492         if (*buf == '/') {
01493             if (KDE_stat(buf, &tstatbuf) < 0) {
01494                 conf->resultBuf = MIME_INODE_LINK;
01495                 //conf->resultBuf += "\nbroken";
01496                 return 1;
01497             }
01498         } else {
01499             char *tmp;
01500             char buf2[BUFSIZ + BUFSIZ + 4];
01501 
01502             strncpy(buf2, fn, BUFSIZ);
01503             buf2[BUFSIZ] = 0;
01504 
01505             if ((tmp = strrchr(buf2, '/')) == NULL) {
01506                 tmp = buf; /* in current dir */
01507             } else {
01508                 /* dir part plus (rel.) link */
01509                 *++tmp = '\0';
01510                 strcat(buf2, buf);
01511                 tmp = buf2;
01512             }
01513             if (KDE_stat(tmp, &tstatbuf) < 0) {
01514                 conf->resultBuf = MIME_INODE_LINK;
01515                 //conf->resultBuf += "\nbroken";
01516                 return 1;
01517             } else
01518                 strcpy(buf, tmp);
01519         }
01520         if (conf->followLinks)
01521             process( conf, QFile::decodeName( buf ) );
01522         else
01523             conf->resultBuf = MIME_INODE_LINK;
01524         return 1;
01525     }
01526     return 1;
01527 #endif
01528 #ifdef    S_IFSOCK
01529 #ifndef __COHERENT__
01530     case S_IFSOCK:
01531         conf->resultBuf = MIME_INODE_SOCK;
01532         return 1;
01533 #endif
01534 #endif
01535     case S_IFREG:
01536         break;
01537     default:
01538         kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl;
01539         /* NOTREACHED */
01540     }
01541 
01542     /*
01543      * regular file, check next possibility
01544      */
01545     if (sb->st_size == 0) {
01546         conf->resultBuf = MIME_BINARY_ZEROSIZE;
01547         return 1;
01548     }
01549     return 0;
01550 }
01551 
01552 /*
01553  * Go through the whole list, stopping if you find a match.  Process all the
01554  * continuations of that match before returning.
01555  *
01556  * We support multi-level continuations:
01557  *
01558  * At any time when processing a successful top-level match, there is a current
01559  * continuation level; it represents the level of the last successfully
01560  * matched continuation.
01561  *
01562  * Continuations above that level are skipped as, if we see one, it means that
01563  * the continuation that controls them - i.e, the lower-level continuation
01564  * preceding them - failed to match.
01565  *
01566  * Continuations below that level are processed as, if we see one, it means
01567  * we've finished processing or skipping higher-level continuations under the
01568  * control of a successful or unsuccessful lower-level continuation, and are
01569  * now seeing the next lower-level continuation and should process it.  The
01570  * current continuation level reverts to the level of the one we're seeing.
01571  *
01572  * Continuations at the current level are processed as, if we see one, there's
01573  * no lower-level continuation that may have failed.
01574  *
01575  * If a continuation matches, we bump the current continuation level so that
01576  * higher-level continuations are processed.
01577  */
01578 static int
01579 match(struct config_rec* conf, unsigned char *s, int nbytes)
01580 {
01581     int cont_level = 0;
01582     union VALUETYPE p;
01583     struct magic *m;
01584 
01585 #ifdef DEBUG_MIMEMAGIC
01586     kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
01587     for (m = conf->magic; m; m = m->next) {
01588         if (isprint((((unsigned long) m) >> 24) & 255) &&
01589             isprint((((unsigned long) m) >> 16) & 255) &&
01590             isprint((((unsigned long) m) >> 8) & 255) &&
01591             isprint(((unsigned long) m) & 255)) {
01592             kdDebug(7018) << "match: POINTER CLOBBERED! " << endl;
01593             break;
01594         }
01595     }
01596 #endif
01597 
01598     for (m = conf->magic; m; m = m->next) {
01599 #ifdef DEBUG_MIMEMAGIC
01600         kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl;
01601 #endif
01602         memset(&p, 0, sizeof(union VALUETYPE));
01603 
01604         /* check if main entry matches */
01605         if (!mget(&p, s, m, nbytes) ||
01606             !mcheck(&p, m)) {
01607             struct magic *m_cont;
01608 
01609             /*
01610              * main entry didn't match, flush its continuations
01611              */
01612             if (!m->next || (m->next->cont_level == 0)) {
01613                 continue;
01614             }
01615             m_cont = m->next;
01616             while (m_cont && (m_cont->cont_level != 0)) {
01617 #ifdef DEBUG_MIMEMAGIC
01618                 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl;
01619 #endif
01620                 /*
01621                  * this trick allows us to keep *m in sync
01622                  * when the continue advances the pointer
01623                  */
01624                 m = m_cont;
01625                 m_cont = m_cont->next;
01626             }
01627             continue;
01628         }
01629         /* if we get here, the main entry rule was a match */
01630         /* this will be the last run through the loop */
01631 #ifdef DEBUG_MIMEMAGIC
01632         kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01633 #endif
01634 
01635         /* remember the match */
01636         conf->resultBuf = m->desc;
01637 
01638         cont_level++;
01639         /*
01640          * while (m && m->next && m->next->cont_level != 0 && ( m =
01641          * m->next ))
01642          */
01643         m = m->next;
01644         while (m && (m->cont_level != 0)) {
01645 #ifdef DEBUG_MIMEMAGIC
01646                     kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01647 #endif
01648                     if (cont_level >= m->cont_level) {
01649                 if (cont_level > m->cont_level) {
01650                     /*
01651                      * We're at the end of the level
01652                      * "cont_level" continuations.
01653                      */
01654                     cont_level = m->cont_level;
01655                 }
01656                 if (mget(&p, s, m, nbytes) &&
01657                     mcheck(&p, m)) {
01658                     /*
01659                      * This continuation matched. Print
01660                      * its message, with a blank before
01661                      * it if the previous item printed
01662                      * and this item isn't empty.
01663                      */
01664 #ifdef DEBUG_MIMEMAGIC
01665                                     kdDebug(7018) << "continuation matched" << endl;
01666 #endif
01667                                     conf->resultBuf = m->desc;
01668                     cont_level++;
01669                 }
01670             }
01671             /* move to next continuation record */
01672             m = m->next;
01673         }
01674                 // KDE-specific: need an actual mimetype for a real match
01675                 // If we only matched a rule with continuations but no mimetype, it's not a match
01676                 if ( !conf->resultBuf.isEmpty() )
01677                 {
01678 #ifdef DEBUG_MIMEMAGIC
01679                     kdDebug(7018) << "match: matched" << endl;
01680 #endif
01681                     return 1;       /* all through */
01682                 }
01683     }
01684 #ifdef DEBUG_MIMEMAGIC
01685     kdDebug(7018) << "match: failed" << endl;
01686 #endif
01687     return 0;               /* no match at all */
01688 }
01689 
// Try to parse prefixed tags before matching on content.
// So far only ID3v2 tags (major version <= 4) are handled.
//
//   buf     start of the file data
//   nbytes  number of valid bytes in buf
//
// Returns the total size in bytes of the tag found at the start of buf
// (10 byte header + tag body + optional 10 byte footer), or 0 when there is
// no tag, the buffer holds fewer than 40 bytes, or the header is not one we
// understand.
static int tagmagic(unsigned char *buf, int nbytes)
{
    if (nbytes < 40)
        return 0;
    if (buf[0] != 'I' || buf[1] != 'D' || buf[2] != '3')
        return 0;

    // Sanity (known version, no unknown flags)
    if (buf[3] > 4 || buf[4] == 0xFF)
        return 0;
    if (buf[5] & 0x0F)
        return 0;

    // The size is stored "syncsafe": four bytes of 7 bits each.  A byte with
    // its top bit set cannot belong to a real tag header, and would make the
    // shifted parts below overlap.
    if ((buf[6] | buf[7] | buf[8] | buf[9]) & 0x80)
        return 0;

    int size = 10;              // the header itself
    // Tag has v4 footer
    if (buf[5] & 0x10)
        size += 10;
    // Calculated syncsafe size
    size += (buf[6] << 21) | (buf[7] << 14) | (buf[8] << 7) | buf[9];
    return size;
}
01711 
// A token found by Tokenizer: a run of non-delimiter characters inside the
// tokenizer's buffer.  The run is NOT NUL-terminated; use length.
struct Token {
    char *data;     // first character of the token (points into the buffer)
    int length;     // number of characters; 0 means "no more tokens"
};

// Splits a buffer into tokens separated by any of
// '\n' '\0' '\t' ' ' '\r' '\f' ',' ';' '>'.
// The buffer is not modified and must outlive the tokenizer.
struct Tokenizer
{
    Tokenizer(char* buf, int nbytes)
        : data(buf), length(nbytes), pos(0), newline(false)
    {
        token.data = buf;
        token.length = 0;
    }

    // True when the token returned by the last nextToken() call is the first
    // token of a line: it is the first token of the buffer, or a '\n' was
    // skipped in front of it.
    bool isNewLine() {
        return newline;
    }

    // Returns the next token.  The returned object is owned by the tokenizer
    // and is overwritten by the following call.  A token with length 0 means
    // the buffer is exhausted.
    Token* nextToken() {
        newline = (pos == 0);
        token.data = data+pos;
        token.length = 0;
        while(pos<length) {
            switch (data[pos]) {
                case '\n':
                    // Only a line break IN FRONT of the token starts a new
                    // line.  A line break that terminates the token is left
                    // in place and seen again by the next call.
                    if (token.length == 0)
                        newline = true;
                    // fall through
                case '\0':
                case '\t':
                case ' ':
                case '\r':
                case '\f':
                case ',':
                case ';':
                case '>':
                    if (token.length != 0)
                        return &token;      // delimiter ends the token
                    token.data++;           // skip leading delimiter
                    break;
                default:
                    token.length++;
            }
            pos++;
        }
        return &token;
    }

private:
    Token token;    // storage for the token handed out by nextToken()
    char* data;     // buffer being tokenized
    int length;     // number of bytes in data
    int pos;        // index of the next byte to examine
    bool newline;   // see isNewLine()
};
01765 
01766 
01767 /* an optimization over plain strcmp() */
01768 //#define    STREQ(a, b)    (*(a) == *(b) && strcmp((a), (b)) == 0)
01769 static inline bool STREQ(const Token *token, const char *b) {
01770     const char *a = token->data;
01771     int len = token->length;
01772     if (a == b) return true;
01773     while(*a && *b && len > 0) {
01774         if (*a != *b) return false;
01775         a++; b++; len--;
01776     }
01777     return (len == 0 && *b == 0);
01778 }
01779 
01780 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes)
01781 {
01782     int i;
01783     double pct, maxpct, pctsum;
01784     double pcts[NTYPES];
01785     int mostaccurate, tokencount;
01786     int typeset, jonly, conly, jconly, objconly, cpponly;
01787     int has_escapes = 0;
01788     //unsigned char *s;
01789     //char nbuf[HOWMANY + 1]; /* one extra for terminating '\0' */
01790 
01791     /* these are easy, do them first */
01792     conf->accuracy = 70;
01793 
01794     /*
01795      * for troff, look for . + letter + letter or .\"; this must be done
01796      * to disambiguate tar archives' ./file and other trash from real
01797      * troff input.
01798      */
01799     if (*buf == '.') {
01800         unsigned char *tp = buf + 1;
01801 
01802         while (isascii(*tp) && isspace(*tp))
01803             ++tp;   /* skip leading whitespace */
01804         if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') &&
01805              isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) {
01806             conf->resultBuf = MIME_APPL_TROFF;
01807             return 1;
01808         }
01809     }
01810     if ((*buf == 'c' || *buf == 'C') &&
01811         isascii(*(buf + 1)) && isspace(*(buf + 1))) {
01812         /* Fortran */
01813         conf->resultBuf = MIME_TEXT_FORTRAN;
01814         return 1;
01815     }
01816     assert(nbytes-1 < HOWMANY + 1);
01817     /* look for tokens - this is expensive! */
01818     has_escapes = (memchr(buf, '\033', nbytes) != NULL);
01819         Tokenizer tokenizer((char*)buf, nbytes);
01820         const Token* token;
01821         bool linecomment = false, blockcomment = false;
01822     const struct names *p;
01823     int typecount[NTYPES];
01824 /*
01825  * Fritz:
01826  * Try a little harder on C/C++/Java.
01827  */
01828     memset(&typecount, 0, sizeof(typecount));
01829     typeset = 0;
01830     jonly = 0;
01831     conly = 0;
01832     jconly = 0;
01833     objconly = 0;
01834     cpponly = 0;
01835     tokencount = 0;
01836         bool foundClass = false; // mandatory for java
01837     // first collect all possible types and count matches
01838         // we stop at '>' too, because of "<title>blah</title>" on HTML pages
01839     while ((token = tokenizer.nextToken())->length > 0) {
01840 #ifdef DEBUG_MIMEMAGIC
01841             kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl;
01842 #endif
01843             if (linecomment && tokenizer.isNewLine())
01844                 linecomment = false;
01845             if (blockcomment && STREQ(token, "*/")) {
01846                 blockcomment = false;
01847                 continue;
01848             }
01849             for (p = names; p->name ; p++) {
01850                 if (STREQ(token, p->name)) {
01851 #ifdef DEBUG_MIMEMAGIC
01852                     kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl;
01853 #endif
01854                     tokencount++;
01855                     typeset |= p->type;
01856                     if(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) {
01857                         if (linecomment || blockcomment) {
01858                             continue;
01859                         }
01860                         else {
01861                             switch(p->type & (L_C|L_CPP|L_JAVA|L_OBJC))
01862                             {
01863                 case L_JAVA:
01864                     jonly++;
01865                     break;
01866                 case L_OBJC:
01867                     objconly++;
01868                     break;
01869                 case L_CPP:
01870                     cpponly++;
01871                     break;
01872                 case (L_CPP|L_JAVA):
01873                     jconly++;
01874                                         if ( !foundClass && STREQ(token, "class") )
01875                                             foundClass = true;
01876                     break;
01877                 case (L_C|L_CPP):
01878                     conly++;
01879                     break;
01880                 default:
01881                                     if (STREQ(token, "//")) linecomment = true;
01882                                     if (STREQ(token, "/*")) blockcomment = true;
01883                             }
01884             }
01885                     }
01886                     for (i = 0; i < (int)NTYPES; i++) {
01887                         if ((1 << i) & p->type) typecount[i]+= p->type & FLAG_STRONG ? 2 : 1;
01888                     }
01889         }
01890             }
01891     }
01892 
01893     if (typeset & (L_C|L_CPP|L_JAVA|L_OBJC)) {
01894         conf->accuracy = 60;
01895             if (!(typeset & ~(L_C|L_CPP|L_JAVA|L_OBJC))) {
01896 #ifdef DEBUG_MIMEMAGIC
01897                         kdDebug(7018) << "C/C++/Java/ObjC: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " objconly=" << objconly << endl;
01898 #endif
01899             if (jonly > 1 && foundClass) {
01900                 // At least two java-only tokens have matched, including "class"
01901                 conf->resultBuf = QString(types[P_JAVA].type);
01902                 return 1;
01903             }
01904             if (jconly > 1) {
01905                 // At least two non-C (only C++ or Java) token have matched.
01906                 if (typecount[P_JAVA] < typecount[P_CPP])
01907                   conf->resultBuf = QString(types[P_CPP].type);
01908                 else
01909                   conf->resultBuf = QString(types[P_JAVA].type);
01910                 return 1;
01911             }
01912                         if (conly + cpponly > 1) {
01913                  // Either C or C++.
01914                       if (cpponly > 0)
01915                                 conf->resultBuf = QString(types[P_CPP].type);
01916                               else
01917                                 conf->resultBuf = QString(types[P_C].type);
01918                               return 1;
01919                         }
01920             if (objconly > 0) {
01921                 conf->resultBuf =  QString(types[P_OBJC].type);
01922                 return 1;
01923             }
01924           }
01925     }
01926 
01927     /* Neither C, C++ or Java (or all of them without able to distinguish):
01928      * Simply take the token-class with the highest
01929      * matchcount > 0
01930      */
01931     mostaccurate = -1;
01932     maxpct = pctsum = 0.0;
01933     for (i = 0; i < (int)NTYPES; i++) {
01934       if (typecount[i] > 1) { // one word is not enough, we need at least two
01935         pct = (double)typecount[i] / (double)types[i].kwords *
01936             (double)types[i].weight;
01937         pcts[i] = pct;
01938         pctsum += pct;
01939         if (pct > maxpct) {
01940             maxpct = pct;
01941             mostaccurate = i;
01942           }
01943 #ifdef DEBUG_MIMEMAGIC
01944           kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl;
01945 #endif
01946       }
01947     }
01948     if (mostaccurate >= 0) {
01949             if ( mostaccurate != P_JAVA || foundClass ) // 'class' mandatory for java
01950             {
01951         conf->accuracy = (int)(pcts[mostaccurate] / pctsum * 60);
01952 #ifdef DEBUG_MIMEMAGIC
01953                 kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << conf->accuracy << endl;
01954 #endif
01955         conf->resultBuf = QString(types[mostaccurate].type);
01956         return 1;
01957             }
01958     }
01959 
01960     switch (is_tar(buf, nbytes)) {
01961         case 1:
01962             /* V7 tar archive */
01963             conf->resultBuf = MIME_APPL_TAR;
01964             conf->accuracy = 90;
01965             return 1;
01966         case 2:
01967             /* POSIX tar archive */
01968             conf->resultBuf = MIME_APPL_TAR;
01969             conf->accuracy = 90;
01970             return 1;
01971     }
01972 
01973     for (i = 0; i < nbytes; i++) {
01974         if (!isascii(*(buf + i)))
01975             return 0;   /* not all ascii */
01976     }
01977 
01978     /* all else fails, but it is ascii... */
01979     conf->accuracy = 90;
01980     if (has_escapes) {
01981         /* text with escape sequences */
01982         /* we leave this open for further differentiation later */
01983         conf->resultBuf = MIME_TEXT_UNKNOWN;
01984     } else {
01985         /* plain text */
01986         conf->resultBuf = MIME_TEXT_PLAIN;
01987     }
01988     return 1;
01989 }
01990 
01991 /* Maximal length of a line we consider "reasonable". */
01992 #define TEXT_MAXLINELEN 300
01993 
01994 // This code is taken from the "file" command, where it is licensed
01995 // in the "beer-ware license" :-)
01996 // Original author: <joerg@FreeBSD.ORG>
01997 // Simplified by David Faure to avoid the static array char[256].
01998 static int textmagic(struct config_rec* conf, unsigned char * buf, int nbytes)
01999 {
02000     int i;
02001     unsigned char *cp;
02002 
02003     nbytes--;
02004 
02005     /* First, look whether there are "unreasonable" characters. */
02006     for (i = 0, cp = buf; i < nbytes; i++, cp++)
02007         if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F))
02008             return 0;
02009 
02010     /* Now, look whether the file consists of lines of
02011      * "reasonable" length. */
02012 
02013     for (i = 0; i < nbytes;) {
02014         cp = (unsigned char *) memchr(buf, '\n', nbytes - i);
02015         if (cp == NULL) {
02016             /* Don't fail if we hit the end of buffer. */
02017             if (i + TEXT_MAXLINELEN >= nbytes)
02018                 break;
02019             else
02020                 return 0;
02021         }
02022         if (cp - buf > TEXT_MAXLINELEN)
02023             return 0;
02024         i += (cp - buf + 1);
02025         buf = cp + 1;
02026     }
02027     conf->resultBuf = MIME_TEXT_PLAIN;
02028     return 1;
02029 }
02030 
02031 
02032 /*
02033  * is_tar() -- figure out whether file is a tar archive.
02034  *
02035  * Stolen (by author of file utility) from the public domain tar program: Public
02036  * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
02037  *
02038  * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7
02039  * 1997/06/24 00:41:02 ikluft Exp ikluft $
02040  *
02041  * Comments changed and some code/comments reformatted for file command by Ian
02042  * Darwin.
02043  */
02044 
02045 #define    isodigit(c)    ( ((c) >= '0') && ((c) <= '7') )
02046 
02047 /*
02048  * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for
02049  * old UNIX tar file, 2 for Unix Std (POSIX) tar file.
02050  */
02051 
02052 static int
02053 is_tar(unsigned char *buf, int nbytes)
02054 {
02055     register union record *header = (union record *) buf;
02056     register int i;
02057     register long sum,
02058      recsum;
02059     register char *p;
02060 
02061     if (nbytes < (int)sizeof(union record))
02062          return 0;
02063 
02064     recsum = from_oct(8, header->header.chksum);
02065 
02066     sum = 0;
02067     p = header->charptr;
02068     for (i = sizeof(union record); --i >= 0;) {
02069         /*
02070          * We can't use unsigned char here because of old compilers,
02071          * e.g. V7.
02072          */
02073         sum += 0xFF & *p++;
02074     }
02075 
02076     /* Adjust checksum to count the "chksum" field as blanks. */
02077     for (i = sizeof(header->header.chksum); --i >= 0;)
02078         sum -= 0xFF & header->header.chksum[i];
02079     sum += ' ' * sizeof header->header.chksum;
02080 
02081     if (sum != recsum)
02082         return 0;       /* Not a tar archive */
02083 
02084     if (0 == strcmp(header->header.magic, TMAGIC))
02085         return 2;       /* Unix Standard tar archive */
02086 
02087     return 1;               /* Old fashioned tar archive */
02088 }
02089 
02090 
/*
 * Quick and dirty octal conversion.
 *
 * Parses at most `digs` characters starting at `where`: leading whitespace
 * is skipped, then octal digits ('0'..'7') are accumulated.
 *
 * Result is -1 if the field is invalid: it consists of whitespace only for
 * all `digs` characters, or the digits are followed (within the field) by
 * a character that is neither NUL nor whitespace.  Otherwise the parsed
 * value is returned.
 *
 * Note: the characters are cast to unsigned char before being handed to
 * isspace(); passing a negative plain char (any byte >= 0x80 where char is
 * signed) to the <ctype.h> functions is undefined behaviour, and this
 * function is fed raw file contents.
 */
static long
from_oct(int digs, char *where)
{
    long value = 0;

    while (isspace(static_cast<unsigned char>(*where))) {   /* Skip spaces */
        where++;
        if (--digs <= 0)
            return -1;  /* All blank field */
    }

    /* Scan til nonoctal (same test as the isodigit() macro). */
    while (digs > 0 && *where >= '0' && *where <= '7') {
        value = (value << 3) | (*where++ - '0');
        --digs;
    }

    if (digs > 0 && *where && !isspace(static_cast<unsigned char>(*where)))
        return -1;      /* Ended on non-space/nul */

    return value;
}
02117 
02118 KMimeMagic::KMimeMagic()
02119 {
02120     // Magic file detection init
02121     QString mimefile = locate( "mime", "magic" );
02122     init( mimefile );
02123     // Add snippets from share/config/magic/*
02124     QStringList snippets = KGlobal::dirs()->findAllResources( "config", "magic/*.magic", true );
02125     for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it )
02126         if ( !mergeConfig( *it ) )
02127             kdWarning() << k_funcinfo << "Failed to parse " << *it << endl;
02128 }
02129 
02130 KMimeMagic::KMimeMagic(const QString & _configfile)
02131 {
02132     init( _configfile );
02133 }
02134 
02135 void KMimeMagic::init( const QString& _configfile )
02136 {
02137     int result;
02138     conf = new config_rec;
02139 
02140     /* set up the magic list (empty) */
02141     conf->magic = conf->last = NULL;
02142     magicResult = NULL;
02143     conf->followLinks = false;
02144 
02145         conf->utimeConf = 0L; // created on demand
02146     /* on the first time through we read the magic file */
02147     result = apprentice(_configfile);
02148     if (result == -1)
02149         return;
02150 #ifdef MIME_MAGIC_DEBUG_TABLE
02151     test_table();
02152 #endif
02153 }
02154 
02155 /*
02156  * The destructor.
02157  * Free the magic-table and other resources.
02158  */
02159 KMimeMagic::~KMimeMagic()
02160 {
02161     if (conf) {
02162         struct magic *p = conf->magic;
02163         struct magic *q;
02164         while (p) {
02165             q = p;
02166             p = p->next;
02167             free(q);
02168         }
02169                 delete conf->utimeConf;
02170         delete conf;
02171     }
02172         delete magicResult;
02173 }
02174 
02175 bool
02176 KMimeMagic::mergeConfig(const QString & _configfile)
02177 {
02178     kdDebug(7018) << k_funcinfo << _configfile << endl;
02179     int result;
02180 
02181     if (_configfile.isEmpty())
02182         return false;
02183     result = apprentice(_configfile);
02184     if (result == -1) {
02185         return false;
02186     }
02187 #ifdef MIME_MAGIC_DEBUG_TABLE
02188     test_table();
02189 #endif
02190     return true;
02191 }
02192 
02193 bool
02194 KMimeMagic::mergeBufConfig(char * _configbuf)
02195 {
02196     int result;
02197 
02198     if (conf) {
02199         result = buff_apprentice(_configbuf);
02200         if (result == -1)
02201             return false;
02202 #ifdef MIME_MAGIC_DEBUG_TABLE
02203         test_table();
02204 #endif
02205         return true;
02206     }
02207     return false;
02208 }
02209 
02210 void
02211 KMimeMagic::setFollowLinks( bool _enable )
02212 {
02213     conf->followLinks = _enable;
02214 }
02215 
02216 KMimeMagicResult *
02217 KMimeMagic::findBufferType(const QByteArray &array)
02218 {
02219     unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */
02220 
02221     conf->resultBuf = QString::null;
02222     if ( !magicResult )
02223       magicResult = new KMimeMagicResult();
02224     magicResult->setInvalid();
02225     conf->accuracy = 100;
02226 
02227     int nbytes = array.size();
02228 
02229         if (nbytes > HOWMANY)
02230                 nbytes = HOWMANY;
02231         memcpy(buf, array.data(), nbytes);
02232         if (nbytes == 0) {
02233                 conf->resultBuf = MIME_BINARY_ZEROSIZE;
02234         } else {
02235                 buf[nbytes++] = '\0';   /* null-terminate it */
02236                 tryit(conf, buf, nbytes);
02237         }
02238         /* if we have any results, put them in the request structure */
02239     magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02240     magicResult->setAccuracy(conf->accuracy);
02241         return magicResult;
02242 }
02243 
02244 static void
02245 refineResult(KMimeMagicResult *r, const QString & _filename)
02246 {
02247     QString tmp = r->mimeType();
02248     if (tmp.isEmpty())
02249         return;
02250     if ( tmp == "text/x-c" || tmp == "text/x-objc" )
02251     {
02252         if ( _filename.right(2) == ".h" )
02253             tmp += "hdr";
02254         else
02255             tmp += "src";
02256         r->setMimeType(tmp);
02257     }
02258     else
02259     if ( tmp == "text/x-c++" )
02260     {
02261         if ( _filename.endsWith(".h")
02262           || _filename.endsWith(".hh")
02263           || _filename.endsWith(".H")
02264           || !_filename.right(4).contains('.'))
02265             tmp += "hdr";
02266         else
02267             tmp += "src";
02268         r->setMimeType(tmp);
02269     }
02270     else
02271     if ( tmp == "application/x-sharedlib" )
02272     {
02273         if ( _filename.find( ".so" ) == -1 ) 
02274         {
02275             tmp = "application/x-executable";
02276             r->setMimeType( tmp );
02277         }
02278     }
02279 }
02280 
02281 KMimeMagicResult *
02282 KMimeMagic::findBufferFileType( const QByteArray &data,
02283                 const QString &fn)
02284 {
02285         KMimeMagicResult * r = findBufferType( data );
02286     refineResult(r, fn);
02287         return r;
02288 }
02289 
02290 /*
02291  * Find the content-type of the given file.
02292  */
02293 KMimeMagicResult* KMimeMagic::findFileType(const QString & fn)
02294 {
02295 #ifdef DEBUG_MIMEMAGIC
02296     kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl;
02297 #endif
02298     conf->resultBuf = QString::null;
02299 
02300         if ( !magicResult )
02301       magicResult = new KMimeMagicResult();
02302     magicResult->setInvalid();
02303     conf->accuracy = 100;
02304 
02305         if ( !conf->utimeConf )
02306             conf->utimeConf = new KMimeMagicUtimeConf();
02307 
02308         /* process it based on the file contents */
02309         process(conf, fn );
02310 
02311         /* if we have any results, put them in the request structure */
02312         //finishResult();
02313     magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02314     magicResult->setAccuracy(conf->accuracy);
02315     refineResult(magicResult, fn);
02316         return magicResult;
02317 }
kio

kmimemagic.cpp

kio

API Reference