00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "kmimemagic.h"
00020 #include <kdebug.h>
00021 #include <kapplication.h>
00022 #include <qfile.h>
00023 #include <ksimpleconfig.h>
00024 #include <kstandarddirs.h>
00025 #include <kstaticdeleter.h>
00026 #include <klargefile.h>
00027 #include <assert.h>
00028
00029 static int fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb);
00030 static void process(struct config_rec* conf, const QString &);
00031 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes);
00032 static int tagmagic(unsigned char *buf, int nbytes);
00033 static int textmagic(struct config_rec* conf, unsigned char *, int);
00034
00035 static void tryit(struct config_rec* conf, unsigned char *buf, int nb);
00036 static int match(struct config_rec* conf, unsigned char *, int);
00037
00038 KMimeMagic* KMimeMagic::s_pSelf;
00039 static KStaticDeleter<KMimeMagic> kmimemagicsd;
00040
00041 KMimeMagic* KMimeMagic::self()
00042 {
00043 if( !s_pSelf )
00044 initStatic();
00045 return s_pSelf;
00046 }
00047
00048 void KMimeMagic::initStatic()
00049 {
00050 s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() );
00051 s_pSelf->setFollowLinks( true );
00052 }
00053
00054 #include <stdio.h>
00055 #include <unistd.h>
00056 #include <stdlib.h>
00057 #include <sys/wait.h>
00058 #include <sys/types.h>
00059 #include <sys/stat.h>
00060 #include <fcntl.h>
00061 #include <errno.h>
00062 #include <ctype.h>
00063 #include <time.h>
00064 #include <utime.h>
00065 #include <stdarg.h>
00066 #include <qregexp.h>
00067 #include <qstring.h>
00068
00069
00070
00071
00072
00073
00074
00075
/* Either debug switch also turns on per-rule line numbers in struct magic. */
#if defined(DEBUG_MIMEMAGIC) || defined(DEBUG_APPRENTICE)
#  define DEBUG_LINENUMBERS
#endif

/* MIME type strings written into the result buffer. */
#define MIME_BINARY_UNKNOWN    "application/octet-stream"
#define MIME_BINARY_UNREADABLE "application/x-unreadable"
#define MIME_BINARY_ZEROSIZE   "application/x-zerosize"
#define MIME_TEXT_UNKNOWN      "text/plain"
#define MIME_TEXT_PLAIN        "text/plain"
#define MIME_INODE_DIR         "inode/directory"
#define MIME_INODE_CDEV        "inode/chardevice"
#define MIME_INODE_BDEV        "inode/blockdevice"
#define MIME_INODE_FIFO        "inode/fifo"
#define MIME_INODE_LINK        "inode/link"
#define MIME_INODE_SOCK        "inode/socket"

#define MIME_APPL_TROFF        "application/x-troff"
#define MIME_APPL_TAR          "application/x-tar"
#define MIME_TEXT_FORTRAN      "text/x-fortran"

#define MAXMIMESTRING 256

#define HOWMANY   4000  /* number of bytes read from the start of a file */
#define MAXDESC   50    /* size of the desc[] array in struct magic */
#define MAXstring 64    /* size of the s[] member of VALUETYPE */
00104
00105 typedef union VALUETYPE {
00106 unsigned char b;
00107 unsigned short h;
00108 unsigned long l;
00109 char s[MAXstring];
00110 unsigned char hs[2];
00111 unsigned char hl[4];
00112 } VALUETYPE;
00113
00114 struct magic {
00115 struct magic *next;
00116 #ifdef DEBUG_LINENUMBERS
00117 int lineno;
00118 #endif
00119
00120 short flag;
00121 #define INDIR 1
00122 #define UNSIGNED 2
00123 short cont_level;
00124 struct {
00125 char type;
00126 long offset;
00127 } in;
00128 long offset;
00129 unsigned char reln;
00130 char type;
00131 char vallen;
00132 #define BYTE 1
00133 #define SHORT 2
00134 #define LONG 4
00135 #define STRING 5
00136 #define DATE 6
00137 #define BESHORT 7
00138 #define BELONG 8
00139 #define BEDATE 9
00140 #define LESHORT 10
00141 #define LELONG 11
00142 #define LEDATE 12
00143 VALUETYPE value;
00144 unsigned long mask;
00145 char nospflag;
00146
00147
00148 char desc[MAXDESC];
00149 };
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
/* Field sizes of the tar header record described below. */
#define RECORDSIZE 512
#define NAMSIZ     100
#define TUNMLEN    32
#define TGNMLEN    32

/*
 * A RECORDSIZE-byte record, viewable as raw characters or as a header
 * made of fixed-width character fields.
 * NOTE(review): presumably consumed by is_tar(), whose definition is not
 * in view here - confirm.
 */
union record {
    char charptr[RECORDSIZE];
    struct header {
        char name[NAMSIZ];
        char mode[8];
        char uid[8];
        char gid[8];
        char size[12];
        char mtime[12];
        char chksum[8];
        char linkflag;
        char linkname[NAMSIZ];
        char magic[8];
        char uname[TUNMLEN];
        char gname[TGNMLEN];
        char devmajor[8];
        char devminor[8];
    } header;
};

/* Magic string; note the trailing blank. */
#define TMAGIC "ustar "
00192
00193
00194
00195
/* Prototypes for the file-local parsing and matching helpers. */
static int is_tar(unsigned char *buf, int nbytes);
static unsigned long signextend(struct magic *m, unsigned long v);
static int getvalue(struct magic *m, char **p);
static int hextoint(int c);
static char *getstr(char *s, char *p, int plen, int *slen);
static int mget(union VALUETYPE *p, unsigned char *s, struct magic *m, int nbytes);
static int mcheck(union VALUETYPE *p, struct magic *m);
static int mconvert(union VALUETYPE *p, struct magic *m);
static long from_oct(int digs, char *where);
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
/* Bit flags stored in the `type` member of the names[] keyword table. */
#define L_HTML 0x001
#define L_C    0x002
#define L_MAKE 0x004
#define L_PLI  0x008
#define L_MACH 0x010
#define L_PAS  0x020
#define L_JAVA 0x040
#define L_CPP  0x080
#define L_MAIL 0x100
#define L_NEWS 0x200
#define L_DIFF 0x400
#define L_OBJC 0x800

/* Extra bit ORed into some names[] entries next to the L_* flag. */
#define FLAG_STRONG 0x1000

/* Positions 0..11; P_x equals the bit number of the matching L_x flag. */
#define P_HTML 0
#define P_C    1
#define P_MAKE 2
#define P_PLI  3
#define P_MACH 4
#define P_PAS  5
#define P_JAVA 6
#define P_CPP  7
#define P_MAIL 8
#define P_NEWS 9
#define P_DIFF 10
#define P_OBJC 11
00248
/*
 * Per-language record: MIME type string plus two numeric parameters.
 * NOTE(review): kwords / weight are presumably a keyword count and a
 * scoring weight used by ascmagic() - confirm against its definition.
 */
typedef struct asc_type {
    const char *type;
    int         kwords;
    double      weight;
} asc_type;

/* One row per P_* position, in P_* order. */
static const asc_type types[] = {
    { "text/html",        19, 2   },  /* P_HTML */
    { "text/x-c",         13, 1   },  /* P_C    */
    { "text/x-makefile",   4, 1.9 },  /* P_MAKE */
    { "text/x-pli",        1, 3   },  /* P_PLI  */
    { "text/x-assembler",  6, 2.1 },  /* P_MACH */
    { "text/x-pascal",     1, 1   },  /* P_PAS  */
    { "text/x-java",      12, 1   },  /* P_JAVA */
    { "text/x-c++",       19, 1   },  /* P_CPP  */
    { "message/rfc822",    4, 1.9 },  /* P_MAIL */
    { "message/news",      3, 2   },  /* P_NEWS */
    { "text/x-diff",       4, 2   },  /* P_DIFF */
    { "text/x-objc",      10, 1   }   /* P_OBJC */
};

/* Number of rows in types[]. */
#define NTYPES (sizeof(types)/sizeof(asc_type))
00271
00272 static struct names {
00273 const char *name;
00274 short type;
00275 } const names[] = {
00276 {
00277 "<html", L_HTML | FLAG_STRONG
00278 },
00279 {
00280 "<HTML", L_HTML | FLAG_STRONG
00281 },
00282 {
00283 "<head", L_HTML
00284 },
00285 {
00286 "<HEAD", L_HTML
00287 },
00288 {
00289 "<body", L_HTML
00290 },
00291 {
00292 "<BODY", L_HTML
00293 },
00294 {
00295 "<title", L_HTML
00296 },
00297 {
00298 "<TITLE", L_HTML
00299 },
00300 {
00301 "<h1", L_HTML
00302 },
00303 {
00304 "<H1", L_HTML
00305 },
00306 {
00307 "<a", L_HTML
00308 },
00309 {
00310 "<A", L_HTML
00311 },
00312 {
00313 "<img", L_HTML
00314 },
00315 {
00316 "<IMG", L_HTML
00317 },
00318 {
00319 "<!--", L_HTML
00320 },
00321 {
00322 "<!doctype", L_HTML
00323 },
00324 {
00325 "<!DOCTYPE", L_HTML
00326 },
00327 {
00328 "<div", L_HTML
00329 },
00330 {
00331 "<DIV", L_HTML
00332 },
00333 {
00334 "<frame", L_HTML
00335 },
00336 {
00337 "<FRAME", L_HTML
00338 },
00339 {
00340 "<frameset", L_HTML
00341 },
00342 {
00343 "<FRAMESET", L_HTML
00344 },
00345 {
00346 "<script", L_HTML | FLAG_STRONG
00347 },
00348 {
00349 "<SCRIPT", L_HTML | FLAG_STRONG
00350 },
00351 {
00352 "/*", L_C|L_CPP|L_JAVA|L_OBJC
00353 },
00354 {
00355 "//", L_C|L_CPP|L_JAVA|L_OBJC
00356 },
00357 {
00358 "#include", L_C|L_CPP
00359 },
00360 {
00361 "#ifdef", L_C|L_CPP
00362 },
00363 {
00364 "#ifndef", L_C|L_CPP
00365 },
00366 {
00367 "bool", L_C|L_CPP
00368 },
00369 {
00370 "char", L_C|L_CPP|L_JAVA|L_OBJC
00371 },
00372 {
00373 "int", L_C|L_CPP|L_JAVA|L_OBJC
00374 },
00375 {
00376 "float", L_C|L_CPP|L_JAVA|L_OBJC
00377 },
00378 {
00379 "void", L_C|L_CPP|L_JAVA|L_OBJC
00380 },
00381 {
00382 "extern", L_C|L_CPP
00383 },
00384 {
00385 "struct", L_C|L_CPP
00386 },
00387 {
00388 "union", L_C|L_CPP
00389 },
00390 {
00391 "implements", L_JAVA
00392 },
00393 {
00394 "super", L_JAVA
00395 },
00396 {
00397 "import", L_JAVA
00398 },
00399 {
00400 "class", L_CPP|L_JAVA
00401 },
00402 {
00403 "public", L_CPP|L_JAVA
00404 },
00405 {
00406 "private", L_CPP|L_JAVA
00407 },
00408 {
00409 "explicit", L_CPP
00410 },
00411 {
00412 "virtual", L_CPP
00413 },
00414 {
00415 "namespace", L_CPP
00416 },
00417 {
00418 "#import", L_OBJC
00419 },
00420 {
00421 "@interface", L_OBJC
00422 },
00423 {
00424 "@implementation", L_OBJC
00425 },
00426 {
00427 "@protocol", L_OBJC
00428 },
00429 {
00430 "CFLAGS", L_MAKE
00431 },
00432 {
00433 "LDFLAGS", L_MAKE
00434 },
00435 {
00436 "all:", L_MAKE
00437 },
00438 {
00439 ".PHONY:", L_MAKE
00440 },
00441 {
00442 "srcdir", L_MAKE
00443 },
00444 {
00445 "exec_prefix", L_MAKE
00446 },
00447
00448
00449
00450
00451 {
00452 ".ascii", L_MACH
00453 },
00454 {
00455 ".asciiz", L_MACH
00456 },
00457 {
00458 ".byte", L_MACH
00459 },
00460 {
00461 ".even", L_MACH
00462 },
00463 {
00464 ".globl", L_MACH
00465 },
00466 {
00467 "clr", L_MACH
00468 },
00469 {
00470 "(input", L_PAS
00471 },
00472 {
00473 "dcl", L_PLI
00474 },
00475 {
00476 "Received:", L_MAIL
00477 },
00478
00479
00480
00481 {
00482 "Return-Path:", L_MAIL
00483 },
00484 {
00485 "Cc:", L_MAIL
00486 },
00487 {
00488 "Newsgroups:", L_NEWS
00489 },
00490 {
00491 "Path:", L_NEWS
00492 },
00493 {
00494 "Organization:", L_NEWS
00495 },
00496 {
00497 "---", L_DIFF
00498 },
00499 {
00500 "+++", L_DIFF
00501 },
00502 {
00503 "***", L_DIFF
00504 },
00505 {
00506 "@@", L_DIFF
00507 },
00508 {
00509 NULL, 0
00510 }
00511 };
00512
00523 class KMimeMagicUtimeConf
00524 {
00525 public:
00526 KMimeMagicUtimeConf()
00527 {
00528 tmpDirs << QString::fromLatin1("/tmp");
00529
00530
00531
00532 QStringList confDirs = KGlobal::dirs()->resourceDirs( "config" );
00533 if ( !confDirs.isEmpty() )
00534 {
00535 QString globalConf = confDirs.last() + "kmimemagicrc";
00536 if ( QFile::exists( globalConf ) )
00537 {
00538 KSimpleConfig cfg( globalConf );
00539 cfg.setGroup( "Settings" );
00540 tmpDirs = cfg.readListEntry( "atimeDirs" );
00541 }
00542 if ( confDirs.count() > 1 )
00543 {
00544 QString localConf = confDirs.first() + "kmimemagicrc";
00545 if ( QFile::exists( localConf ) )
00546 {
00547 KSimpleConfig cfg( localConf );
00548 cfg.setGroup( "Settings" );
00549 tmpDirs += cfg.readListEntry( "atimeDirs" );
00550 }
00551 }
00552 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00553 {
00554 QString dir = *it;
00555 if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' )
00556 (*it) += '/';
00557 }
00558 }
00559 #if 0
00560
00561 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00562 kdDebug(7018) << " atimeDir: " << *it << endl;
00563 #endif
00564 }
00565
00566 bool restoreAccessTime( const QString & file ) const
00567 {
00568 QString dir = file.left( file.findRev( '/' ) );
00569 bool res = tmpDirs.contains( dir );
00570
00571 return res;
00572 }
00573 QStringList tmpDirs;
00574 };
00575
00576
00577 struct config_rec {
00578 bool followLinks;
00579 QString resultBuf;
00580 int accuracy;
00581
00582 struct magic *magic,
00583 *last;
00584 KMimeMagicUtimeConf * utimeConf;
00585 };
00586
#ifdef MIME_MAGIC_DEBUG_TABLE
/*
 * Debug aid: walks the rule list and reports the first node whose address
 * consists of four printable bytes, which suggests that a `next` pointer
 * was overwritten with text. Stops at the first such node.
 *
 * NOTE(review): `conf` is not a parameter of this free function; it has to
 * resolve in the enclosing scope for this to build when
 * MIME_MAGIC_DEBUG_TABLE is defined - confirm before enabling.
 */
static void
test_table()
{
    struct magic *m;
    struct magic *prevm = NULL;

    kdDebug(7018) << "test_table : started" << endl;
    for (m = conf->magic; m; m = m->next) {
        const unsigned long addr = (unsigned long) m;

        if (isprint((addr >> 24) & 255) &&
            isprint((addr >> 16) & 255) &&
            isprint((addr >> 8) & 255) &&
            isprint(addr & 255)) {
            kdDebug(7018) << "test_table: POINTER CLOBBERED! m=\""
                          << (char) ((addr >> 24) & 255)
                          << (char) ((addr >> 16) & 255)
                          << (char) ((addr >> 8) & 255)
                          << (char) (addr & 255)
                          << "\" line="
#ifdef DEBUG_LINENUMBERS
                          /* lineno only exists with DEBUG_LINENUMBERS */
                          << (prevm ? prevm->lineno : -1)
#else
                          << -1
#endif
                          << endl;
            break;
        }
        prevm = m;
    }
}
#endif
00613
/* Advances the character pointer `l` of the enclosing scope past ASCII whitespace. */
#define EATAB do { \
        while ((((unsigned char) *l) & 0x80) == 0 && \
               isspace((unsigned char) *l)) \
            ++l; \
    } while (0)
00616
00617 int KMimeMagic::parse_line(char *line, int *rule, int lineno)
00618 {
00619 int ws_offset;
00620
00621
00622 if (line[0]) {
00623 line[strlen(line) - 1] = '\0';
00624 }
00625
00626 ws_offset = 0;
00627 while (line[ws_offset] && isspace(line[ws_offset])) {
00628 ws_offset++;
00629 }
00630
00631
00632 if (line[ws_offset] == 0) {
00633 return 0;
00634 }
00635
00636 if (line[ws_offset] == '#')
00637 return 0;
00638
00639
00640 (*rule)++;
00641
00642
00643 return (parse(line + ws_offset, lineno) != 0);
00644 }
00645
00646
00647
00648
00649 int KMimeMagic::apprentice( const QString& magicfile )
00650 {
00651 FILE *f;
00652 char line[BUFSIZ + 1];
00653 int errs = 0;
00654 int lineno;
00655 int rule = 0;
00656 QCString fname;
00657
00658 if (magicfile.isEmpty())
00659 return -1;
00660 fname = QFile::encodeName(magicfile);
00661 f = fopen(fname, "r");
00662 if (f == NULL) {
00663 kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl;
00664 return -1;
00665 }
00666
00667
00668 for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
00669 if (parse_line(line, &rule, lineno))
00670 errs++;
00671
00672 fclose(f);
00673
00674 #ifdef DEBUG_APPRENTICE
00675 kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00676 kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00677 #endif
00678
00679 #ifdef MIME_MAGIC_DEBUG_TABLE
00680 test_table();
00681 #endif
00682
00683 return (errs ? -1 : 0);
00684 }
00685
00686 int KMimeMagic::buff_apprentice(char *buff)
00687 {
00688 char line[BUFSIZ + 2];
00689 int errs = 0;
00690 int lineno = 1;
00691 char *start = buff;
00692 char *end;
00693 int count = 0;
00694 int rule = 0;
00695 int len = strlen(buff) + 1;
00696
00697
00698 do {
00699 count = (len > BUFSIZ-1)?BUFSIZ-1:len;
00700 strncpy(line, start, count);
00701 line[count] = '\0';
00702 if ((end = strchr(line, '\n'))) {
00703 *(++end) = '\0';
00704 count = strlen(line);
00705 } else
00706 strcat(line, "\n");
00707 start += count;
00708 len -= count;
00709 if (parse_line(line, &rule, lineno))
00710 errs++;
00711 lineno++;
00712 } while (len > 0);
00713
00714 #ifdef DEBUG_APPRENTICE
00715 kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00716 kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00717 #endif
00718
00719 #ifdef MIME_MAGIC_DEBUG_TABLE
00720 test_table();
00721 #endif
00722
00723 return (errs ? -1 : 0);
00724 }
00725
00726
00727
00728
00729 static unsigned long
00730 signextend(struct magic *m, unsigned long v)
00731 {
00732 if (!(m->flag & UNSIGNED))
00733 switch (m->type) {
00734
00735
00736
00737
00738
00739 case BYTE:
00740 v = (char) v;
00741 break;
00742 case SHORT:
00743 case BESHORT:
00744 case LESHORT:
00745 v = (short) v;
00746 break;
00747 case DATE:
00748 case BEDATE:
00749 case LEDATE:
00750 case LONG:
00751 case BELONG:
00752 case LELONG:
00753 v = (long) v;
00754 break;
00755 case STRING:
00756 break;
00757 default:
00758 kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl;
00759 return 998;
00760 }
00761 return v;
00762 }
00763
00764
00765
00766
00767 int KMimeMagic::parse(char *l, int
00768 #ifdef DEBUG_LINENUMBERS
00769 lineno
00770 #endif
00771 )
00772 {
00773 int i = 0;
00774 struct magic *m;
00775 char *t,
00776 *s;
00777
00778 if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) {
00779 kdError(7018) << "parse: Out of memory." << endl;
00780 return -1;
00781 }
00782
00783 m->next = NULL;
00784 if (!conf->magic || !conf->last) {
00785 conf->magic = conf->last = m;
00786 } else {
00787 conf->last->next = m;
00788 conf->last = m;
00789 }
00790
00791
00792 m->flag = 0;
00793 m->cont_level = 0;
00794 #ifdef DEBUG_LINENUMBERS
00795 m->lineno = lineno;
00796 #endif
00797
00798 while (*l == '>') {
00799 ++l;
00800 m->cont_level++;
00801 }
00802
00803 if (m->cont_level != 0 && *l == '(') {
00804 ++l;
00805 m->flag |= INDIR;
00806 }
00807
00808 m->offset = (int) strtol(l, &t, 0);
00809 if (l == t) {
00810 kdError(7018) << "parse: offset " << l << " invalid" << endl;
00811 }
00812 l = t;
00813
00814 if (m->flag & INDIR) {
00815 m->in.type = LONG;
00816 m->in.offset = 0;
00817
00818
00819
00820 if (*l == '.') {
00821 switch (*++l) {
00822 case 'l':
00823 m->in.type = LONG;
00824 break;
00825 case 's':
00826 m->in.type = SHORT;
00827 break;
00828 case 'b':
00829 m->in.type = BYTE;
00830 break;
00831 default:
00832 kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl;
00833 break;
00834 }
00835 l++;
00836 }
00837 s = l;
00838 if (*l == '+' || *l == '-')
00839 l++;
00840 if (isdigit((unsigned char) *l)) {
00841 m->in.offset = strtol(l, &t, 0);
00842 if (*s == '-')
00843 m->in.offset = -m->in.offset;
00844 } else
00845 t = l;
00846 if (*t++ != ')') {
00847 kdError(7018) << "parse: missing ')' in indirect offset" << endl;
00848 }
00849 l = t;
00850 }
00851 while (isascii((unsigned char) *l) && isdigit((unsigned char) *l))
00852 ++l;
00853 EATAB;
00854
00855 #define NBYTE 4
00856 #define NSHORT 5
00857 #define NLONG 4
00858 #define NSTRING 6
00859 #define NDATE 4
00860 #define NBESHORT 7
00861 #define NBELONG 6
00862 #define NBEDATE 6
00863 #define NLESHORT 7
00864 #define NLELONG 6
00865 #define NLEDATE 6
00866
00867 if (*l == 'u') {
00868 ++l;
00869 m->flag |= UNSIGNED;
00870 }
00871
00872 if (strncmp(l, "byte", NBYTE) == 0) {
00873 m->type = BYTE;
00874 l += NBYTE;
00875 } else if (strncmp(l, "short", NSHORT) == 0) {
00876 m->type = SHORT;
00877 l += NSHORT;
00878 } else if (strncmp(l, "long", NLONG) == 0) {
00879 m->type = LONG;
00880 l += NLONG;
00881 } else if (strncmp(l, "string", NSTRING) == 0) {
00882 m->type = STRING;
00883 l += NSTRING;
00884 } else if (strncmp(l, "date", NDATE) == 0) {
00885 m->type = DATE;
00886 l += NDATE;
00887 } else if (strncmp(l, "beshort", NBESHORT) == 0) {
00888 m->type = BESHORT;
00889 l += NBESHORT;
00890 } else if (strncmp(l, "belong", NBELONG) == 0) {
00891 m->type = BELONG;
00892 l += NBELONG;
00893 } else if (strncmp(l, "bedate", NBEDATE) == 0) {
00894 m->type = BEDATE;
00895 l += NBEDATE;
00896 } else if (strncmp(l, "leshort", NLESHORT) == 0) {
00897 m->type = LESHORT;
00898 l += NLESHORT;
00899 } else if (strncmp(l, "lelong", NLELONG) == 0) {
00900 m->type = LELONG;
00901 l += NLELONG;
00902 } else if (strncmp(l, "ledate", NLEDATE) == 0) {
00903 m->type = LEDATE;
00904 l += NLEDATE;
00905 } else {
00906 kdError(7018) << "parse: type " << l << " invalid" << endl;
00907 return -1;
00908 }
00909
00910 if (*l == '&') {
00911 ++l;
00912 m->mask = signextend(m, strtol(l, &l, 0));
00913 } else
00914 m->mask = (unsigned long) ~0L;
00915 EATAB;
00916
00917 switch (*l) {
00918 case '>':
00919 case '<':
00920
00921 case '&':
00922 case '^':
00923 case '=':
00924 m->reln = *l;
00925 ++l;
00926 break;
00927 case '!':
00928 if (m->type != STRING) {
00929 m->reln = *l;
00930 ++l;
00931 break;
00932 }
00933
00934 default:
00935 if (*l == 'x' && isascii((unsigned char) l[1]) &&
00936 isspace((unsigned char) l[1])) {
00937 m->reln = *l;
00938 ++l;
00939 goto GetDesc;
00940 }
00941 m->reln = '=';
00942 break;
00943 }
00944 EATAB;
00945
00946 if (getvalue(m, &l))
00947 return -1;
00948
00949
00950
00951 GetDesc:
00952 EATAB;
00953 if (l[0] == '\b') {
00954 ++l;
00955 m->nospflag = 1;
00956 } else if ((l[0] == '\\') && (l[1] == 'b')) {
00957 ++l;
00958 ++l;
00959 m->nospflag = 1;
00960 } else
00961 m->nospflag = 0;
00962
00963 while (*l != '\0' && *l != '#' && i < MAXDESC-1)
00964 m->desc[i++] = *l++;
00965 m->desc[i] = '\0';
00966
00967 while (--i>0 && isspace( m->desc[i] ))
00968 m->desc[i] = '\0';
00969
00970
00971
00972
00973 #ifdef DEBUG_APPRENTICE
00974 kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl;
00975 #endif
00976 return 0;
00977 }
00978
00979
00980
00981
00982
00983
00984 static int
00985 getvalue(struct magic *m, char **p)
00986 {
00987 int slen;
00988
00989 if (m->type == STRING) {
00990 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
00991 m->vallen = slen;
00992 } else if (m->reln != 'x')
00993 m->value.l = signextend(m, strtol(*p, p, 0));
00994 return 0;
00995 }
00996
00997
00998
00999
01000
01001
01002 static char *
01003 getstr(register char *s, register char *p, int plen, int *slen)
01004 {
01005 char *origs = s,
01006 *origp = p;
01007 char *pmax = p + plen - 1;
01008 register int c;
01009 register int val;
01010
01011 while ((c = *s++) != '\0') {
01012 if (isspace((unsigned char) c))
01013 break;
01014 if (p >= pmax) {
01015 kdError(7018) << "String too long: " << origs << endl;
01016 break;
01017 }
01018 if (c == '\\') {
01019 switch (c = *s++) {
01020
01021 case '\0':
01022 goto out;
01023
01024 default:
01025 *p++ = (char) c;
01026 break;
01027
01028 case 'n':
01029 *p++ = '\n';
01030 break;
01031
01032 case 'r':
01033 *p++ = '\r';
01034 break;
01035
01036 case 'b':
01037 *p++ = '\b';
01038 break;
01039
01040 case 't':
01041 *p++ = '\t';
01042 break;
01043
01044 case 'f':
01045 *p++ = '\f';
01046 break;
01047
01048 case 'v':
01049 *p++ = '\v';
01050 break;
01051
01052
01053 case '0':
01054 case '1':
01055 case '2':
01056 case '3':
01057 case '4':
01058 case '5':
01059 case '6':
01060 case '7':
01061 val = c - '0';
01062 c = *s++;
01063 if (c >= '0' && c <= '7') {
01064 val = (val << 3) | (c - '0');
01065 c = *s++;
01066 if (c >= '0' && c <= '7')
01067 val = (val << 3) | (c - '0');
01068 else
01069 --s;
01070 } else
01071 --s;
01072 *p++ = (char) val;
01073 break;
01074
01075
01076 case 'x':
01077 val = 'x';
01078 c = hextoint(*s++);
01079 if (c >= 0) {
01080 val = c;
01081 c = hextoint(*s++);
01082 if (c >= 0) {
01083 val = (val << 4) + c;
01084 c = hextoint(*s++);
01085 if (c >= 0) {
01086 val = (val << 4) + c;
01087 } else
01088 --s;
01089 } else
01090 --s;
01091 } else
01092 --s;
01093 *p++ = (char) val;
01094 break;
01095 }
01096 } else
01097 *p++ = (char) c;
01098 }
01099 out:
01100 *p = '\0';
01101 *slen = p - origp;
01102
01103
01104 return s;
01105 }
01106
01107
01108
/*
 * Value of a single hexadecimal digit character.
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything else.
 */
static int
hextoint(int c)
{
    const unsigned char low = (unsigned char) c;

    if (low & 0x80)             /* low byte is not 7-bit ASCII */
        return -1;
    if (isdigit(low))
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return 10 + (c - 'a');
    if (c >= 'A' && c <= 'F')
        return 10 + (c - 'A');
    return -1;
}
01122
01123
01124
01125
01126 static int
01127 mconvert(union VALUETYPE *p, struct magic *m)
01128 {
01129 switch (m->type) {
01130 case BYTE:
01131 return 1;
01132 case STRING:
01133
01134 p->s[sizeof(p->s) - 1] = '\0';
01135 return 1;
01136 #ifndef WORDS_BIGENDIAN
01137 case SHORT:
01138 #endif
01139 case BESHORT:
01140 p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
01141 return 1;
01142 #ifndef WORDS_BIGENDIAN
01143 case LONG:
01144 case DATE:
01145 #endif
01146 case BELONG:
01147 case BEDATE:
01148 p->l = (long)
01149 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
01150 return 1;
01151 #ifdef WORDS_BIGENDIAN
01152 case SHORT:
01153 #endif
01154 case LESHORT:
01155 p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
01156 return 1;
01157 #ifdef WORDS_BIGENDIAN
01158 case LONG:
01159 case DATE:
01160 #endif
01161 case LELONG:
01162 case LEDATE:
01163 p->l = (long)
01164 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
01165 return 1;
01166 default:
01167 kdError(7018) << "mconvert: invalid type " << m->type << endl;
01168 return 0;
01169 }
01170 }
01171
01172
01173 static int
01174 mget(union VALUETYPE *p, unsigned char *s, struct magic *m,
01175 int nbytes)
01176 {
01177 long offset = m->offset;
01178 switch ( m->type )
01179 {
01180 case BYTE:
01181 if ( offset + 1 > nbytes-1 )
01182 return 0;
01183 break;
01184 case SHORT:
01185 case BESHORT:
01186 case LESHORT:
01187 if ( offset + 2 > nbytes-1 )
01188 return 0;
01189 break;
01190 case LONG:
01191 case BELONG:
01192 case LELONG:
01193 case DATE:
01194 case BEDATE:
01195 case LEDATE:
01196 if ( offset + 4 > nbytes-1 )
01197 return 0;
01198 break;
01199 case STRING:
01200 break;
01201 }
01202
01203
01204
01205
01206 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01207 {
01208 int have = nbytes - offset;
01209 memset(p, 0, sizeof(union VALUETYPE));
01210 if (have > 0)
01211 memcpy(p, s + offset, have);
01212 } else
01213 memcpy(p, s + offset, sizeof(union VALUETYPE));
01214
01215 if (!mconvert(p, m))
01216 return 0;
01217
01218 if (m->flag & INDIR) {
01219
01220 switch (m->in.type) {
01221 case BYTE:
01222 offset = p->b + m->in.offset;
01223 break;
01224 case SHORT:
01225 offset = p->h + m->in.offset;
01226 break;
01227 case LONG:
01228 offset = p->l + m->in.offset;
01229 break;
01230 }
01231
01232 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01233 return 0;
01234
01235 memcpy(p, s + offset, sizeof(union VALUETYPE));
01236
01237 if (!mconvert(p, m))
01238 return 0;
01239 }
01240 return 1;
01241 }
01242
01243 static int
01244 mcheck(union VALUETYPE *p, struct magic *m)
01245 {
01246 register unsigned long l = m->value.l;
01247 register unsigned long v;
01248 int matched;
01249
01250 if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
01251 kdError(7018) << "BOINK" << endl;
01252 return 1;
01253 }
01254 switch (m->type) {
01255 case BYTE:
01256 v = p->b;
01257 break;
01258
01259 case SHORT:
01260 case BESHORT:
01261 case LESHORT:
01262 v = p->h;
01263 break;
01264
01265 case LONG:
01266 case BELONG:
01267 case LELONG:
01268 case DATE:
01269 case BEDATE:
01270 case LEDATE:
01271 v = p->l;
01272 break;
01273
01274 case STRING:
01275 l = 0;
01276
01277
01278
01279
01280
01281 v = 0;
01282 {
01283 register unsigned char *a = (unsigned char *) m->value.s;
01284 register unsigned char *b = (unsigned char *) p->s;
01285 register int len = m->vallen;
01286 Q_ASSERT(len);
01287
01288 while (--len >= 0)
01289 if ((v = *b++ - *a++) != 0)
01290 break;
01291 }
01292 break;
01293 default:
01294 kdError(7018) << "mcheck: invalid type " << m->type << endl;
01295 return 0;
01296 }
01297 #if 0
01298 qDebug("Before signextend %08x", v);
01299 #endif
01300 v = signextend(m, v) & m->mask;
01301 #if 0
01302 qDebug("After signextend %08x", v);
01303 #endif
01304
01305 switch (m->reln) {
01306 case 'x':
01307 matched = 1;
01308 break;
01309
01310 case '!':
01311 matched = v != l;
01312 break;
01313
01314 case '=':
01315 matched = v == l;
01316 break;
01317
01318 case '>':
01319 if (m->flag & UNSIGNED)
01320 matched = v > l;
01321 else
01322 matched = (long) v > (long) l;
01323 break;
01324
01325 case '<':
01326 if (m->flag & UNSIGNED)
01327 matched = v < l;
01328 else
01329 matched = (long) v < (long) l;
01330 break;
01331
01332 case '&':
01333 matched = (v & l) == l;
01334 break;
01335
01336 case '^':
01337 matched = (v & l) != l;
01338 break;
01339
01340 default:
01341 matched = 0;
01342 kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl;
01343 break;
01344 }
01345
01346 return matched;
01347 }
01348
01349
01350
01351
01352
01353
01354 void process(struct config_rec* conf, const QString & fn)
01355 {
01356 int fd = 0;
01357 unsigned char buf[HOWMANY + 1];
01358 KDE_struct_stat sb;
01359 int nbytes = 0;
01360 int tagbytes = 0;
01361 QCString fileName = QFile::encodeName( fn );
01362
01363
01364
01365
01366 if (fsmagic(conf, fileName, &sb) != 0) {
01367
01368 return;
01369 }
01370 if ((fd = KDE_open(fileName, O_RDONLY)) < 0) {
01371
01372
01373
01374
01375
01376
01377 conf->resultBuf = MIME_BINARY_UNREADABLE;
01378 return;
01379 }
01380
01381
01382
01383 if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) {
01384 kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl;
01385 conf->resultBuf = MIME_BINARY_UNREADABLE;
01386 (void)close(fd);
01387 return;
01388 }
01389 if ((tagbytes = tagmagic(buf, nbytes))) {
01390
01391 lseek(fd, tagbytes, SEEK_SET);
01392 nbytes = read(fd, (char*)buf, HOWMANY);
01393 if (nbytes < 0) {
01394 conf->resultBuf = MIME_BINARY_UNREADABLE;
01395 (void)close(fd);
01396 return;
01397 }
01398 }
01399 if (nbytes == 0) {
01400 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01401 } else {
01402 buf[nbytes++] = '\0';
01403 tryit(conf, buf, nbytes);
01404 }
01405
01406 if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
01407 {
01408
01409
01410
01411
01412
01413 struct utimbuf utbuf;
01414 utbuf.actime = sb.st_atime;
01415 utbuf.modtime = sb.st_mtime;
01416 (void) utime(fileName, &utbuf);
01417 }
01418 (void) close(fd);
01419 }
01420
01421
01422 static void tryit(struct config_rec* conf, unsigned char *buf, int nb)
01423 {
01424
01425 if (match(conf, buf, nb))
01426 return;
01427
01428
01429 if (ascmagic(conf, buf, nb) == 1)
01430 return;
01431
01432
01433 if (textmagic(conf, buf, nb))
01434 return;
01435
01436
01437 conf->resultBuf = MIME_BINARY_UNKNOWN;
01438 conf->accuracy = 0;
01439 }
01440
01441 static int
01442 fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb)
01443 {
01444 int ret = 0;
01445
01446
01447
01448
01449
01450 ret = KDE_lstat(fn, sb);
01451
01452 if (ret) {
01453 return 1;
01454
01455 }
01456
01457
01458
01459
01460
01461
01462 switch (sb->st_mode & S_IFMT) {
01463 case S_IFDIR:
01464 conf->resultBuf = MIME_INODE_DIR;
01465 return 1;
01466 case S_IFCHR:
01467 conf->resultBuf = MIME_INODE_CDEV;
01468 return 1;
01469 case S_IFBLK:
01470 conf->resultBuf = MIME_INODE_BDEV;
01471 return 1;
01472
01473 #ifdef S_IFIFO
01474 case S_IFIFO:
01475 conf->resultBuf = MIME_INODE_FIFO;
01476 return 1;
01477 #endif
01478 #ifdef S_IFLNK
01479 case S_IFLNK:
01480 {
01481 char buf[BUFSIZ + BUFSIZ + 4];
01482 register int nch;
01483 KDE_struct_stat tstatbuf;
01484
01485 if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
01486 conf->resultBuf = MIME_INODE_LINK;
01487
01488 return 1;
01489 }
01490 buf[nch] = '\0';
01491
01492 if (*buf == '/') {
01493 if (KDE_stat(buf, &tstatbuf) < 0) {
01494 conf->resultBuf = MIME_INODE_LINK;
01495
01496 return 1;
01497 }
01498 } else {
01499 char *tmp;
01500 char buf2[BUFSIZ + BUFSIZ + 4];
01501
01502 strncpy(buf2, fn, BUFSIZ);
01503 buf2[BUFSIZ] = 0;
01504
01505 if ((tmp = strrchr(buf2, '/')) == NULL) {
01506 tmp = buf;
01507 } else {
01508
01509 *++tmp = '\0';
01510 strcat(buf2, buf);
01511 tmp = buf2;
01512 }
01513 if (KDE_stat(tmp, &tstatbuf) < 0) {
01514 conf->resultBuf = MIME_INODE_LINK;
01515
01516 return 1;
01517 } else
01518 strcpy(buf, tmp);
01519 }
01520 if (conf->followLinks)
01521 process( conf, QFile::decodeName( buf ) );
01522 else
01523 conf->resultBuf = MIME_INODE_LINK;
01524 return 1;
01525 }
01526 return 1;
01527 #endif
01528 #ifdef S_IFSOCK
01529 #ifndef __COHERENT__
01530 case S_IFSOCK:
01531 conf->resultBuf = MIME_INODE_SOCK;
01532 return 1;
01533 #endif
01534 #endif
01535 case S_IFREG:
01536 break;
01537 default:
01538 kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl;
01539
01540 }
01541
01542
01543
01544
01545 if (sb->st_size == 0) {
01546 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01547 return 1;
01548 }
01549 return 0;
01550 }
01551
01552
01553
01554
01555
01556
01557
01558
01559
01560
01561
01562
01563
01564
01565
01566
01567
01568
01569
01570
01571
01572
01573
01574
01575
01576
01577
01578 static int
01579 match(struct config_rec* conf, unsigned char *s, int nbytes)
01580 {
01581 int cont_level = 0;
01582 union VALUETYPE p;
01583 struct magic *m;
01584
01585 #ifdef DEBUG_MIMEMAGIC
01586 kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
01587 for (m = conf->magic; m; m = m->next) {
01588 if (isprint((((unsigned long) m) >> 24) & 255) &&
01589 isprint((((unsigned long) m) >> 16) & 255) &&
01590 isprint((((unsigned long) m) >> 8) & 255) &&
01591 isprint(((unsigned long) m) & 255)) {
01592 kdDebug(7018) << "match: POINTER CLOBBERED! " << endl;
01593 break;
01594 }
01595 }
01596 #endif
01597
01598 for (m = conf->magic; m; m = m->next) {
01599 #ifdef DEBUG_MIMEMAGIC
01600 kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl;
01601 #endif
01602 memset(&p, 0, sizeof(union VALUETYPE));
01603
01604
01605 if (!mget(&p, s, m, nbytes) ||
01606 !mcheck(&p, m)) {
01607 struct magic *m_cont;
01608
01609
01610
01611
01612 if (!m->next || (m->next->cont_level == 0)) {
01613 continue;
01614 }
01615 m_cont = m->next;
01616 while (m_cont && (m_cont->cont_level != 0)) {
01617 #ifdef DEBUG_MIMEMAGIC
01618 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl;
01619 #endif
01620
01621
01622
01623
01624 m = m_cont;
01625 m_cont = m_cont->next;
01626 }
01627 continue;
01628 }
01629
01630
01631 #ifdef DEBUG_MIMEMAGIC
01632 kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01633 #endif
01634
01635
01636 conf->resultBuf = m->desc;
01637
01638 cont_level++;
01639
01640
01641
01642
01643 m = m->next;
01644 while (m && (m->cont_level != 0)) {
01645 #ifdef DEBUG_MIMEMAGIC
01646 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01647 #endif
01648 if (cont_level >= m->cont_level) {
01649 if (cont_level > m->cont_level) {
01650
01651
01652
01653
01654 cont_level = m->cont_level;
01655 }
01656 if (mget(&p, s, m, nbytes) &&
01657 mcheck(&p, m)) {
01658
01659
01660
01661
01662
01663
01664 #ifdef DEBUG_MIMEMAGIC
01665 kdDebug(7018) << "continuation matched" << endl;
01666 #endif
01667 conf->resultBuf = m->desc;
01668 cont_level++;
01669 }
01670 }
01671
01672 m = m->next;
01673 }
01674
01675
01676 if ( !conf->resultBuf.isEmpty() )
01677 {
01678 #ifdef DEBUG_MIMEMAGIC
01679 kdDebug(7018) << "match: matched" << endl;
01680 #endif
01681 return 1;
01682 }
01683 }
01684 #ifdef DEBUG_MIMEMAGIC
01685 kdDebug(7018) << "match: failed" << endl;
01686 #endif
01687 return 0;
01688 }
01689
01690
01691
/*
 * tagmagic - look for an "ID3" tag header at the start of buf.
 *
 * buf     data to inspect
 * nbytes  number of bytes available in buf; fewer than 40 is never
 *         treated as a tag
 *
 * Returns the size computed from the header, or 0 when buf does not start
 * with an acceptable header (wrong marker, byte 3 greater than 4, or any
 * of the low four bits of byte 5 set).
 */
static int tagmagic(unsigned char *buf, int nbytes)
{
    if (nbytes < 40)
        return 0;
    if (buf[0] != 'I' || buf[1] != 'D' || buf[2] != '3')
        return 0;
    if (buf[3] > 4)
        return 0;
    if (buf[5] & 0x0F)
        return 0;

    /*
     * 10 bytes for the header itself; flag bit 0x10 adds another 10
     * (presumably the optional ID3v2.4 footer -- confirm against the spec).
     */
    int total = (buf[5] & 0x10) ? 20 : 10;

    /* Bytes 6..9 are combined as four 7-bit groups, most significant first. */
    total += (buf[6] << 21) + (buf[7] << 14) + (buf[8] << 7) + buf[9];
    return total;
}
01711
/*
 * A token found by Tokenizer: a (pointer, length) view into the scanned
 * buffer.  The characters are not copied, so the token is not necessarily
 * NUL-terminated.
 */
struct Token {
    char *data;     /* first character of the token inside the buffer */
    int length;     /* number of characters belonging to the token */
};
01716
01717 struct Tokenizer
01718 {
01719 Tokenizer(char* buf, int nbytes) {
01720 data = buf;
01721 length = nbytes;
01722 pos = 0;
01723 }
01724 bool isNewLine() {
01725 return newline;
01726 }
01727 Token* nextToken() {
01728 if (pos == 0)
01729 newline = true;
01730 else
01731 newline = false;
01732 token.data = data+pos;
01733 token.length = 0;
01734 while(pos<length) {
01735 switch (data[pos]) {
01736 case '\n':
01737 newline = true;
01738 case '\0':
01739 case '\t':
01740 case ' ':
01741 case '\r':
01742 case '\f':
01743 case ',':
01744 case ';':
01745 case '>':
01746 if (token.length == 0) token.data++;
01747 else
01748 return &token;
01749 break;
01750 default:
01751 token.length++;
01752 }
01753 pos++;
01754 }
01755 return &token;
01756 }
01757
01758 private:
01759 Token token;
01760 char* data;
01761 int length;
01762 int pos;
01763 bool newline;
01764 };
01765
01766
01767
01768
01769 static inline bool STREQ(const Token *token, const char *b) {
01770 const char *a = token->data;
01771 int len = token->length;
01772 if (a == b) return true;
01773 while(*a && *b && len > 0) {
01774 if (*a != *b) return false;
01775 a++; b++; len--;
01776 }
01777 return (len == 0 && *b == 0);
01778 }
01779
01780 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes)
01781 {
01782 int i;
01783 double pct, maxpct, pctsum;
01784 double pcts[NTYPES];
01785 int mostaccurate, tokencount;
01786 int typeset, jonly, conly, jconly, objconly, cpponly;
01787 int has_escapes = 0;
01788
01789
01790
01791
01792 conf->accuracy = 70;
01793
01794
01795
01796
01797
01798
01799 if (*buf == '.') {
01800 unsigned char *tp = buf + 1;
01801
01802 while (isascii(*tp) && isspace(*tp))
01803 ++tp;
01804 if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') &&
01805 isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) {
01806 conf->resultBuf = MIME_APPL_TROFF;
01807 return 1;
01808 }
01809 }
01810 if ((*buf == 'c' || *buf == 'C') &&
01811 isascii(*(buf + 1)) && isspace(*(buf + 1))) {
01812
01813 conf->resultBuf = MIME_TEXT_FORTRAN;
01814 return 1;
01815 }
01816 assert(nbytes-1 < HOWMANY + 1);
01817
01818 has_escapes = (memchr(buf, '\033', nbytes) != NULL);
01819 Tokenizer tokenizer((char*)buf, nbytes);
01820 const Token* token;
01821 bool linecomment = false, blockcomment = false;
01822 const struct names *p;
01823 int typecount[NTYPES];
01824
01825
01826
01827
01828 memset(&typecount, 0, sizeof(typecount));
01829 typeset = 0;
01830 jonly = 0;
01831 conly = 0;
01832 jconly = 0;
01833 objconly = 0;
01834 cpponly = 0;
01835 tokencount = 0;
01836 bool foundClass = false;
01837
01838
01839 while ((token = tokenizer.nextToken())->length > 0) {
01840 #ifdef DEBUG_MIMEMAGIC
01841 kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl;
01842 #endif
01843 if (linecomment && tokenizer.isNewLine())
01844 linecomment = false;
01845 if (blockcomment && STREQ(token, "*/")) {
01846 blockcomment = false;
01847 continue;
01848 }
01849 for (p = names; p->name ; p++) {
01850 if (STREQ(token, p->name)) {
01851 #ifdef DEBUG_MIMEMAGIC
01852 kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl;
01853 #endif
01854 tokencount++;
01855 typeset |= p->type;
01856 if(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) {
01857 if (linecomment || blockcomment) {
01858 continue;
01859 }
01860 else {
01861 switch(p->type & (L_C|L_CPP|L_JAVA|L_OBJC))
01862 {
01863 case L_JAVA:
01864 jonly++;
01865 break;
01866 case L_OBJC:
01867 objconly++;
01868 break;
01869 case L_CPP:
01870 cpponly++;
01871 break;
01872 case (L_CPP|L_JAVA):
01873 jconly++;
01874 if ( !foundClass && STREQ(token, "class") )
01875 foundClass = true;
01876 break;
01877 case (L_C|L_CPP):
01878 conly++;
01879 break;
01880 default:
01881 if (STREQ(token, "//")) linecomment = true;
01882 if (STREQ(token, "/*")) blockcomment = true;
01883 }
01884 }
01885 }
01886 for (i = 0; i < (int)NTYPES; i++) {
01887 if ((1 << i) & p->type) typecount[i]+= p->type & FLAG_STRONG ? 2 : 1;
01888 }
01889 }
01890 }
01891 }
01892
01893 if (typeset & (L_C|L_CPP|L_JAVA|L_OBJC)) {
01894 conf->accuracy = 60;
01895 if (!(typeset & ~(L_C|L_CPP|L_JAVA|L_OBJC))) {
01896 #ifdef DEBUG_MIMEMAGIC
01897 kdDebug(7018) << "C/C++/Java/ObjC: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " objconly=" << objconly << endl;
01898 #endif
01899 if (jonly > 1 && foundClass) {
01900
01901 conf->resultBuf = QString(types[P_JAVA].type);
01902 return 1;
01903 }
01904 if (jconly > 1) {
01905
01906 if (typecount[P_JAVA] < typecount[P_CPP])
01907 conf->resultBuf = QString(types[P_CPP].type);
01908 else
01909 conf->resultBuf = QString(types[P_JAVA].type);
01910 return 1;
01911 }
01912 if (conly + cpponly > 1) {
01913
01914 if (cpponly > 0)
01915 conf->resultBuf = QString(types[P_CPP].type);
01916 else
01917 conf->resultBuf = QString(types[P_C].type);
01918 return 1;
01919 }
01920 if (objconly > 0) {
01921 conf->resultBuf = QString(types[P_OBJC].type);
01922 return 1;
01923 }
01924 }
01925 }
01926
01927
01928
01929
01930
01931 mostaccurate = -1;
01932 maxpct = pctsum = 0.0;
01933 for (i = 0; i < (int)NTYPES; i++) {
01934 if (typecount[i] > 1) {
01935 pct = (double)typecount[i] / (double)types[i].kwords *
01936 (double)types[i].weight;
01937 pcts[i] = pct;
01938 pctsum += pct;
01939 if (pct > maxpct) {
01940 maxpct = pct;
01941 mostaccurate = i;
01942 }
01943 #ifdef DEBUG_MIMEMAGIC
01944 kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl;
01945 #endif
01946 }
01947 }
01948 if (mostaccurate >= 0) {
01949 if ( mostaccurate != P_JAVA || foundClass )
01950 {
01951 conf->accuracy = (int)(pcts[mostaccurate] / pctsum * 60);
01952 #ifdef DEBUG_MIMEMAGIC
01953 kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << conf->accuracy << endl;
01954 #endif
01955 conf->resultBuf = QString(types[mostaccurate].type);
01956 return 1;
01957 }
01958 }
01959
01960 switch (is_tar(buf, nbytes)) {
01961 case 1:
01962
01963 conf->resultBuf = MIME_APPL_TAR;
01964 conf->accuracy = 90;
01965 return 1;
01966 case 2:
01967
01968 conf->resultBuf = MIME_APPL_TAR;
01969 conf->accuracy = 90;
01970 return 1;
01971 }
01972
01973 for (i = 0; i < nbytes; i++) {
01974 if (!isascii(*(buf + i)))
01975 return 0;
01976 }
01977
01978
01979 conf->accuracy = 90;
01980 if (has_escapes) {
01981
01982
01983 conf->resultBuf = MIME_TEXT_UNKNOWN;
01984 } else {
01985
01986 conf->resultBuf = MIME_TEXT_PLAIN;
01987 }
01988 return 1;
01989 }
01990
01991
01992 #define TEXT_MAXLINELEN 300
01993
01994
01995
01996
01997
01998 static int textmagic(struct config_rec* conf, unsigned char * buf, int nbytes)
01999 {
02000 int i;
02001 unsigned char *cp;
02002
02003 nbytes--;
02004
02005
02006 for (i = 0, cp = buf; i < nbytes; i++, cp++)
02007 if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F))
02008 return 0;
02009
02010
02011
02012
02013 for (i = 0; i < nbytes;) {
02014 cp = (unsigned char *) memchr(buf, '\n', nbytes - i);
02015 if (cp == NULL) {
02016
02017 if (i + TEXT_MAXLINELEN >= nbytes)
02018 break;
02019 else
02020 return 0;
02021 }
02022 if (cp - buf > TEXT_MAXLINELEN)
02023 return 0;
02024 i += (cp - buf + 1);
02025 buf = cp + 1;
02026 }
02027 conf->resultBuf = MIME_TEXT_PLAIN;
02028 return 1;
02029 }
02030
02031
02032
02033
02034
02035
02036
02037
02038
02039
02040
02041
02042
02043
02044
/*
 * True if c is an octal digit ('0'..'7').
 * Note: c is evaluated twice, so do not pass an expression with side
 * effects.
 */
#define isodigit(c) (((c) >= '0') && ((c) <= '7'))
02046
02047
02048
02049
02050
02051
02052 static int
02053 is_tar(unsigned char *buf, int nbytes)
02054 {
02055 register union record *header = (union record *) buf;
02056 register int i;
02057 register long sum,
02058 recsum;
02059 register char *p;
02060
02061 if (nbytes < (int)sizeof(union record))
02062 return 0;
02063
02064 recsum = from_oct(8, header->header.chksum);
02065
02066 sum = 0;
02067 p = header->charptr;
02068 for (i = sizeof(union record); --i >= 0;) {
02069
02070
02071
02072
02073 sum += 0xFF & *p++;
02074 }
02075
02076
02077 for (i = sizeof(header->header.chksum); --i >= 0;)
02078 sum -= 0xFF & header->header.chksum[i];
02079 sum += ' ' * sizeof header->header.chksum;
02080
02081 if (sum != recsum)
02082 return 0;
02083
02084 if (0 == strcmp(header->header.magic, TMAGIC))
02085 return 2;
02086
02087 return 1;
02088 }
02089
02090
02091
02092
02093
02094
02095
/*
 * from_oct - convert an octal number stored in a fixed-width text field.
 *
 * digs   width of the field in characters
 * where  start of the field; it need not be NUL-terminated when all
 *        digs characters are white space and octal digits
 *
 * Leading white space is skipped, then octal digits are accumulated until
 * the field is exhausted or a non-octal character is found.
 *
 * Returns the value, or -1 if the field contains only white space or if
 * the digits are followed by something other than white space or NUL.
 *
 * The field may hold arbitrary bytes, so every character is converted to
 * unsigned char before it is handed to isspace(); passing a negative
 * char value to a <ctype.h> function is undefined behaviour.
 */
static long
from_oct(int digs, char *where)
{
    long value;

    while (isspace((unsigned char) *where)) {   /* skip spaces */
        where++;
        if (--digs <= 0)
            return -1;  /* all blank field */
    }

    value = 0;
    while (digs > 0 && *where >= '0' && *where <= '7') {   /* scan til non-octal */
        value = (value << 3) | (*where++ - '0');
        --digs;
    }

    if (digs > 0 && *where && !isspace((unsigned char) *where))
        return -1;      /* ended on non-space/nul */

    return value;
}
02117
02118 KMimeMagic::KMimeMagic()
02119 {
02120
02121 QString mimefile = locate( "mime", "magic" );
02122 init( mimefile );
02123
02124 QStringList snippets = KGlobal::dirs()->findAllResources( "config", "magic/*.magic", true );
02125 for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it )
02126 if ( !mergeConfig( *it ) )
02127 kdWarning() << k_funcinfo << "Failed to parse " << *it << endl;
02128 }
02129
02130 KMimeMagic::KMimeMagic(const QString & _configfile)
02131 {
02132 init( _configfile );
02133 }
02134
02135 void KMimeMagic::init( const QString& _configfile )
02136 {
02137 int result;
02138 conf = new config_rec;
02139
02140
02141 conf->magic = conf->last = NULL;
02142 magicResult = NULL;
02143 conf->followLinks = false;
02144
02145 conf->utimeConf = 0L;
02146
02147 result = apprentice(_configfile);
02148 if (result == -1)
02149 return;
02150 #ifdef MIME_MAGIC_DEBUG_TABLE
02151 test_table();
02152 #endif
02153 }
02154
02155
02156
02157
02158
02159 KMimeMagic::~KMimeMagic()
02160 {
02161 if (conf) {
02162 struct magic *p = conf->magic;
02163 struct magic *q;
02164 while (p) {
02165 q = p;
02166 p = p->next;
02167 free(q);
02168 }
02169 delete conf->utimeConf;
02170 delete conf;
02171 }
02172 delete magicResult;
02173 }
02174
02175 bool
02176 KMimeMagic::mergeConfig(const QString & _configfile)
02177 {
02178 kdDebug(7018) << k_funcinfo << _configfile << endl;
02179 int result;
02180
02181 if (_configfile.isEmpty())
02182 return false;
02183 result = apprentice(_configfile);
02184 if (result == -1) {
02185 return false;
02186 }
02187 #ifdef MIME_MAGIC_DEBUG_TABLE
02188 test_table();
02189 #endif
02190 return true;
02191 }
02192
02193 bool
02194 KMimeMagic::mergeBufConfig(char * _configbuf)
02195 {
02196 int result;
02197
02198 if (conf) {
02199 result = buff_apprentice(_configbuf);
02200 if (result == -1)
02201 return false;
02202 #ifdef MIME_MAGIC_DEBUG_TABLE
02203 test_table();
02204 #endif
02205 return true;
02206 }
02207 return false;
02208 }
02209
02210 void
02211 KMimeMagic::setFollowLinks( bool _enable )
02212 {
02213 conf->followLinks = _enable;
02214 }
02215
02216 KMimeMagicResult *
02217 KMimeMagic::findBufferType(const QByteArray &array)
02218 {
02219 unsigned char buf[HOWMANY + 1];
02220
02221 conf->resultBuf = QString::null;
02222 if ( !magicResult )
02223 magicResult = new KMimeMagicResult();
02224 magicResult->setInvalid();
02225 conf->accuracy = 100;
02226
02227 int nbytes = array.size();
02228
02229 if (nbytes > HOWMANY)
02230 nbytes = HOWMANY;
02231 memcpy(buf, array.data(), nbytes);
02232 if (nbytes == 0) {
02233 conf->resultBuf = MIME_BINARY_ZEROSIZE;
02234 } else {
02235 buf[nbytes++] = '\0';
02236 tryit(conf, buf, nbytes);
02237 }
02238
02239 magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02240 magicResult->setAccuracy(conf->accuracy);
02241 return magicResult;
02242 }
02243
02244 static void
02245 refineResult(KMimeMagicResult *r, const QString & _filename)
02246 {
02247 QString tmp = r->mimeType();
02248 if (tmp.isEmpty())
02249 return;
02250 if ( tmp == "text/x-c" || tmp == "text/x-objc" )
02251 {
02252 if ( _filename.right(2) == ".h" )
02253 tmp += "hdr";
02254 else
02255 tmp += "src";
02256 r->setMimeType(tmp);
02257 }
02258 else
02259 if ( tmp == "text/x-c++" )
02260 {
02261 if ( _filename.endsWith(".h")
02262 || _filename.endsWith(".hh")
02263 || _filename.endsWith(".H")
02264 || !_filename.right(4).contains('.'))
02265 tmp += "hdr";
02266 else
02267 tmp += "src";
02268 r->setMimeType(tmp);
02269 }
02270 else
02271 if ( tmp == "application/x-sharedlib" )
02272 {
02273 if ( _filename.find( ".so" ) == -1 )
02274 {
02275 tmp = "application/x-executable";
02276 r->setMimeType( tmp );
02277 }
02278 }
02279 }
02280
02281 KMimeMagicResult *
02282 KMimeMagic::findBufferFileType( const QByteArray &data,
02283 const QString &fn)
02284 {
02285 KMimeMagicResult * r = findBufferType( data );
02286 refineResult(r, fn);
02287 return r;
02288 }
02289
02290
02291
02292
02293 KMimeMagicResult* KMimeMagic::findFileType(const QString & fn)
02294 {
02295 #ifdef DEBUG_MIMEMAGIC
02296 kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl;
02297 #endif
02298 conf->resultBuf = QString::null;
02299
02300 if ( !magicResult )
02301 magicResult = new KMimeMagicResult();
02302 magicResult->setInvalid();
02303 conf->accuracy = 100;
02304
02305 if ( !conf->utimeConf )
02306 conf->utimeConf = new KMimeMagicUtimeConf();
02307
02308
02309 process(conf, fn );
02310
02311
02312
02313 magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02314 magicResult->setAccuracy(conf->accuracy);
02315 refineResult(magicResult, fn);
02316 return magicResult;
02317 }