00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "kmimemagic.h"
00020 #include <kdebug.h>
00021 #include <kapplication.h>
00022 #include <qfile.h>
00023 #include <ksimpleconfig.h>
00024 #include <kstandarddirs.h>
00025 #include <kstaticdeleter.h>
00026 #include <klargefile.h>
00027 #include <assert.h>
00028
00029 static int fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb);
00030 static void process(struct config_rec* conf, const QString &);
00031 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes);
00032 static int tagmagic(unsigned char *buf, int nbytes);
00033 static int textmagic(struct config_rec* conf, unsigned char *, int);
00034
00035 static void tryit(struct config_rec* conf, unsigned char *buf, int nb);
00036 static int match(struct config_rec* conf, unsigned char *, int);
00037
00038 KMimeMagic* KMimeMagic::s_pSelf;
00039 static KStaticDeleter<KMimeMagic> kmimemagicsd;
00040
00041 KMimeMagic* KMimeMagic::self()
00042 {
00043 if( !s_pSelf )
00044 initStatic();
00045 return s_pSelf;
00046 }
00047
00048 void KMimeMagic::initStatic()
00049 {
00050 s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() );
00051 s_pSelf->setFollowLinks( true );
00052 }
00053
00054 #include <stdio.h>
00055 #include <unistd.h>
00056 #include <stdlib.h>
00057 #include <sys/wait.h>
00058 #include <sys/types.h>
00059 #include <sys/stat.h>
00060 #include <fcntl.h>
00061 #include <errno.h>
00062 #include <ctype.h>
00063 #include <time.h>
00064 #include <utime.h>
00065 #include <stdarg.h>
00066 #include <qregexp.h>
00067 #include <qstring.h>
00068
00069
00070
00071
00072
00073
00074
00075
00076 #if (defined DEBUG_MIMEMAGIC || defined DEBUG_APPRENTICE)
00077 #define DEBUG_LINENUMBERS
00078 #endif
00079
00080
00081
00082
00083 #define MIME_BINARY_UNKNOWN "application/octet-stream"
00084 #define MIME_BINARY_UNREADABLE "application/x-unreadable"
00085 #define MIME_BINARY_ZEROSIZE "application/x-zerosize"
00086 #define MIME_TEXT_UNKNOWN "text/plain"
00087 #define MIME_TEXT_PLAIN "text/plain"
00088 #define MIME_INODE_DIR "inode/directory"
00089 #define MIME_INODE_CDEV "inode/chardevice"
00090 #define MIME_INODE_BDEV "inode/blockdevice"
00091 #define MIME_INODE_FIFO "inode/fifo"
00092 #define MIME_INODE_LINK "inode/link"
00093 #define MIME_INODE_SOCK "inode/socket"
00094
00095 #define MIME_APPL_TROFF "application/x-troff"
00096 #define MIME_APPL_TAR "application/x-tar"
00097 #define MIME_TEXT_FORTRAN "text/x-fortran"
00098
00099 #define MAXMIMESTRING 256
00100
00101 #define HOWMANY 4000
00102 #define MAXDESC 50
00103 #define MAXstring 64
00104
00105 typedef union VALUETYPE {
00106 unsigned char b;
00107 unsigned short h;
00108 unsigned long l;
00109 char s[MAXstring];
00110 unsigned char hs[2];
00111 unsigned char hl[4];
00112 } VALUETYPE;
00113
00114 struct magic {
00115 struct magic *next;
00116 #ifdef DEBUG_LINENUMBERS
00117 int lineno;
00118 #endif
00119
00120 short flag;
00121 #define INDIR 1
00122 #define UNSIGNED 2
00123 short cont_level;
00124 struct {
00125 char type;
00126 long offset;
00127 } in;
00128 long offset;
00129 unsigned char reln;
00130 char type;
00131 char vallen;
00132 #define BYTE 1
00133 #define SHORT 2
00134 #define LONG 4
00135 #define STRING 5
00136 #define DATE 6
00137 #define BESHORT 7
00138 #define BELONG 8
00139 #define BEDATE 9
00140 #define LESHORT 10
00141 #define LELONG 11
00142 #define LEDATE 12
00143 VALUETYPE value;
00144 unsigned long mask;
00145 char nospflag;
00146
00147
00148 char desc[MAXDESC];
00149 };
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165 #define RECORDSIZE 512
00166 #define NAMSIZ 100
00167 #define TUNMLEN 32
00168 #define TGNMLEN 32
00169
00170 union record {
00171 char charptr[RECORDSIZE];
00172 struct header {
00173 char name[NAMSIZ];
00174 char mode[8];
00175 char uid[8];
00176 char gid[8];
00177 char size[12];
00178 char mtime[12];
00179 char chksum[8];
00180 char linkflag;
00181 char linkname[NAMSIZ];
00182 char magic[8];
00183 char uname[TUNMLEN];
00184 char gname[TGNMLEN];
00185 char devmajor[8];
00186 char devminor[8];
00187 } header;
00188 };
00189
00190
00191 #define TMAGIC "ustar "
00192
00193
00194
00195
00196 static int is_tar(unsigned char *, int);
00197 static unsigned long signextend(struct magic *, unsigned long);
00198 static int getvalue(struct magic *, char **);
00199 static int hextoint(int);
00200 static char *getstr(char *, char *, int, int *);
00201 static int mget(union VALUETYPE *, unsigned char *, struct magic *, int);
00202 static int mcheck(union VALUETYPE *, struct magic *);
00203 static int mconvert(union VALUETYPE *, struct magic *);
00204 static long from_oct(int, char *);
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220 #define L_HTML 0x001
00221 #define L_C 0x002
00222 #define L_MAKE 0x004
00223 #define L_PLI 0x008
00224 #define L_MACH 0x010
00225 #define L_PAS 0x020
00226 #define L_JAVA 0x040
00227 #define L_CPP 0x080
00228 #define L_MAIL 0x100
00229 #define L_NEWS 0x200
00230 #define L_DIFF 0x400
00231 #define L_OBJC 0x800
00232
00233 #define P_HTML 0
00234 #define P_C 1
00235 #define P_MAKE 2
00236 #define P_PLI 3
00237 #define P_MACH 4
00238 #define P_PAS 5
00239 #define P_JAVA 6
00240 #define P_CPP 7
00241 #define P_MAIL 8
00242 #define P_NEWS 9
00243 #define P_DIFF 10
00244 #define P_OBJC 11
00245
/*
 * Row type of the `types` table: a MIME type name plus two numbers.
 * NOTE(review): kwords/weight are presumably the keyword count and the
 * scoring weight used by ascmagic() (not visible here) - confirm.
 */
struct asc_type {
    const char *type;   /* MIME type string */
    int kwords;
    double weight;
};
typedef struct asc_type asc_type;
00251
00252 static const asc_type types[] = {
00253 { "text/html", 19, 2 },
00254 { "text/x-c", 13, 1 },
00255 { "text/x-makefile", 4, 1.9 },
00256 { "text/x-pli", 1, 3 },
00257 { "text/x-assembler", 6, 2.1 },
00258 { "text/x-pascal", 1, 1 },
00259 { "text/x-java", 12, 1 },
00260 { "text/x-c++", 19, 1 },
00261 { "message/rfc822", 4, 1.9 },
00262 { "message/news", 3, 2 },
00263 { "text/x-diff", 4, 2 },
00264 { "text/x-objc", 10, 1 }
00265 };
00266
00267 #define NTYPES (sizeof(types)/sizeof(asc_type))
00268
00269 static struct names {
00270 const char *name;
00271 short type;
00272 } const names[] = {
00273 {
00274 "<html", L_HTML
00275 },
00276 {
00277 "<HTML", L_HTML
00278 },
00279 {
00280 "<head", L_HTML
00281 },
00282 {
00283 "<HEAD", L_HTML
00284 },
00285 {
00286 "<body", L_HTML
00287 },
00288 {
00289 "<BODY", L_HTML
00290 },
00291 {
00292 "<title", L_HTML
00293 },
00294 {
00295 "<TITLE", L_HTML
00296 },
00297 {
00298 "<h1", L_HTML
00299 },
00300 {
00301 "<H1", L_HTML
00302 },
00303 {
00304 "<a", L_HTML
00305 },
00306 {
00307 "<A", L_HTML
00308 },
00309 {
00310 "<img", L_HTML
00311 },
00312 {
00313 "<IMG", L_HTML
00314 },
00315 {
00316 "<!--", L_HTML
00317 },
00318 {
00319 "<!doctype", L_HTML
00320 },
00321 {
00322 "<!DOCTYPE", L_HTML
00323 },
00324 {
00325 "<div", L_HTML
00326 },
00327 {
00328 "<DIV", L_HTML
00329 },
00330 {
00331 "<frame", L_HTML
00332 },
00333 {
00334 "<FRAME", L_HTML
00335 },
00336 {
00337 "<frameset", L_HTML
00338 },
00339 {
00340 "<FRAMESET", L_HTML
00341 },
00342 {
00343 "<script", L_HTML
00344 },
00345 {
00346 "<SCRIPT", L_HTML
00347 },
00348 {
00349 "/*", L_C|L_CPP|L_JAVA|L_OBJC
00350 },
00351 {
00352 "//", L_C|L_CPP|L_JAVA|L_OBJC
00353 },
00354 {
00355 "#include", L_C|L_CPP
00356 },
00357 {
00358 "#ifdef", L_C|L_CPP
00359 },
00360 {
00361 "#ifndef", L_C|L_CPP
00362 },
00363 {
00364 "bool", L_C|L_CPP
00365 },
00366 {
00367 "char", L_C|L_CPP|L_JAVA|L_OBJC
00368 },
00369 {
00370 "int", L_C|L_CPP|L_JAVA|L_OBJC
00371 },
00372 {
00373 "float", L_C|L_CPP|L_JAVA|L_OBJC
00374 },
00375 {
00376 "void", L_C|L_CPP|L_JAVA|L_OBJC
00377 },
00378 {
00379 "extern", L_C|L_CPP
00380 },
00381 {
00382 "struct", L_C|L_CPP
00383 },
00384 {
00385 "union", L_C|L_CPP
00386 },
00387 {
00388 "implements", L_JAVA
00389 },
00390 {
00391 "super", L_JAVA
00392 },
00393 {
00394 "import", L_JAVA
00395 },
00396 {
00397 "class", L_CPP|L_JAVA
00398 },
00399 {
00400 "public", L_CPP|L_JAVA
00401 },
00402 {
00403 "private", L_CPP|L_JAVA
00404 },
00405 {
00406 "explicit", L_CPP
00407 },
00408 {
00409 "virtual", L_CPP
00410 },
00411 {
00412 "namespace", L_CPP
00413 },
00414 {
00415 "#import", L_OBJC
00416 },
00417 {
00418 "@interface", L_OBJC
00419 },
00420 {
00421 "@implementation", L_OBJC
00422 },
00423 {
00424 "@protocol", L_OBJC
00425 },
00426 {
00427 "CFLAGS", L_MAKE
00428 },
00429 {
00430 "LDFLAGS", L_MAKE
00431 },
00432 {
00433 "all:", L_MAKE
00434 },
00435 {
00436 ".PHONY:", L_MAKE
00437 },
00438 {
00439 "srcdir", L_MAKE
00440 },
00441 {
00442 "exec_prefix", L_MAKE
00443 },
00444
00445
00446
00447
00448 {
00449 ".ascii", L_MACH
00450 },
00451 {
00452 ".asciiz", L_MACH
00453 },
00454 {
00455 ".byte", L_MACH
00456 },
00457 {
00458 ".even", L_MACH
00459 },
00460 {
00461 ".globl", L_MACH
00462 },
00463 {
00464 "clr", L_MACH
00465 },
00466 {
00467 "(input", L_PAS
00468 },
00469 {
00470 "dcl", L_PLI
00471 },
00472 {
00473 "Received:", L_MAIL
00474 },
00475
00476
00477
00478 {
00479 "Return-Path:", L_MAIL
00480 },
00481 {
00482 "Cc:", L_MAIL
00483 },
00484 {
00485 "Newsgroups:", L_NEWS
00486 },
00487 {
00488 "Path:", L_NEWS
00489 },
00490 {
00491 "Organization:", L_NEWS
00492 },
00493 {
00494 "---", L_DIFF
00495 },
00496 {
00497 "+++", L_DIFF
00498 },
00499 {
00500 "***", L_DIFF
00501 },
00502 {
00503 "@@", L_DIFF
00504 },
00505 {
00506 NULL, 0
00507 }
00508 };
00509
00520 class KMimeMagicUtimeConf
00521 {
00522 public:
00523 KMimeMagicUtimeConf()
00524 {
00525 tmpDirs << QString::fromLatin1("/tmp");
00526
00527
00528
00529 QStringList confDirs = KGlobal::dirs()->resourceDirs( "config" );
00530 if ( !confDirs.isEmpty() )
00531 {
00532 QString globalConf = confDirs.last() + "kmimemagicrc";
00533 if ( QFile::exists( globalConf ) )
00534 {
00535 KSimpleConfig cfg( globalConf );
00536 cfg.setGroup( "Settings" );
00537 tmpDirs = cfg.readListEntry( "atimeDirs" );
00538 }
00539 if ( confDirs.count() > 1 )
00540 {
00541 QString localConf = confDirs.first() + "kmimemagicrc";
00542 if ( QFile::exists( localConf ) )
00543 {
00544 KSimpleConfig cfg( localConf );
00545 cfg.setGroup( "Settings" );
00546 tmpDirs += cfg.readListEntry( "atimeDirs" );
00547 }
00548 }
00549 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00550 {
00551 QString dir = *it;
00552 if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' )
00553 (*it) += '/';
00554 }
00555 }
00556 #if 0
00557
00558 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00559 kdDebug(7018) << " atimeDir: " << *it << endl;
00560 #endif
00561 }
00562
00563 bool restoreAccessTime( const QString & file ) const
00564 {
00565 QString dir = file.left( file.findRev( '/' ) );
00566 bool res = tmpDirs.contains( dir );
00567
00568 return res;
00569 }
00570 QStringList tmpDirs;
00571 };
00572
00573
00574 struct config_rec {
00575 bool followLinks;
00576 QString resultBuf;
00577 int accuracy;
00578
00579 struct magic *magic,
00580 *last;
00581 KMimeMagicUtimeConf * utimeConf;
00582 };
00583
#ifdef MIME_MAGIC_DEBUG_TABLE
/*
 * Debug-only sanity check of the rule list: walks the list and stops at
 * the first node whose address, read as four bytes, consists only of
 * printable characters - a hint that a pointer was overwritten with text.
 *
 * conf  configuration whose rule list is checked.  The parameter is
 *       optional so that existing parameterless calls keep compiling;
 *       without it there is no list to walk and the function only logs
 *       that fact.  Callers should pass their config_rec.
 */
static void
test_table(struct config_rec *conf = 0)
{
    if (!conf) {
        kdDebug(7018) << "test_table : no configuration given, nothing checked" << endl;
        return;
    }

    struct magic *prevm = NULL;

    kdDebug(7018) << "test_table : started" << endl;
    for (struct magic *m = conf->magic; m; prevm = m, m = m->next) {
        const unsigned long addr = (unsigned long) m;
        const int b3 = (int) ((addr >> 24) & 255);
        const int b2 = (int) ((addr >> 16) & 255);
        const int b1 = (int) ((addr >> 8) & 255);
        const int b0 = (int) (addr & 255);

        if (isprint(b3) && isprint(b2) && isprint(b1) && isprint(b0)) {
            int line = -1;
#ifdef DEBUG_LINENUMBERS
            /* line numbers only exist in the nodes in this configuration */
            if (prevm)
                line = prevm->lineno;
#endif
            kdDebug(7018) << "test_table: POINTER CLOBBERED! m=\""
                          << (char) b3 << (char) b2 << (char) b1 << (char) b0
                          << "\" line=" << line << endl;
            break;
        }
    }
}
#endif
00610
00611 #define EATAB {while (isascii((unsigned char) *l) && \
00612 isspace((unsigned char) *l)) ++l;}
00613
00614 int KMimeMagic::parse_line(char *line, int *rule, int lineno)
00615 {
00616 int ws_offset;
00617
00618
00619 if (line[0]) {
00620 line[strlen(line) - 1] = '\0';
00621 }
00622
00623 ws_offset = 0;
00624 while (line[ws_offset] && isspace(line[ws_offset])) {
00625 ws_offset++;
00626 }
00627
00628
00629 if (line[ws_offset] == 0) {
00630 return 0;
00631 }
00632
00633 if (line[ws_offset] == '#')
00634 return 0;
00635
00636
00637 (*rule)++;
00638
00639
00640 return (parse(line + ws_offset, lineno) != 0);
00641 }
00642
00643
00644
00645
00646 int KMimeMagic::apprentice( const QString& magicfile )
00647 {
00648 FILE *f;
00649 char line[BUFSIZ + 1];
00650 int errs = 0;
00651 int lineno;
00652 int rule = 0;
00653 QCString fname;
00654
00655 if (magicfile.isEmpty())
00656 return -1;
00657 fname = QFile::encodeName(magicfile);
00658 f = fopen(fname, "r");
00659 if (f == NULL) {
00660 kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl;
00661 return -1;
00662 }
00663
00664
00665 for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
00666 if (parse_line(line, &rule, lineno))
00667 errs++;
00668
00669 fclose(f);
00670
00671 #ifdef DEBUG_APPRENTICE
00672 kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00673 kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00674 #endif
00675
00676 #ifdef MIME_MAGIC_DEBUG_TABLE
00677 test_table();
00678 #endif
00679
00680 return (errs ? -1 : 0);
00681 }
00682
00683 int KMimeMagic::buff_apprentice(char *buff)
00684 {
00685 char line[BUFSIZ + 2];
00686 int errs = 0;
00687 int lineno = 1;
00688 char *start = buff;
00689 char *end;
00690 int count = 0;
00691 int rule = 0;
00692 int len = strlen(buff) + 1;
00693
00694
00695 do {
00696 count = (len > BUFSIZ-1)?BUFSIZ-1:len;
00697 strncpy(line, start, count);
00698 line[count] = '\0';
00699 if ((end = strchr(line, '\n'))) {
00700 *(++end) = '\0';
00701 count = strlen(line);
00702 } else
00703 strcat(line, "\n");
00704 start += count;
00705 len -= count;
00706 if (parse_line(line, &rule, lineno))
00707 errs++;
00708 lineno++;
00709 } while (len > 0);
00710
00711 #ifdef DEBUG_APPRENTICE
00712 kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00713 kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00714 #endif
00715
00716 #ifdef MIME_MAGIC_DEBUG_TABLE
00717 test_table();
00718 #endif
00719
00720 return (errs ? -1 : 0);
00721 }
00722
00723
00724
00725
00726 static unsigned long
00727 signextend(struct magic *m, unsigned long v)
00728 {
00729 if (!(m->flag & UNSIGNED))
00730 switch (m->type) {
00731
00732
00733
00734
00735
00736 case BYTE:
00737 v = (char) v;
00738 break;
00739 case SHORT:
00740 case BESHORT:
00741 case LESHORT:
00742 v = (short) v;
00743 break;
00744 case DATE:
00745 case BEDATE:
00746 case LEDATE:
00747 case LONG:
00748 case BELONG:
00749 case LELONG:
00750 v = (long) v;
00751 break;
00752 case STRING:
00753 break;
00754 default:
00755 kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl;
00756 return 998;
00757 }
00758 return v;
00759 }
00760
00761
00762
00763
00764 int KMimeMagic::parse(char *l, int
00765 #ifdef DEBUG_LINENUMBERS
00766 lineno
00767 #endif
00768 )
00769 {
00770 int i = 0;
00771 struct magic *m;
00772 char *t,
00773 *s;
00774
00775 if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) {
00776 kdError(7018) << "parse: Out of memory." << endl;
00777 return -1;
00778 }
00779
00780 m->next = NULL;
00781 if (!conf->magic || !conf->last) {
00782 conf->magic = conf->last = m;
00783 } else {
00784 conf->last->next = m;
00785 conf->last = m;
00786 }
00787
00788
00789 m->flag = 0;
00790 m->cont_level = 0;
00791 #ifdef DEBUG_LINENUMBERS
00792 m->lineno = lineno;
00793 #endif
00794
00795 while (*l == '>') {
00796 ++l;
00797 m->cont_level++;
00798 }
00799
00800 if (m->cont_level != 0 && *l == '(') {
00801 ++l;
00802 m->flag |= INDIR;
00803 }
00804
00805 m->offset = (int) strtol(l, &t, 0);
00806 if (l == t) {
00807 kdError(7018) << "parse: offset " << l << " invalid" << endl;
00808 }
00809 l = t;
00810
00811 if (m->flag & INDIR) {
00812 m->in.type = LONG;
00813 m->in.offset = 0;
00814
00815
00816
00817 if (*l == '.') {
00818 switch (*++l) {
00819 case 'l':
00820 m->in.type = LONG;
00821 break;
00822 case 's':
00823 m->in.type = SHORT;
00824 break;
00825 case 'b':
00826 m->in.type = BYTE;
00827 break;
00828 default:
00829 kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl;
00830 break;
00831 }
00832 l++;
00833 }
00834 s = l;
00835 if (*l == '+' || *l == '-')
00836 l++;
00837 if (isdigit((unsigned char) *l)) {
00838 m->in.offset = strtol(l, &t, 0);
00839 if (*s == '-')
00840 m->in.offset = -m->in.offset;
00841 } else
00842 t = l;
00843 if (*t++ != ')') {
00844 kdError(7018) << "parse: missing ')' in indirect offset" << endl;
00845 }
00846 l = t;
00847 }
00848 while (isascii((unsigned char) *l) && isdigit((unsigned char) *l))
00849 ++l;
00850 EATAB;
00851
00852 #define NBYTE 4
00853 #define NSHORT 5
00854 #define NLONG 4
00855 #define NSTRING 6
00856 #define NDATE 4
00857 #define NBESHORT 7
00858 #define NBELONG 6
00859 #define NBEDATE 6
00860 #define NLESHORT 7
00861 #define NLELONG 6
00862 #define NLEDATE 6
00863
00864 if (*l == 'u') {
00865 ++l;
00866 m->flag |= UNSIGNED;
00867 }
00868
00869 if (strncmp(l, "byte", NBYTE) == 0) {
00870 m->type = BYTE;
00871 l += NBYTE;
00872 } else if (strncmp(l, "short", NSHORT) == 0) {
00873 m->type = SHORT;
00874 l += NSHORT;
00875 } else if (strncmp(l, "long", NLONG) == 0) {
00876 m->type = LONG;
00877 l += NLONG;
00878 } else if (strncmp(l, "string", NSTRING) == 0) {
00879 m->type = STRING;
00880 l += NSTRING;
00881 } else if (strncmp(l, "date", NDATE) == 0) {
00882 m->type = DATE;
00883 l += NDATE;
00884 } else if (strncmp(l, "beshort", NBESHORT) == 0) {
00885 m->type = BESHORT;
00886 l += NBESHORT;
00887 } else if (strncmp(l, "belong", NBELONG) == 0) {
00888 m->type = BELONG;
00889 l += NBELONG;
00890 } else if (strncmp(l, "bedate", NBEDATE) == 0) {
00891 m->type = BEDATE;
00892 l += NBEDATE;
00893 } else if (strncmp(l, "leshort", NLESHORT) == 0) {
00894 m->type = LESHORT;
00895 l += NLESHORT;
00896 } else if (strncmp(l, "lelong", NLELONG) == 0) {
00897 m->type = LELONG;
00898 l += NLELONG;
00899 } else if (strncmp(l, "ledate", NLEDATE) == 0) {
00900 m->type = LEDATE;
00901 l += NLEDATE;
00902 } else {
00903 kdError(7018) << "parse: type " << l << " invalid" << endl;
00904 return -1;
00905 }
00906
00907 if (*l == '&') {
00908 ++l;
00909 m->mask = signextend(m, strtol(l, &l, 0));
00910 } else
00911 m->mask = (unsigned long) ~0L;
00912 EATAB;
00913
00914 switch (*l) {
00915 case '>':
00916 case '<':
00917
00918 case '&':
00919 case '^':
00920 case '=':
00921 m->reln = *l;
00922 ++l;
00923 break;
00924 case '!':
00925 if (m->type != STRING) {
00926 m->reln = *l;
00927 ++l;
00928 break;
00929 }
00930
00931 default:
00932 if (*l == 'x' && isascii((unsigned char) l[1]) &&
00933 isspace((unsigned char) l[1])) {
00934 m->reln = *l;
00935 ++l;
00936 goto GetDesc;
00937 }
00938 m->reln = '=';
00939 break;
00940 }
00941 EATAB;
00942
00943 if (getvalue(m, &l))
00944 return -1;
00945
00946
00947
00948 GetDesc:
00949 EATAB;
00950 if (l[0] == '\b') {
00951 ++l;
00952 m->nospflag = 1;
00953 } else if ((l[0] == '\\') && (l[1] == 'b')) {
00954 ++l;
00955 ++l;
00956 m->nospflag = 1;
00957 } else
00958 m->nospflag = 0;
00959
00960 while (*l != '\0' && *l != '#' && i < MAXDESC-1)
00961 m->desc[i++] = *l++;
00962 m->desc[i] = '\0';
00963
00964 while (--i>0 && isspace( m->desc[i] ))
00965 m->desc[i] = '\0';
00966
00967
00968
00969
00970 #ifdef DEBUG_APPRENTICE
00971 kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl;
00972 #endif
00973 return 0;
00974 }
00975
00976
00977
00978
00979
00980
00981 static int
00982 getvalue(struct magic *m, char **p)
00983 {
00984 int slen;
00985
00986 if (m->type == STRING) {
00987 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
00988 m->vallen = slen;
00989 } else if (m->reln != 'x')
00990 m->value.l = signextend(m, strtol(*p, p, 0));
00991 return 0;
00992 }
00993
00994
00995
00996
00997
00998
00999 static char *
01000 getstr(register char *s, register char *p, int plen, int *slen)
01001 {
01002 char *origs = s,
01003 *origp = p;
01004 char *pmax = p + plen - 1;
01005 register int c;
01006 register int val;
01007
01008 while ((c = *s++) != '\0') {
01009 if (isspace((unsigned char) c))
01010 break;
01011 if (p >= pmax) {
01012 kdError(7018) << "String too long: " << origs << endl;
01013 break;
01014 }
01015 if (c == '\\') {
01016 switch (c = *s++) {
01017
01018 case '\0':
01019 goto out;
01020
01021 default:
01022 *p++ = (char) c;
01023 break;
01024
01025 case 'n':
01026 *p++ = '\n';
01027 break;
01028
01029 case 'r':
01030 *p++ = '\r';
01031 break;
01032
01033 case 'b':
01034 *p++ = '\b';
01035 break;
01036
01037 case 't':
01038 *p++ = '\t';
01039 break;
01040
01041 case 'f':
01042 *p++ = '\f';
01043 break;
01044
01045 case 'v':
01046 *p++ = '\v';
01047 break;
01048
01049
01050 case '0':
01051 case '1':
01052 case '2':
01053 case '3':
01054 case '4':
01055 case '5':
01056 case '6':
01057 case '7':
01058 val = c - '0';
01059 c = *s++;
01060 if (c >= '0' && c <= '7') {
01061 val = (val << 3) | (c - '0');
01062 c = *s++;
01063 if (c >= '0' && c <= '7')
01064 val = (val << 3) | (c - '0');
01065 else
01066 --s;
01067 } else
01068 --s;
01069 *p++ = (char) val;
01070 break;
01071
01072
01073 case 'x':
01074 val = 'x';
01075 c = hextoint(*s++);
01076 if (c >= 0) {
01077 val = c;
01078 c = hextoint(*s++);
01079 if (c >= 0) {
01080 val = (val << 4) + c;
01081 c = hextoint(*s++);
01082 if (c >= 0) {
01083 val = (val << 4) + c;
01084 } else
01085 --s;
01086 } else
01087 --s;
01088 } else
01089 --s;
01090 *p++ = (char) val;
01091 break;
01092 }
01093 } else
01094 *p++ = (char) c;
01095 }
01096 out:
01097 *p = '\0';
01098 *slen = p - origp;
01099
01100
01101 return s;
01102 }
01103
01104
01105
/*
 * Converts one hexadecimal digit character to its value.
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything else.
 */
static int
hextoint(int c)
{
    const unsigned char ch = (unsigned char) c;

    if (isascii(ch)) {
        if (isdigit(ch))
            return c - '0';
        if (c >= 'a' && c <= 'f')
            return c - 'a' + 10;
        if (c >= 'A' && c <= 'F')
            return c - 'A' + 10;
    }
    return -1;
}
01119
01120
01121
01122
01123 static int
01124 mconvert(union VALUETYPE *p, struct magic *m)
01125 {
01126 switch (m->type) {
01127 case BYTE:
01128 return 1;
01129 case STRING:
01130
01131 p->s[sizeof(p->s) - 1] = '\0';
01132 return 1;
01133 #ifndef WORDS_BIGENDIAN
01134 case SHORT:
01135 #endif
01136 case BESHORT:
01137 p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
01138 return 1;
01139 #ifndef WORDS_BIGENDIAN
01140 case LONG:
01141 case DATE:
01142 #endif
01143 case BELONG:
01144 case BEDATE:
01145 p->l = (long)
01146 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
01147 return 1;
01148 #ifdef WORDS_BIGENDIAN
01149 case SHORT:
01150 #endif
01151 case LESHORT:
01152 p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
01153 return 1;
01154 #ifdef WORDS_BIGENDIAN
01155 case LONG:
01156 case DATE:
01157 #endif
01158 case LELONG:
01159 case LEDATE:
01160 p->l = (long)
01161 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
01162 return 1;
01163 default:
01164 kdError(7018) << "mconvert: invalid type " << m->type << endl;
01165 return 0;
01166 }
01167 }
01168
01169
01170 static int
01171 mget(union VALUETYPE *p, unsigned char *s, struct magic *m,
01172 int nbytes)
01173 {
01174 long offset = m->offset;
01175 switch ( m->type )
01176 {
01177 case BYTE:
01178 if ( offset + 1 > nbytes-1 )
01179 return 0;
01180 break;
01181 case SHORT:
01182 case BESHORT:
01183 case LESHORT:
01184 if ( offset + 2 > nbytes-1 )
01185 return 0;
01186 break;
01187 case LONG:
01188 case BELONG:
01189 case LELONG:
01190 case DATE:
01191 case BEDATE:
01192 case LEDATE:
01193 if ( offset + 4 > nbytes-1 )
01194 return 0;
01195 break;
01196 case STRING:
01197 break;
01198 }
01199
01200
01201
01202
01203 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01204 {
01205 int have = nbytes - offset;
01206 memset(p, 0, sizeof(union VALUETYPE));
01207 if (have > 0)
01208 memcpy(p, s + offset, have);
01209 } else
01210 memcpy(p, s + offset, sizeof(union VALUETYPE));
01211
01212 if (!mconvert(p, m))
01213 return 0;
01214
01215 if (m->flag & INDIR) {
01216
01217 switch (m->in.type) {
01218 case BYTE:
01219 offset = p->b + m->in.offset;
01220 break;
01221 case SHORT:
01222 offset = p->h + m->in.offset;
01223 break;
01224 case LONG:
01225 offset = p->l + m->in.offset;
01226 break;
01227 }
01228
01229 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01230 return 0;
01231
01232 memcpy(p, s + offset, sizeof(union VALUETYPE));
01233
01234 if (!mconvert(p, m))
01235 return 0;
01236 }
01237 return 1;
01238 }
01239
01240 static int
01241 mcheck(union VALUETYPE *p, struct magic *m)
01242 {
01243 register unsigned long l = m->value.l;
01244 register unsigned long v;
01245 int matched;
01246
01247 if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
01248 kdError(7018) << "BOINK" << endl;
01249 return 1;
01250 }
01251 switch (m->type) {
01252 case BYTE:
01253 v = p->b;
01254 break;
01255
01256 case SHORT:
01257 case BESHORT:
01258 case LESHORT:
01259 v = p->h;
01260 break;
01261
01262 case LONG:
01263 case BELONG:
01264 case LELONG:
01265 case DATE:
01266 case BEDATE:
01267 case LEDATE:
01268 v = p->l;
01269 break;
01270
01271 case STRING:
01272 l = 0;
01273
01274
01275
01276
01277
01278 v = 0;
01279 {
01280 register unsigned char *a = (unsigned char *) m->value.s;
01281 register unsigned char *b = (unsigned char *) p->s;
01282 register int len = m->vallen;
01283 Q_ASSERT(len);
01284
01285 while (--len >= 0)
01286 if ((v = *b++ - *a++) != 0)
01287 break;
01288 }
01289 break;
01290 default:
01291 kdError(7018) << "mcheck: invalid type " << m->type << endl;
01292 return 0;
01293 }
01294 #if 0
01295 qDebug("Before signextend %08x", v);
01296 #endif
01297 v = signextend(m, v) & m->mask;
01298 #if 0
01299 qDebug("After signextend %08x", v);
01300 #endif
01301
01302 switch (m->reln) {
01303 case 'x':
01304 matched = 1;
01305 break;
01306
01307 case '!':
01308 matched = v != l;
01309 break;
01310
01311 case '=':
01312 matched = v == l;
01313 break;
01314
01315 case '>':
01316 if (m->flag & UNSIGNED)
01317 matched = v > l;
01318 else
01319 matched = (long) v > (long) l;
01320 break;
01321
01322 case '<':
01323 if (m->flag & UNSIGNED)
01324 matched = v < l;
01325 else
01326 matched = (long) v < (long) l;
01327 break;
01328
01329 case '&':
01330 matched = (v & l) == l;
01331 break;
01332
01333 case '^':
01334 matched = (v & l) != l;
01335 break;
01336
01337 default:
01338 matched = 0;
01339 kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl;
01340 break;
01341 }
01342
01343 return matched;
01344 }
01345
01346
01347
01348
01349
01350
01351 void process(struct config_rec* conf, const QString & fn)
01352 {
01353 int fd = 0;
01354 unsigned char buf[HOWMANY + 1];
01355 KDE_struct_stat sb;
01356 int nbytes = 0;
01357 int tagbytes = 0;
01358 QCString fileName = QFile::encodeName( fn );
01359
01360
01361
01362
01363 if (fsmagic(conf, fileName, &sb) != 0) {
01364
01365 return;
01366 }
01367 if ((fd = KDE_open(fileName, O_RDONLY)) < 0) {
01368
01369
01370
01371
01372
01373
01374 conf->resultBuf = MIME_BINARY_UNREADABLE;
01375 return;
01376 }
01377
01378
01379
01380 if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) {
01381 kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl;
01382 conf->resultBuf = MIME_BINARY_UNREADABLE;
01383 (void)close(fd);
01384 return;
01385 }
01386 if ((tagbytes = tagmagic(buf, nbytes))) {
01387
01388 lseek(fd, tagbytes, SEEK_SET);
01389 nbytes = read(fd, (char*)buf, HOWMANY);
01390 if (nbytes < 0) {
01391 conf->resultBuf = MIME_BINARY_UNREADABLE;
01392 (void)close(fd);
01393 return;
01394 }
01395 }
01396 if (nbytes == 0) {
01397 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01398 } else {
01399 buf[nbytes++] = '\0';
01400 tryit(conf, buf, nbytes);
01401 }
01402
01403 if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
01404 {
01405
01406
01407
01408
01409
01410 struct utimbuf utbuf;
01411 utbuf.actime = sb.st_atime;
01412 utbuf.modtime = sb.st_mtime;
01413 (void) utime(fileName, &utbuf);
01414 }
01415 (void) close(fd);
01416 }
01417
01418
01419 static void tryit(struct config_rec* conf, unsigned char *buf, int nb)
01420 {
01421
01422 if (match(conf, buf, nb))
01423 return;
01424
01425
01426 if (ascmagic(conf, buf, nb) == 1)
01427 return;
01428
01429
01430 if (textmagic(conf, buf, nb))
01431 return;
01432
01433
01434 conf->resultBuf = MIME_BINARY_UNKNOWN;
01435 conf->accuracy = 0;
01436 }
01437
01438 static int
01439 fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb)
01440 {
01441 int ret = 0;
01442
01443
01444
01445
01446
01447 ret = KDE_lstat(fn, sb);
01448
01449 if (ret) {
01450 return 1;
01451
01452 }
01453
01454
01455
01456
01457
01458
01459 switch (sb->st_mode & S_IFMT) {
01460 case S_IFDIR:
01461 conf->resultBuf = MIME_INODE_DIR;
01462 return 1;
01463 case S_IFCHR:
01464 conf->resultBuf = MIME_INODE_CDEV;
01465 return 1;
01466 case S_IFBLK:
01467 conf->resultBuf = MIME_INODE_BDEV;
01468 return 1;
01469
01470 #ifdef S_IFIFO
01471 case S_IFIFO:
01472 conf->resultBuf = MIME_INODE_FIFO;
01473 return 1;
01474 #endif
01475 #ifdef S_IFLNK
01476 case S_IFLNK:
01477 {
01478 char buf[BUFSIZ + BUFSIZ + 4];
01479 register int nch;
01480 KDE_struct_stat tstatbuf;
01481
01482 if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
01483 conf->resultBuf = MIME_INODE_LINK;
01484
01485 return 1;
01486 }
01487