/* * Javascript Compressor * * Copyright (c) 2008-2018 Fabrice Bellard * Copyright (c) 2017-2018 Charlie Gordon * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ #include #include #include #include #include #include #include #include "cutils.h" typedef struct JSToken { int tok; char buf[20]; char *str; int len; int size; int line_num; /* line number for start of token */ int lines; /* number of embedded linefeeds in token */ } JSToken; enum { TOK_EOF = 256, TOK_IDENT, TOK_STR1, TOK_STR2, TOK_STR3, TOK_NUM, TOK_COM, TOK_LCOM, }; void tok_reset(JSToken *tt) { if (tt->str != tt->buf) { free(tt->str); tt->str = tt->buf; tt->size = sizeof(tt->buf); } tt->len = 0; } void tok_add_ch(JSToken *tt, int c) { if (tt->len + 1 > tt->size) { tt->size *= 2; if (tt->str == tt->buf) { tt->str = malloc(tt->size); memcpy(tt->str, tt->buf, tt->len); } else { tt->str = realloc(tt->str, tt->size); } } tt->str[tt->len++] = c; } FILE *infile; const char *filename; int output_line_num; int line_num; int ch; JSToken tokc; int skip_mask; #define DEFINE_MAX 20 char *define_tab[DEFINE_MAX]; int define_len; void error(const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (filename) { fprintf(stderr, "%s:%d: ", filename, line_num); } else { fprintf(stderr, "jscompress: "); } vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); va_end(ap); exit(1); } void define_symbol(const char *def) { int i; for (i = 0; i < define_len; i++) { if (!strcmp(tokc.str, define_tab[i])) return; } if (define_len >= DEFINE_MAX) error("too many defines"); define_tab[define_len++] = strdup(def); } void undefine_symbol(const char *def) { int i, j; for (i = j = 0; i < define_len; i++) { if (!strcmp(tokc.str, define_tab[i])) { free(define_tab[i]); } else { define_tab[j++] = define_tab[i]; } } define_len = j; } const char *find_symbol(const char *def) { int i; for (i = 0; i < define_len; i++) { if (!strcmp(tokc.str, define_tab[i])) return "1"; } return NULL; } void next(void); void nextch(void) { ch = fgetc(infile); if (ch == '\n') line_num++; } int skip_blanks(void) { for (;;) { next(); if (tokc.tok != ' ' && tokc.tok != '\t' && tokc.tok != TOK_COM && tokc.tok != TOK_LCOM) return tokc.tok; } } void parse_directive(void) { int ifdef, mask = skip_mask; /* simplistic preprocessor: #define / #undef / #ifdef / #ifndef / #else / #endif no symbol substitution. */ skip_mask = 0; /* disable skipping to parse preprocessor line */ nextch(); if (skip_blanks() != TOK_IDENT) error("expected preprocessing directive after #"); if (!strcmp(tokc.str, "define")) { if (skip_blanks() != TOK_IDENT) error("expected identifier after #define"); define_symbol(tokc.str); } else if (!strcmp(tokc.str, "undef")) { if (skip_blanks() != TOK_IDENT) error("expected identifier after #undef"); undefine_symbol(tokc.str); } else if ((ifdef = 1, !strcmp(tokc.str, "ifdef")) || (ifdef = 0, !strcmp(tokc.str, "ifndef"))) { if (skip_blanks() != TOK_IDENT) error("expected identifier after #ifdef/#ifndef"); mask = (mask << 2) | 2 | ifdef; if (find_symbol(tokc.str)) mask ^= 1; } else if (!strcmp(tokc.str, "else")) { if (!(mask & 2)) error("#else without a #if"); mask ^= 1; } else if (!strcmp(tokc.str, "endif")) { if (!(mask & 2)) error("#endif without a #if"); mask >>= 2; } else { error("unsupported preprocessing directive"); } if (skip_blanks() != '\n') error("extra characters on preprocessing line"); skip_mask = mask; } /* return -1 if invalid char */ static int hex_to_num(int ch) { if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10; else if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10; else if (ch >= '0' && ch <= '9') return ch - '0'; else return -1; } void next(void) { again: tok_reset(&tokc); tokc.line_num = line_num; tokc.lines = 0; switch(ch) { case EOF: tokc.tok = TOK_EOF; if (skip_mask) error("missing #endif"); break; case 'a' ... 'z': case 'A' ... 'Z': case '_': case '$': tok_add_ch(&tokc, ch); nextch(); while ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || (ch == '_' || ch == '$')) { tok_add_ch(&tokc, ch); nextch(); } tok_add_ch(&tokc, '\0'); tokc.tok = TOK_IDENT; break; case '.': nextch(); if (ch >= '0' && ch <= '9') { tok_add_ch(&tokc, '.'); goto has_dot; } tokc.tok = '.'; break; case '0': tok_add_ch(&tokc, ch); nextch(); if (ch == 'x' || ch == 'X') { /* hexa */ tok_add_ch(&tokc, ch); nextch(); while ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') || (ch >= '0' && ch <= '9')) { tok_add_ch(&tokc, ch); nextch(); } tok_add_ch(&tokc, '\0'); tokc.tok = TOK_NUM; break; } goto has_digit; case '1' ... '9': tok_add_ch(&tokc, ch); nextch(); has_digit: /* decimal */ while (ch >= '0' && ch <= '9') { tok_add_ch(&tokc, ch); nextch(); } if (ch == '.') { tok_add_ch(&tokc, ch); nextch(); has_dot: while (ch >= '0' && ch <= '9') { tok_add_ch(&tokc, ch); nextch(); } } if (ch == 'e' || ch == 'E') { tok_add_ch(&tokc, ch); nextch(); if (ch == '+' || ch == '-') { tok_add_ch(&tokc, ch); nextch(); } while (ch >= '0' && ch <= '9') { tok_add_ch(&tokc, ch); nextch(); } } tok_add_ch(&tokc, '\0'); tokc.tok = TOK_NUM; break; case '`': { nextch(); while (ch != '`' && ch != EOF) { if (ch == '\\') { tok_add_ch(&tokc, ch); nextch(); if (ch == EOF) { error("unexpected char after '\\'"); } tok_add_ch(&tokc, ch); } else { tok_add_ch(&tokc, ch); nextch(); } } nextch(); tok_add_ch(&tokc, 0); tokc.tok = TOK_STR3; } break; case '\"': case '\'': { int n, i, c, hex_digit_count; int quote_ch; quote_ch = ch; nextch(); while (ch != quote_ch && ch != EOF) { if (ch == '\\') { nextch(); switch(ch) { case 'n': tok_add_ch(&tokc, '\n'); nextch(); break; case 'r': tok_add_ch(&tokc, '\r'); nextch(); break; case 't': tok_add_ch(&tokc, '\t'); nextch(); break; case 'v': tok_add_ch(&tokc, '\v'); nextch(); break; case '\"': case '\'': case '\\': tok_add_ch(&tokc, ch); nextch(); break; case '0' ... '7': n = 0; while (ch >= '0' && ch <= '7') { n = n * 8 + (ch - '0'); nextch(); } tok_add_ch(&tokc, n); break; case 'x': case 'u': if (ch == 'x') hex_digit_count = 2; else hex_digit_count = 4; nextch(); n = 0; for(i = 0; i < hex_digit_count; i++) { c = hex_to_num(ch); if (c < 0) error("unexpected char after '\\x'"); n = n * 16 + c; nextch(); } if (n >= 256) error("unicode is currently unsupported"); tok_add_ch(&tokc, n); break; default: error("unexpected char after '\\'"); } } else { /* XXX: should refuse embedded newlines */ tok_add_ch(&tokc, ch); nextch(); } } nextch(); tok_add_ch(&tokc, 0); if (quote_ch == '\'') tokc.tok = TOK_STR1; else tokc.tok = TOK_STR2; } break; case '/': nextch(); if (ch == '/') { tok_add_ch(&tokc, '/'); tok_add_ch(&tokc, ch); nextch(); while (ch != '\n' && ch != EOF) { tok_add_ch(&tokc, ch); nextch(); } tok_add_ch(&tokc, '\0'); tokc.tok = TOK_LCOM; } else if (ch == '*') { int last; tok_add_ch(&tokc, '/'); tok_add_ch(&tokc, ch); last = 0; for(;;) { nextch(); if (ch == EOF) error("unterminated comment"); if (ch == '\n') tokc.lines++; tok_add_ch(&tokc, ch); if (last == '*' && ch == '/') break; last = ch; } nextch(); tok_add_ch(&tokc, '\0'); tokc.tok = TOK_COM; } else { tokc.tok = '/'; } break; case '#': parse_directive(); goto again; case '\n': /* adjust line number */ tokc.line_num--; tokc.lines++; /* fall thru */ default: tokc.tok = ch; nextch(); break; } if (skip_mask & 1) goto again; } void print_tok(FILE *f, JSToken *tt) { /* keep output lines in sync with input lines */ while (output_line_num < tt->line_num) { putc('\n', f); output_line_num++; } switch(tt->tok) { case TOK_IDENT: case TOK_COM: case TOK_LCOM: fprintf(f, "%s", tt->str); break; case TOK_NUM: { unsigned long a; char *p; a = strtoul(tt->str, &p, 0); if (*p == '\0' && a <= 0x7fffffff) { /* must be an integer */ fprintf(f, "%d", (int)a); } else { fprintf(f, "%s", tt->str); } } break; case TOK_STR3: fprintf(f, "`%s`", tt->str); break; case TOK_STR1: case TOK_STR2: { int i, c, quote_ch; if (tt->tok == TOK_STR1) quote_ch = '\''; else quote_ch = '\"'; fprintf(f, "%c", quote_ch); for(i = 0; i < tt->len - 1; i++) { c = (uint8_t)tt->str[i]; switch(c) { case '\r': fprintf(f, "\\r"); break; case '\n': fprintf(f, "\\n"); break; case '\t': fprintf(f, "\\t"); break; case '\v': fprintf(f, "\\v"); break; case '\"': case '\'': if (c == quote_ch) fprintf(f, "\\%c", c); else fprintf(f, "%c", c); break; case '\\': fprintf(f, "\\\\"); break; default: /* XXX: no utf-8 support! */ if (c >= 32 && c <= 255) { fprintf(f, "%c", c); } else if (c <= 255) fprintf(f, "\\x%02x", c); else fprintf(f, "\\u%04x", c); break; } } fprintf(f, "%c", quote_ch); } break; default: if (tokc.tok >= 256) error("unsupported token in print_tok: %d", tt->tok); fprintf(f, "%c", tt->tok); break; } output_line_num += tt->lines; } /* check if token pasting could occur */ static BOOL compat_token(int c1, int c2) { if ((c1 == TOK_IDENT || c1 == TOK_NUM) && (c2 == TOK_IDENT || c2 == TOK_NUM)) return FALSE; if ((c1 == c2 && strchr("+-<>&|=*/.", c1)) || (c2 == '=' && strchr("+-<>&|!*/^%", c1)) || (c1 == '=' && c2 == '>') || (c1 == '/' && c2 == '*') || (c1 == '.' && c2 == TOK_NUM) || (c1 == TOK_NUM && c2 == '.')) return FALSE; return TRUE; } void js_compress(const char *filename, const char *outfilename, BOOL do_strip, BOOL keep_header) { FILE *outfile; int ltok, seen_space; line_num = 1; infile = fopen(filename, "rb"); if (!infile) { perror(filename); exit(1); } output_line_num = 1; outfile = fopen(outfilename, "wb"); if (!outfile) { perror(outfilename); exit(1); } nextch(); next(); ltok = 0; seen_space = 0; if (do_strip) { if (keep_header) { while (tokc.tok == ' ' || tokc.tok == '\n' || tokc.tok == '\t' || tokc.tok == '\v' || tokc.tok == '\b' || tokc.tok == '\f') { seen_space = 1; next(); } if (tokc.tok == TOK_COM) { print_tok(outfile, &tokc); //fprintf(outfile, "\n"); ltok = tokc.tok; seen_space = 0; next(); } } for(;;) { if (tokc.tok == TOK_EOF) break; if (tokc.tok == ' ' || tokc.tok == '\r' || tokc.tok == '\t' || tokc.tok == '\v' || tokc.tok == '\b' || tokc.tok == '\f' || tokc.tok == TOK_LCOM || tokc.tok == TOK_COM) { /* don't print spaces or comments */ seen_space = 1; } else if (tokc.tok == TOK_STR3) { print_tok(outfile, &tokc); ltok = tokc.tok; seen_space = 0; } else if (tokc.tok == TOK_STR1 || tokc.tok == TOK_STR2) { int count, i; /* find the optimal quote char */ count = 0; for(i = 0; i < tokc.len; i++) { if (tokc.str[i] == '\'') count++; else if (tokc.str[i] == '\"') count--; } if (count > 0) tokc.tok = TOK_STR2; else if (count < 0) tokc.tok = TOK_STR1; print_tok(outfile, &tokc); ltok = tokc.tok; seen_space = 0; } else { if (seen_space && !compat_token(ltok, tokc.tok)) { fprintf(outfile, " "); } print_tok(outfile, &tokc); ltok = tokc.tok; seen_space = 0; } next(); } } else { /* just handle preprocessing */ while (tokc.tok != TOK_EOF) { print_tok(outfile, &tokc); next(); } } fclose(outfile); fclose(infile); } #define HASH_SIZE 30011 #define MATCH_LEN_MIN 3 #define MATCH_LEN_MAX (4 + 63) #define DIST_MAX 65535 static int find_longest_match(int *pdist, const uint8_t *src, int src_len, const int *hash_next, int cur_pos) { int pos, i, match_len, match_pos, pos_min, len_max; len_max = min_int(src_len - cur_pos, MATCH_LEN_MAX); match_len = 0; match_pos = 0; pos_min = max_int(cur_pos - DIST_MAX - 1, 0); pos = hash_next[cur_pos]; while (pos >= pos_min) { for(i = 0; i < len_max; i++) { if (src[cur_pos + i] != src[pos + i]) break; } if (i > match_len) { match_len = i; match_pos = pos; } pos = hash_next[pos]; } *pdist = cur_pos - match_pos - 1; return match_len; } int lz_compress(uint8_t **pdst, const uint8_t *src, int src_len) { int *hash_table, *hash_next; uint32_t h, v; int i, dist, len, len1, dist1; uint8_t *dst, *q; /* build the hash table */ hash_table = malloc(sizeof(hash_table[0]) * HASH_SIZE); for(i = 0; i < HASH_SIZE; i++) hash_table[i] = -1; hash_next = malloc(sizeof(hash_next[0]) * src_len); for(i = 0; i < src_len; i++) hash_next[i] = -1; for(i = 0; i < src_len - MATCH_LEN_MIN + 1; i++) { h = ((src[i] << 16) | (src[i + 1] << 8) | src[i + 2]) % HASH_SIZE; hash_next[i] = hash_table[h]; hash_table[h] = i; } for(;i < src_len; i++) { hash_next[i] = -1; } free(hash_table); dst = malloc(src_len + 4); /* never larger than the source */ q = dst; *q++ = src_len >> 24; *q++ = src_len >> 16; *q++ = src_len >> 8; *q++ = src_len >> 0; /* compress */ i = 0; while (i < src_len) { if (src[i] >= 128) return -1; len = find_longest_match(&dist, src, src_len, hash_next, i); if (len >= MATCH_LEN_MIN) { /* heuristic: see if better length just after */ len1 = find_longest_match(&dist1, src, src_len, hash_next, i + 1); if (len1 > len) goto no_match; } if (len < MATCH_LEN_MIN) { no_match: *q++ = src[i]; i++; } else if (len <= (3 + 15) && dist < (1 << 10)) { v = 0x8000 | ((len - 3) << 10) | dist; *q++ = v >> 8; *q++ = v; i += len; } else if (len >= 4 && len <= (4 + 63) && dist < (1 << 16)) { v = 0xc00000 | ((len - 4) << 16) | dist; *q++ = v >> 16; *q++ = v >> 8; *q++ = v; i += len; } else { goto no_match; } } free(hash_next); *pdst = dst; return q - dst; } static int load_file(uint8_t **pbuf, const char *filename) { FILE *f; uint8_t *buf; int buf_len; f = fopen(filename, "rb"); if (!f) { perror(filename); exit(1); } fseek(f, 0, SEEK_END); buf_len = ftell(f); fseek(f, 0, SEEK_SET); buf = malloc(buf_len + 1); fread(buf, 1, buf_len, f); buf[buf_len] = '\0'; fclose(f); *pbuf = buf; return buf_len; } static void save_file(const char *filename, const uint8_t *buf, int buf_len) { FILE *f; f = fopen(filename, "wb"); if (!f) { perror(filename); exit(1); } fwrite(buf, 1, buf_len, f); fclose(f); } static void save_c_source(const char *filename, const uint8_t *buf, int buf_len, const char *var_name) { FILE *f; int i; f = fopen(filename, "wb"); if (!f) { perror(filename); exit(1); } fprintf(f, "/* This file is automatically generated - do not edit */\n\n"); fprintf(f, "const uint8_t %s[] = {\n", var_name); for(i = 0; i < buf_len; i++) { fprintf(f, " 0x%02x,", buf[i]); if ((i % 8) == 7 || (i == buf_len - 1)) fprintf(f, "\n"); } fprintf(f, "};\n"); fclose(f); } #define DEFAULT_OUTPUT_FILENAME "out.js" void help(void) { printf("jscompress version 1.0 Copyright (c) 2008-2018 Fabrice Bellard\n" "usage: jscompress [options] filename\n" "Javascript compressor\n" "\n" "-h print this help\n" "-n do not compress spaces\n" "-H keep the first comment\n" "-c compress to file\n" "-C name compress to C source ('name' is the variable name)\n" "-D symbol define preprocessor symbol\n" "-U symbol undefine preprocessor symbol\n" "-o outfile set the output filename (default=%s)\n", DEFAULT_OUTPUT_FILENAME); exit(1); } int main(int argc, char **argv) { int c, do_strip, keep_header, compress; const char *out_filename, *c_var, *fname; char tmpfilename[1024]; do_strip = 1; keep_header = 0; out_filename = DEFAULT_OUTPUT_FILENAME; compress = 0; c_var = NULL; for(;;) { c = getopt(argc, argv, "hno:HcC:D:U:"); if (c == -1) break; switch(c) { case 'h': help(); break; case 'n': do_strip = 0; break; case 'o': out_filename = optarg; break; case 'H': keep_header = 1; break; case 'c': compress = 1; break; case 'C': c_var = optarg; compress = 1; break; case 'D': define_symbol(optarg); break; case 'U': undefine_symbol(optarg); break; } } if (optind >= argc) help(); filename = argv[optind++]; if (compress) { #if defined(__ANDROID__) /* XXX: use another directory ? */ snprintf(tmpfilename, sizeof(tmpfilename), "out.%d", getpid()); #else snprintf(tmpfilename, sizeof(tmpfilename), "/tmp/out.%d", getpid()); #endif fname = tmpfilename; } else { fname = out_filename; } js_compress(filename, fname, do_strip, keep_header); if (compress) { uint8_t *buf1, *buf2; int buf1_len, buf2_len; buf1_len = load_file(&buf1, fname); unlink(fname); buf2_len = lz_compress(&buf2, buf1, buf1_len); if (buf2_len < 0) { fprintf(stderr, "Could not compress file (UTF8 chars are forbidden)\n"); exit(1); } if (c_var) { save_c_source(out_filename, buf2, buf2_len, c_var); } else { save_file(out_filename, buf2, buf2_len); } free(buf1); free(buf2); } return 0; }