/* * cutof text-lines * * Nicholas Christopoulos */ #include #include #include #include #include #include typedef char * char_p; #define OVECCOUNT 30 /* should be a multiple of 3 */ /////////////////////////////////////////////////////////////////////////////////////////////////////////// // classic... text buffers // strdup using new[] char *pstrdup(const char *source) { char *ns = new char[strlen(source)+1]; strcpy(ns, source); return ns; } // typical node (abstract class) class node { public: node *next; node() { next = NULL; } virtual ~node() { } }; // typical list (abstract class) class list : public node { public: node *head, *tail; list() : node() { head = tail = NULL; } virtual ~list() { clear(); } // clean up void clear() { node *cp, *pp; cp = head; while ( cp ) { pp = cp; cp = cp->next; delete pp; } } // connect a new node void connect(node *np) { if ( !head ) head = tail = np; else ( tail->next = np, tail = np ); } }; // text node / text line class tnode : public node { public: char *line; pcre *re; int type; char *data; tnode(const char *src) : node() { line = pstrdup(src); type = 0; data = NULL; } virtual ~tnode() { delete[] line; if (data) delete[] data; } void setData(const char *s) { if ( data ) delete[] data; data = pstrdup(s); } }; // text list class tlist : public list { public: tlist() : list() { } virtual ~tlist() { } // add a new line tnode *add(const char *text) { tnode *np = new tnode(text); connect(np); return np; } }; // buffer list class buflist : public list { public: buflist() : list() { } virtual ~buflist() { } // add a new buffer tlist *add() { tlist *np = new tlist(); connect(np); return np; } }; /////////////////////////////////////////////////////////////////////////////////////////////////////////// //buflist *buffers; tlist ignstart; tlist ignstop; tlist ignline; tlist redir; enum cltype { usr_strstr, use_pcre, use_re }; /* ignore empty lines more than this */ static int ignlines = 1; /* ignore block on/off */ static int ignore = 0; /* ignore control characters */ static int ignctrl = 0; // another panic routine void die(bool what, const char *fmt, ...) { if ( what ) { char buf[4096]; va_list ap; va_start(ap, fmt); vsnprintf(buf, 4096, fmt, ap); va_end(ap); perror(buf); exit(1); } } // void cleantext(const char *src, char *dst) { const char *p = src; char *d = dst; while ( *p ) { if ( *p == '\b' ) { d --; p ++; } else { if ( *p >= ' ' ) *d ++ = *p ++; else p ++; } } *d = '\0'; } // void rmprefix(char *buf, char ch) { char *p; p = strchr(buf, ch); if ( p ) { p ++; while ( *p == ' ' || *p == '\t' ) p ++; strcpy(buf, p); } } // void rmsuffix(char *buf, char ch) { char *p; p = strchr(buf, ch); if ( p ) *p = '\0'; } /* * true if c is a white-space */ int is_wspace(int c) { return (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\b'); } /* * removes spaces and returns a new string */ char *trimdup(const char *str) { char *buf; char *p; buf = (char *) malloc(strlen(str)+1); strcpy(buf, str); if ( *str == '\0' ) return buf; p = (char *) str; while ( is_wspace(*p) ) p ++; strcpy(buf, p); if ( *p != '\0' ) { p = buf; while ( *p ) p ++; p --; while ( p > buf && is_wspace(*p) ) p --; p ++; *p = '\0'; } return buf; } /* */ tnode *conf_line(int line, tlist *list, const char *text) { if ( strncmp(text, "@re:", 4) == 0 ) { const char *error; int errofs; tnode *c = (tnode *) list->add(text+4); c->type = use_pcre; c->re = pcre_compile(text+4, 0, &error, &errofs, NULL); die(c->re == NULL, "pcre_compile(), failed (line %d, offset %d)\n-> %s", line, error, errofs); return c; } return (tnode *) list->add(text); } /* */ void read_config(const char *file) { FILE *fp; int mode = 0; char buf[1024], *p; int line = 0; fp = fopen(file, "rt"); if ( fp ) { while ( fgets(buf, 1024, fp) ) { if ( buf[strlen(buf)-1] == '\n' ) buf[strlen(buf)-1] = '\0'; line ++; if ( strlen(buf) ) { if ( strstr(buf, "@remove_text_lines") ) mode = 1; else if ( strstr(buf, "@ignore_block_start") ) mode = 2; else if ( strstr(buf, "@ignore_block_stop") ) mode = 3; else if ( strstr(buf, "@max_empty_lines") ) mode = 4; else if ( strstr(buf, "@default_ignore_state") ) mode = 5; else if ( strstr(buf, "@remove_control_chars") ) mode = 6; else if ( strstr(buf, "@redirect") ) mode = 7; else { switch ( mode ) { case 1: conf_line(line, &ignline, buf); break; case 2: conf_line(line, &ignstart, buf); break; case 3: conf_line(line, &ignstop, buf); break; case 4: ignlines = atoi(buf); break; case 5: ignore = atoi(buf); break; case 6: ignctrl = atoi(buf); break; case 7: p = strstr(buf, ":"); if ( p ) { *p = '\0'; tnode *t = conf_line(line, &redir, p+1); t->setData(buf); } else die(true, "syntax error"); break; default: fprintf(stderr, "I don't know what to do [%s]\n", buf); } } } } fclose(fp); } else fprintf(stderr, "config not found [%s]\n", file); } /* */ tnode *check_list(tlist *list, const char *text) { tnode *t; int rc; int erroffset; int ovector[OVECCOUNT]; t = (tnode *) list->head; while ( t ) { switch ( t->type ) { case use_pcre: rc = pcre_exec(t->re, NULL, text, strlen(text), 0, 0, ovector, OVECCOUNT); if ( rc >= 0 ) return t; break; default: if ( strstr(text, t->line) ) return t; } t = (tnode *) t->next; } return NULL; } /* */ int main(int argc, char *argv[]) { char buf[1024], *trimed; int lnsp = 0, i, idx, c; int no_store = 0, arg_mode = 0; char conf[1024]; char file[1024]; FILE *fp, *fout; tnode *t; // init strcpy(conf, "cutof.conf"); fp = stdin; fout = stdout; // read args for ( i = 1; i < argc; i ++ ) { if ( argv[i][0] == '-' ) { if ( strcmp(argv[i], "-f") == 0 ) arg_mode = 1; else if ( strcmp(argv[i], "-rc") == 0 ) ignctrl = 1; else if ( strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0 ) { printf("usage: cutof [-f config-file] [-rc] [input-file]\n"); printf("-rc = enable cut control characters\n"); return 1; } else fprintf(stderr, "unknown argument [%s]\n", argv[i]); } else { if ( arg_mode == 1 ) { strcpy(conf, argv[i]); arg_mode = 0; } else { strcpy(file, argv[i]); fp = fopen(file, "rt"); if ( !fp ) { fprintf(stderr, "cannot open 'input-file' [%s]\n", file); return 1; } } } } read_config(conf); // start input while ( !feof(fp) ) { // read text-line idx = 0; while ( !feof(fp) ) { c = fgetc(fp); if ( c == 0 ) c = '\r'; else if ( c == '\n' ) break; if ( c != '\r' ) { if ( ignctrl ) { if ( c >= 32 ) { buf[idx] = c; idx ++; } } else { buf[idx] = c; idx ++; } } } buf[idx] = '\0'; trimed = trimdup(buf); //////////// if ( ignore ) { // inside ignored block if ( check_list(&ignstop, buf) ) { ignore = 0; lnsp = 0; } } else { // inside not-ignored block // check start-ignore block if ( check_list(&ignstart, buf) ) ignore = 1; // no_store = 0; // check ignore text-line if ( check_list(&ignline, buf) ) no_store = 1; // check empty lines if ( strlen(trimed) == 0 ) { lnsp ++; if ( lnsp > ignlines ) no_store = 1; } else { if ( no_store == 0 ) lnsp = 0; } // check for redirection if ( (t = check_list(&redir, buf)) != NULL ) { if ( fout != stdout ) fclose(fout); fout = fopen(t->data, "wt"); if ( !fout ) { fprintf(stderr, "cannot redir [%s]\n", t->data); fout = stdout; } } } // print if ( no_store == 0 && ignore == 0 ) fprintf(fout, "%s\n", buf); free(trimed); } return 0; }