From 550a6488c9e2241e2979317c04d6d73752618402 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Tue, 2 Jul 2019 17:39:56 +0200 Subject: [PATCH] Filter: documentation of the M4 preprocessor --- filter/decl.m4 | 351 +++++++++++++++++++++++++++++++----------------- filter/f-inst.c | 11 +- 2 files changed, 234 insertions(+), 128 deletions(-) diff --git a/filter/decl.m4 b/filter/decl.m4 index cc069485..20119c1d 100644 --- a/filter/decl.m4 +++ b/filter/decl.m4 @@ -6,149 +6,70 @@ m4_divert(-1)m4_dnl # # Can be freely distributed and used under the terms of the GNU GPL. # +# THIS IS AN M4 MACRO FILE GENERATING 3 FILES ALTOGETHER. +# KEEP YOUR HANDS OFF UNLESS YOU KNOW WHAT YOU'RE DOING. +# EDITING AND DEBUGGING THIS FILE MAY DAMAGE YOUR BRAIN SERIOUSLY. # -# Global Diversions: -# 4 enum fi_code -# 5 enum fi_code to string -# 6 dump line item -# 7 dump line item callers -# 8 linearize -# 9 same (filter comparator) -# 1 union in struct f_inst -# 3 constructors + interpreter +# But you're welcome to read and edit and debug if you aren't scared. +# +# Uncomment the following line to get exhaustive debug output. +# m4_debugmode(aceflqtx) +# +# How it works: +# 1) Instruction to code conversion (uses diversions 100..199) +# 2) Code wrapping (uses diversions 1..99) +# 3) Final preparation (uses diversions 200..299) +# 4) Shipout +# +# See below for a detailed description. +# +# +# 1) Instruction to code conversion +# The code provided in f-inst.c between consecutive INST() calls +# is interleaved for many different places. It is processed here +# and split into separate instances where split-by-instruction +# happens. 
These parts are stored in temporary diversions listed: # -# Per-inst Diversions: # 101 content of per-inst struct # 102 constructor arguments # 103 constructor body # 104 dump line item content +# (there may be nothing in dump-line content and +# it must be handled specially in phase 2) # 105 linearize body # 106 comparator body # 107 struct f_line_item content # 108 interpreter body # -# Final diversions -# 200+ completed text before it is flushed to output - -m4_dnl m4_debugmode(aceflqtx) - -m4_define(FID_ZONE, `m4_divert($1) /* $2 for INST_NAME() */') -m4_define(FID_INST, `FID_ZONE(1, Instruction structure for config)') -m4_define(FID_LINE, `FID_ZONE(2, Instruction structure for interpreter)') -m4_define(FID_NEW, `FID_ZONE(3, Constructor)') -m4_define(FID_ENUM, `FID_ZONE(4, Code enum)') -m4_define(FID_ENUM_STR, `FID_ZONE(5, Code enum to string)') -m4_define(FID_DUMP, `FID_ZONE(6, Dump line)') -m4_define(FID_DUMP_CALLER, `FID_ZONE(7, Dump line caller)') -m4_define(FID_LINEARIZE, `FID_ZONE(8, Linearize)') -m4_define(FID_SAME, `FID_ZONE(9, Comparison)') - +# Here are macros to allow you to _divert to the right directions. m4_define(FID_STRUCT_IN, `m4_divert(101)') m4_define(FID_NEW_ARGS, `m4_divert(102)') m4_define(FID_NEW_BODY, `m4_divert(103)') m4_define(FID_DUMP_BODY, `m4_divert(104)m4_define([[FID_DUMP_BODY_EXISTS]])') -m4_define(FID_LINEARIZE_BODY, `m4_divert(105)m4_define([[FID_LINEARIZE_BODY_EXISTS]])') +m4_define(FID_LINEARIZE_BODY, `m4_divert(105)') m4_define(FID_SAME_BODY, `m4_divert(106)') m4_define(FID_LINE_IN, `m4_divert(107)') m4_define(FID_INTERPRET_BODY, `m4_divert(108)') -m4_define(FID_ALL, `FID_INTERPRET_BODY'); +# Sometimes you want slightly different code versions in different +# outputs. +# Use FID_HIC(code for inst-gen.h, code for inst-interpret.c, code for inst-gen.c) +# and put it into [[ ]] quotes if it shall contain commas. 
m4_define(FID_HIC, `m4_ifelse(TARGET, [[H]], [[$1]], TARGET, [[I]], [[$2]], TARGET, [[C]], [[$3]])') +# In interpreter code, this is quite common. m4_define(FID_INTERPRET_EXEC, `FID_HIC(,[[FID_INTERPRET_BODY()]],[[m4_divert(-1)]])') m4_define(FID_INTERPRET_NEW, `FID_HIC(,[[m4_divert(-1)]],[[FID_INTERPRET_BODY()]])') + +# If the instruction is never converted to constant, the interpret +# code is not produced at all for constructor m4_define(NEVER_CONSTANT, `m4_define([[INST_NEVER_CONSTANT]])') m4_define(FID_IFCONST, `m4_ifdef([[INST_NEVER_CONSTANT]],[[$2]],[[$1]])') -m4_define(INST_FLUSH, `m4_ifdef([[INST_NAME]], [[ -FID_ENUM -INST_NAME(), -FID_ENUM_STR -[INST_NAME()] = "INST_NAME()", -FID_INST -struct { -m4_undivert(101) -} i_[[]]INST_NAME(); -FID_LINE -struct { -m4_undivert(107) -} i_[[]]INST_NAME(); -FID_NEW -FID_HIC( -[[ -struct f_inst *f_new_inst_]]INST_NAME()[[(enum f_instruction_code fi_code -m4_undivert(102) -);]], -[[ - case INST_NAME(): - #define whati (&(what->i_]]INST_NAME()[[)) - m4_ifelse(m4_eval(INST_INVAL() > 0), 1, [[if (fstk->vcnt < INST_INVAL()) runtime("Stack underflow"); fstk->vcnt -= INST_INVAL(); ]]) - m4_undivert(108) - #undef whati - break; -]], -[[ -struct f_inst *f_new_inst_]]INST_NAME()[[(enum f_instruction_code fi_code -m4_undivert(102) -) - { - struct f_inst *what = fi_new(fi_code); - FID_IFCONST([[uint constargs = 1;]]) - #define whati (&(what->i_]]INST_NAME()[[)) - m4_undivert(103) - FID_IFCONST([[if (!constargs)]]) - return what; - FID_IFCONST([[m4_undivert(108)]]) - #undef whati - } -]]) - -FID_DUMP_CALLER -case INST_NAME(): f_dump_line_item_]]INST_NAME()[[(item, indent + 1); break; - -FID_DUMP -m4_ifdef([[FID_DUMP_BODY_EXISTS]], -[[static inline void f_dump_line_item_]]INST_NAME()[[(const struct f_line_item *item_, const int indent)]], -[[static inline void f_dump_line_item_]]INST_NAME()[[(const struct f_line_item *item UNUSED, const int indent UNUSED)]]) -m4_undefine([[FID_DUMP_BODY_EXISTS]]) -{ -#define item 
(&(item_->i_]]INST_NAME()[[)) -m4_undivert(104) -#undef item -} - -FID_LINEARIZE -case INST_NAME(): { -#define whati (&(what->i_]]INST_NAME()[[)) -#define item (&(dest->items[pos].i_]]INST_NAME()[[)) - m4_undivert(105) -#undef whati -#undef item - dest->items[pos].fi_code = what->fi_code; - dest->items[pos].lineno = what->lineno; - break; -} -m4_undefine([[FID_LINEARIZE_BODY_EXISTS]]) - -FID_SAME -case INST_NAME(): -#define f1 (&(f1_->i_]]INST_NAME()[[)) -#define f2 (&(f2_->i_]]INST_NAME()[[)) -m4_undivert(106) -#undef f1 -#undef f2 -break; -m4_divert(-1)FID_FLUSH(101,200) -]])') - -m4_define(INST, `m4_dnl -INST_FLUSH()m4_dnl -m4_define([[INST_NAME]], [[$1]])m4_dnl -m4_define([[INST_INVAL]], [[$2]])m4_dnl -m4_undefine([[INST_NEVER_CONSTANT]])m4_dnl -FID_ALL() m4_dnl -') - +# If the instruction has some attributes (here called members), +# these are typically carried with the instruction from constructor +# to interpreter. This yields a line of code everywhere on the path. +# FID_MEMBER is a macro to help with this task. m4_define(FID_MEMBER, `m4_dnl FID_LINE_IN $1 $2; @@ -170,8 +91,14 @@ debug("%s$4\n", INDENT, $5); ]]) FID_INTERPRET_EXEC const $1 $2 = whati->$2 -FID_ALL') +FID_INTERPRET_BODY') +# Instruction arguments are needed only until linearization is done. +# This puts the arguments into the filter line to be executed before +# the instruction itself. +# +# To achieve this, ARG_ANY must be called before anything writes into +# the instruction line as it moves the instruction pointer forward. m4_define(ARG_ANY, ` FID_STRUCT_IN struct f_inst * f$1; @@ -188,14 +115,17 @@ FID_IFCONST([[ } FID_LINEARIZE_BODY pos = linearize(dest, whati->f$1, pos); -FID_ALL()') +FID_INTERPRET_BODY()') +# Some arguments need to check their type. After that, ARG_ANY is called. 
m4_define(ARG, `ARG_ANY($1) FID_INTERPRET_EXEC() if (v$1.type != $2) runtime("Argument $1 of instruction %s must be of type $2, got 0x%02x", f_instruction_name(what->fi_code), v$1.type)m4_dnl -FID_ALL()') +FID_INTERPRET_BODY()') -m4_define(LINEX, `FID_INTERPRET_EXEC()LINEX_($1)FID_INTERPRET_NEW()return $1 FID_ALL()') +# Executing another filter line. This replaces the recursion +# that was needed in the former implementation. +m4_define(LINEX, `FID_INTERPRET_EXEC()LINEX_($1)FID_INTERPRET_NEW()return $1 FID_INTERPRET_BODY()') m4_define(LINEX_, `do { fstk->estk[fstk->ecnt].pos = 0; fstk->estk[fstk->ecnt].line = $1; @@ -226,13 +156,16 @@ do { if (whati->fl$1) { } } while(0) FID_INTERPRET_NEW return whati->f$1 -FID_ALL()') +FID_INTERPRET_BODY()') +# Some of the instructions have a result. These constructions +# state the result and put it to the right place. m4_define(RESULT, `RESULT_VAL([[ (struct f_val) { .type = $1, .val.$2 = $3 } ]])') m4_define(RESULT_VAL, `FID_HIC(, [[do { res = $1; fstk->vcnt++; } while (0)]], [[return fi_constant(what, $1)]])') m4_define(RESULT_VOID, `RESULT_VAL([[ (struct f_val) { .type = T_VOID } ]])') +# Some common filter instruction members m4_define(SYMBOL, `FID_MEMBER(struct symbol *, sym, [[strcmp(f1->sym->name, f2->sym->name) || (f1->sym->class != f2->sym->class)]], symbol %s, item->sym->name)') m4_define(RTC, `FID_MEMBER(struct rtable_config *, rtc, [[strcmp(f1->rtc->name, f2->rtc->name)]], route table %s, item->rtc->name)') @@ -240,13 +173,174 @@ m4_define(STATIC_ATTR, `FID_MEMBER(struct f_static_attr, sa, f1->sa.sa_code != f m4_define(DYNAMIC_ATTR, `FID_MEMBER(struct f_dynamic_attr, da, f1->da.ea_code != f2->da.ea_code,,)') m4_define(ACCESS_RTE, `NEVER_CONSTANT()') +# 2) Code wrapping +# The code produced in 1xx temporary diversions is a raw code without +# any auxiliary commands and syntactical structures around. When the +# instruction is done, INST_FLUSH is called. 
More precisely, it is called +# at the beginning of INST() call and at the end of file. +# +# INST_FLUSH picks all the temporary diversions, wraps their content +# into appropriate headers and structures and saves them into global +# diversions listed: +# +# 4 enum fi_code +# 5 enum fi_code to string +# 6 dump line item +# 7 dump line item callers +# 8 linearize +# 9 same (filter comparator) +# 1 union in struct f_inst +# 3 constructors + interpreter +# +# These global diversions contain blocks of code that can be directly +# put into the final file, yet it still can't be written out now as +# every instruction writes to all of these diversions. + +# Code wrapping diversion names +m4_define(FID_ZONE, `m4_divert($1) /* $2 for INST_NAME() */') +m4_define(FID_INST, `FID_ZONE(1, Instruction structure for config)') +m4_define(FID_LINE, `FID_ZONE(2, Instruction structure for interpreter)') +m4_define(FID_NEW, `FID_ZONE(3, Constructor)') +m4_define(FID_ENUM, `FID_ZONE(4, Code enum)') +m4_define(FID_ENUM_STR, `FID_ZONE(5, Code enum to string)') +m4_define(FID_DUMP, `FID_ZONE(6, Dump line)') +m4_define(FID_DUMP_CALLER, `FID_ZONE(7, Dump line caller)') +m4_define(FID_LINEARIZE, `FID_ZONE(8, Linearize)') +m4_define(FID_SAME, `FID_ZONE(9, Comparison)') + +# This macro does all the code wrapping. See inline comments. +m4_define(INST_FLUSH, `m4_ifdef([[INST_NAME]], [[ +FID_ENUM m4_dnl Contents of enum fi_code { ... 
} +INST_NAME(), +FID_ENUM_STR m4_dnl Contents of const char * indexed by enum fi_code +[INST_NAME()] = "INST_NAME()", +FID_INST m4_dnl Anonymous structure inside struct f_inst +struct { +m4_undivert(101) +} i_[[]]INST_NAME(); +FID_LINE m4_dnl Anonymous structure inside struct f_line_item +struct { +m4_undivert(107) +} i_[[]]INST_NAME(); +FID_NEW m4_dnl Constructor and interpreter code together +FID_HIC( +[[ m4_dnl Public declaration of constructor in H file +struct f_inst *f_new_inst_]]INST_NAME()[[(enum f_instruction_code fi_code +m4_undivert(102) +);]], +[[ m4_dnl The one case in The Big Switch inside interpreter + case INST_NAME(): + #define whati (&(what->i_]]INST_NAME()[[)) + m4_ifelse(m4_eval(INST_INVAL() > 0), 1, [[if (fstk->vcnt < INST_INVAL()) runtime("Stack underflow"); fstk->vcnt -= INST_INVAL(); ]]) + m4_undivert(108) + #undef whati + break; +]], +[[ m4_dnl Constructor itself +struct f_inst *f_new_inst_]]INST_NAME()[[(enum f_instruction_code fi_code +m4_undivert(102) +) + { + /* Allocate the structure */ + struct f_inst *what = fi_new(fi_code); + FID_IFCONST([[uint constargs = 1;]]) + + /* Initialize all the members */ + #define whati (&(what->i_]]INST_NAME()[[)) + m4_undivert(103) + + /* If not constant, return the instruction itself */ + FID_IFCONST([[if (!constargs)]]) + return what; + + /* Try to pre-calculate the result */ + FID_IFCONST([[m4_undivert(108)]]) + #undef whati + } +]]) + +FID_DUMP_CALLER m4_dnl Case in another big switch used in instruction dumping (debug) +case INST_NAME(): f_dump_line_item_]]INST_NAME()[[(item, indent + 1); break; + +FID_DUMP m4_dnl The dumper itself +m4_ifdef([[FID_DUMP_BODY_EXISTS]], +[[static inline void f_dump_line_item_]]INST_NAME()[[(const struct f_line_item *item_, const int indent)]], +[[static inline void f_dump_line_item_]]INST_NAME()[[(const struct f_line_item *item UNUSED, const int indent UNUSED)]]) +m4_undefine([[FID_DUMP_BODY_EXISTS]]) +{ +#define item (&(item_->i_]]INST_NAME()[[)) +m4_undivert(104) 
+#undef item +} + +FID_LINEARIZE m4_dnl The linearizer +case INST_NAME(): { +#define whati (&(what->i_]]INST_NAME()[[)) +#define item (&(dest->items[pos].i_]]INST_NAME()[[)) + m4_undivert(105) +#undef whati +#undef item + dest->items[pos].fi_code = what->fi_code; + dest->items[pos].lineno = what->lineno; + break; +} + +FID_SAME m4_dnl This code compares two f_line"s while reconfiguring +case INST_NAME(): +#define f1 (&(f1_->i_]]INST_NAME()[[)) +#define f2 (&(f2_->i_]]INST_NAME()[[)) +m4_undivert(106) +#undef f1 +#undef f2 +break; + +m4_divert(-1)FID_FLUSH(101,200) m4_dnl And finally this flushes all the unused diversions +]])') + +m4_define(INST, `m4_dnl This macro is called on beginning of each instruction. +INST_FLUSH()m4_dnl First, old data is flushed +m4_define([[INST_NAME]], [[$1]])m4_dnl Then we store instruction name, +m4_define([[INST_INVAL]], [[$2]])m4_dnl instruction input value count +m4_undefine([[INST_NEVER_CONSTANT]])m4_dnl and reset NEVER_CONSTANT trigger. +FID_INTERPRET_BODY() m4_dnl By default, every code is interpreter code. +') + +# 3) Final preparation +# +# Now we prepare all the code around the global diversions. +# It must be here, not in m4wrap, as we want M4 to mark the code +# by #line directives correctly, not to claim that every single line +# is at the beginning of the m4wrap directive. +# +# This part is split by the final file. +# H for inst-gen.h +# I for inst-interpret.c +# C for inst-gen.c +# +# So we proceed in a cycle: +# A. open a diversion +# B. send some code there +# C. close that diversion +# D. flush a global diversion +# E. open another diversion and goto B. +# +# Final diversions +# 200+ completed text before it is flushed to output + +# This is a list of output diversions m4_define(FID_WR_PUT_LIST) + +# This macro does the steps C to E, see above. 
m4_define(FID_WR_PUT_ALSO, `m4_define([[FID_WR_PUT_LIST]],FID_WR_PUT_LIST()[[FID_WR_DPUT(]]FID_WR_DIDX[[)FID_WR_DPUT(]]$1[[)]])m4_define([[FID_WR_DIDX]],m4_eval(FID_WR_DIDX+1))m4_divert(FID_WR_DIDX)') +# These macros do the splitting between H/I/C m4_define(FID_WR_DIRECT, `m4_ifelse(TARGET,[[$1]],[[FID_WR_INIT()]],[[FID_WR_STOP()]])') m4_define(FID_WR_INIT, `m4_define([[FID_WR_DIDX]],200)m4_define([[FID_WR_PUT]],[[FID_WR_PUT_ALSO($]][[@)]])m4_divert(200)') m4_define(FID_WR_STOP, `m4_define([[FID_WR_PUT]])m4_divert(-1)') +# Here is the direct code to be put into the output files +# together with the undiversions, being hidden under FID_WR_PUT() + m4_changequote([[,]]) FID_WR_DIRECT(I) FID_WR_PUT(3) @@ -412,13 +506,24 @@ struct f_line_item { /* Instruction constructors */ FID_WR_PUT(3) - m4_divert(-1) + +# 4) Shipout +# +# Everything is prepared in FID_WR_PUT_LIST now. Let's go! + m4_changequote(`,') +# Flusher auxiliary macro m4_define(FID_FLUSH, `m4_ifelse($1,$2,,[[m4_undivert($1)FID_FLUSH(m4_eval($1+1),$2)]])') + +# Defining the macro used in FID_WR_PUT_LIST m4_define(FID_WR_DPUT, `m4_undivert($1)') +# After the code is read and parsed, we: m4_m4wrap(`INST_FLUSH()m4_divert(0)FID_WR_PUT_LIST()m4_divert(-1)FID_FLUSH(1,200)') m4_changequote([[,]]) +# And now M4 is going to parse f-inst.c, fill the diversions +# and after the file is done, the content of m4_m4wrap (see before) +# is executed. 
diff --git a/filter/f-inst.c b/filter/f-inst.c index edc97794..5f30ee38 100644 --- a/filter/f-inst.c +++ b/filter/f-inst.c @@ -167,7 +167,7 @@ } whati->f1 = NULL; } - FID_ALL + FID_INTERPRET_BODY FID_INTERPRET_EXEC if (fstk->vcnt < whati->count) /* TODO: make this check systematic */ @@ -198,7 +198,7 @@ FID_INTERPRET_EXEC fstk->vcnt -= whati->count; - FID_ALL + FID_INTERPRET_BODY pm->len = whati->count; RESULT(T_PATH_MASK, path_mask, pm); @@ -337,7 +337,7 @@ FID_LINEARIZE_BODY { uint opos = pos; - FID_ALL + FID_INTERPRET_BODY ARG_ANY(1); @@ -345,7 +345,7 @@ if (opos < pos) dest->items[pos].flags |= FIF_PRINTED; } - FID_ALL + FID_INTERPRET_BODY FID_MEMBER(enum filter_return, fret, f1->fret != f2->fret, %s, filter_return_str(item->fret)); @@ -1045,7 +1045,8 @@ INST(FI_ASSERT, 1, 0) { /* Birdtest Assert */ NEVER_CONSTANT; ARG(1, T_BOOL); - FID_MEMBER(char *, s, [[strcmp(f1->s, f2->s)]], string \"%s\", item->s); + + FID_MEMBER(char *, s, [[strcmp(f1->s, f2->s)]], string %s, item->s); ASSERT(s);