5.55. Lexer generator

Start ocaml section to src/flx_regen.mli[1 /1 ]
     1: # 4 "./lpsrc/flx_regen.ipk"
     2: open Flx_ast
     3: open Flx_types
     4: 
     5: type reg_kind_t =
     6: [
     7:   | `regmatch of string * string
     8:   | `reglex of string * string * string
     9: ]
    10: 
    11: val regen:
    12:   Buffer.t ->
    13:   range_srcref ->
    14:   regular_args_t ->
    15:   reg_kind_t ->
    16:   (tbexpr_t -> string) ->
    17:   unit
    18: 
End ocaml section to src/flx_regen.mli[1]
Start ocaml section to src/flx_regen.ml[1 /1 ]
     1: # 23 "./lpsrc/flx_regen.ipk"
     2: open Flx_ast
     3: open Flx_types
     4: open Flx_util
     5: 
     6: type reg_kind_t =
     7: [
     8:   | `regmatch of string * string
     9:   | `reglex of string * string * string
    10: ]
    11: 
    12: 
    13: let make_dtran nchars nstates matrix =
    14:   (* transition matrix *)
    15:   let d = Array.make_matrix nchars nstates (-1) in
    16:   Hashtbl.iter
    17:   (fun (c,s1) s2 ->
    18:     d.(c).(s1) <- s2
    19:   )
    20:   matrix
    21:   ;
    22:   d
    23: 
    24: let emit_matrix tack nchars nstates d =
    25:   (* find equivalent chars *)
    26:   tack "  // state->state transition vectors for canonical characters\n";
    27:   let canon = Array.make nchars 0 in
    28:   for i = 0 to nchars - 1 do
    29:     try
    30:       for j = 0 to i do
    31:         if d.(i) = d.(j) then
    32:         begin
    33:           canon.(i) <- j;
    34:           if i = j then
    35:           begin
    36:             tack("  static int s"^si i^"["^si (nstates+1)^"]=\n");
    37:             tack "  {0, // error\n"
    38:             ;
    39:             for state = 0 to nstates - 1 do
    40:               if state mod 16 = 0 then tack "      ";
    41:               let s = "     " ^ si (d.(i).(state)+1) in
    42:               let n = String.length s in
    43:               let s = String.sub s (n-3) 3 in
    44:               tack s;
    45:               if state <> nstates - 1 then tack ", ";
    46:               if state mod 16 = 15 then tack "\n"
    47:             done
    48:             ;
    49:             tack "\n";
    50:             tack "  };\n"
    51:           end;
    52:           raise Not_found;
    53:         end
    54:       done (* j *)
    55:     with Not_found -> ()
    56:   done (* i *)
    57:   ;
    58:   tack ("  //char -> (state->state) lookup\n");
    59:   tack ("  static int *matrix["^si nchars^"] =\n");
    60:   tack "  {\n";
    61:   for i = 0 to nchars - 1  do
    62:     if i mod 16 = 0 then tack "      ";
    63:     let s = "     s" ^ si canon.(i) in
    64:     let n = String.length s in
    65:     let s = String.sub s (n-4) 4 in
    66:     tack s;
    67:     if i <> nchars - 1 then tack ", ";
    68:     if i mod 16 = 15 then tack "\n"
    69:   done;
    70:   tack "  };\n\n"
    71: 
    72: 
    73: let regen b sr (alpha, nstates, cases, matrix) kind ge =
    74:   let nchars = 256 in
    75:   let tack s = bcat b s in
    76: 
    77:   tack ("  // regmatch/lex: error state=0, start state=1, valid states=1 to "^si nstates^"\n");
    78: 
    79:   let d = make_dtran nchars nstates matrix in
    80:   begin match kind with | `reglex _ ->
    81:   tack ("  // accepting states\n");
    82:   tack ("  static int accept["^si (nstates+1)^"]={\n");
    83:   tack (
    84:     "    0," ^
    85:     catmap ","
    86:     (fun i -> match Hashtbl.mem cases i with
    87:     | true -> "1" | false -> "0"
    88:     )
    89:     (nlist nstates)
    90:   );
    91:   tack "\n  };\n";
    92:   | _ -> ()
    93:   end
    94:   ;
    95:   emit_matrix tack nchars nstates d
    96:   ;
    97:   begin match kind with
    98:   | `regmatch (lexeme_start,buffer_end) ->
    99:     tack ("  char const*p = "^lexeme_start^";\n");
   100:     tack ("  int state = 1;\n");
   101:     tack ("  while(state && p != "^buffer_end^")\n");
   102:     tack ("    state = matrix[int(*p++)][state];\n");
   103:     tack ("  switch (state)\n")
   104:   | `reglex (lexeme_start, buffer_end, lexeme_end) ->
   105:     tack ("  char const *p= "^lexeme_start^";\n");
   106:     tack ("  int state = 1;\n");
   107:     tack ("  int last_state = 0;\n");
   108:     tack ("  "^lexeme_end^" = NULL;\n");
   109:     tack ("  if(accept[state]){\n");
   110:     tack ("    last_state = state;\n");
   111:     tack ("    "^lexeme_end^" = p;\n");
   112:     tack ("  }\n");
   113:     tack ("  while(state && p != "^buffer_end^") {\n");
   114:     tack ("    state = matrix[int(*p++)][state];\n");
   115:     tack ("    if(accept[state]){\n");
   116:     tack ("      last_state = state;\n");
   117:     tack ("      "^lexeme_end^" = p;\n");
   118:     tack ("    }\n");
   119:     tack ("  }\n");
   120:     tack ("  switch (last_state)\n")
   121:   | _ -> assert false
   122:   end
   123:   ;
   124:   tack "  {\n";
   125:   Hashtbl.iter
   126:   (fun state expr ->
   127:     tack
   128:     (
   129:       "    case " ^ si (state + 1)^ ":"^
   130:       " return " ^ ge expr ^ ";\n"
   131:     )
   132:   )
   133:   cases
   134:   ;
   135:   let f,sl,sc,el,ec = sr in
   136:   let s = Flx_print.string_of_string f ^ "," ^
   137:     si sl ^ "," ^ si sc ^ "," ^
   138:     si el ^ "," ^ si ec
   139:   in
   140:   tack ("    case 0: FLX_MATCH_FAILURE("^s^");\n");
   141:   tack ("    default: FLX_MATCH_FAILURE("^s^");\n");
   142:   tack "  }\n";
   143: 
   144: 
End ocaml section to src/flx_regen.ml[1]