1: # 4 "./lpsrc/flx_regen.ipk" 2: open Flx_ast 3: open Flx_types 4: 5: type reg_kind_t = 6: [ 7: | `regmatch of string * string 8: | `reglex of string * string * string 9: ] 10: 11: val regen: 12: Buffer.t -> 13: range_srcref -> 14: regular_args_t -> 15: reg_kind_t -> 16: (tbexpr_t -> string) -> 17: unit 18:
1: # 23 "./lpsrc/flx_regen.ipk" 2: open Flx_ast 3: open Flx_types 4: open Flx_util 5: 6: type reg_kind_t = 7: [ 8: | `regmatch of string * string 9: | `reglex of string * string * string 10: ] 11: 12: 13: let make_dtran nchars nstates matrix = 14: (* transition matrix *) 15: let d = Array.make_matrix nchars nstates (-1) in 16: Hashtbl.iter 17: (fun (c,s1) s2 -> 18: d.(c).(s1) <- s2 19: ) 20: matrix 21: ; 22: d 23: 24: let emit_matrix tack nchars nstates d = 25: (* find equivalent chars *) 26: tack " // state->state transition vectors for canonical characters\n"; 27: let canon = Array.make nchars 0 in 28: for i = 0 to nchars - 1 do 29: try 30: for j = 0 to i do 31: if d.(i) = d.(j) then 32: begin 33: canon.(i) <- j; 34: if i = j then 35: begin 36: tack(" static int s"^si i^"["^si (nstates+1)^"]=\n"); 37: tack " {0, // error\n" 38: ; 39: for state = 0 to nstates - 1 do 40: if state mod 16 = 0 then tack " "; 41: let s = " " ^ si (d.(i).(state)+1) in 42: let n = String.length s in 43: let s = String.sub s (n-3) 3 in 44: tack s; 45: if state <> nstates - 1 then tack ", "; 46: if state mod 16 = 15 then tack "\n" 47: done 48: ; 49: tack "\n"; 50: tack " };\n" 51: end; 52: raise Not_found; 53: end 54: done (* j *) 55: with Not_found -> () 56: done (* i *) 57: ; 58: tack (" //char -> (state->state) lookup\n"); 59: tack (" static int *matrix["^si nchars^"] =\n"); 60: tack " {\n"; 61: for i = 0 to nchars - 1 do 62: if i mod 16 = 0 then tack " "; 63: let s = " s" ^ si canon.(i) in 64: let n = String.length s in 65: let s = String.sub s (n-4) 4 in 66: tack s; 67: if i <> nchars - 1 then tack ", "; 68: if i mod 16 = 15 then tack "\n" 69: done; 70: tack " };\n\n" 71: 72: 73: let regen b sr (alpha, nstates, cases, matrix) kind ge = 74: let nchars = 256 in 75: let tack s = bcat b s in 76: 77: tack (" // regmatch/lex: error state=0, start state=1, valid states=1 to "^si nstates^"\n"); 78: 79: let d = make_dtran nchars nstates matrix in 80: begin match kind with | `reglex _ -> 81: tack (" // accepting states\n"); 82: tack (" static int accept["^si (nstates+1)^"]={\n"); 83: tack ( 84: " 0," ^ 85: catmap "," 86: (fun i -> match Hashtbl.mem cases i with 87: | true -> "1" | false -> "0" 88: ) 89: (nlist nstates) 90: ); 91: tack "\n };\n"; 92: | _ -> () 93: end 94: ; 95: emit_matrix tack nchars nstates d 96: ; 97: begin match kind with 98: | `regmatch (lexeme_start,buffer_end) -> 99: tack (" char const*p = "^lexeme_start^";\n"); 100: tack (" int state = 1;\n"); 101: tack (" while(state && p != "^buffer_end^")\n"); 102: tack (" state = matrix[int(*p++)][state];\n"); 103: tack (" switch (state)\n") 104: | `reglex (lexeme_start, buffer_end, lexeme_end) -> 105: tack (" char const *p= "^lexeme_start^";\n"); 106: tack (" int state = 1;\n"); 107: tack (" int last_state = 0;\n"); 108: tack (" "^lexeme_end^" = NULL;\n"); 109: tack (" if(accept[state]){\n"); 110: tack (" last_state = state;\n"); 111: tack (" "^lexeme_end^" = p;\n"); 112: tack (" }\n"); 113: tack (" while(state && p != "^buffer_end^") {\n"); 114: tack (" state = matrix[int(*p++)][state];\n"); 115: tack (" if(accept[state]){\n"); 116: tack (" last_state = state;\n"); 117: tack (" "^lexeme_end^" = p;\n"); 118: tack (" }\n"); 119: tack (" }\n"); 120: tack (" switch (last_state)\n") 121: | _ -> assert false 122: end 123: ; 124: tack " {\n"; 125: Hashtbl.iter 126: (fun state expr -> 127: tack 128: ( 129: " case " ^ si (state + 1)^ ":"^ 130: " return " ^ ge expr ^ ";\n" 131: ) 132: ) 133: cases 134: ; 135: let f,sl,sc,el,ec = sr in 136: let s = Flx_print.string_of_string f ^ "," ^ 137: si sl ^ "," ^ si sc ^ "," ^ 138: si el ^ "," ^ si ec 139: in 140: tack (" case 0: FLX_MATCH_FAILURE("^s^");\n"); 141: tack (" default: FLX_MATCH_FAILURE("^s^");\n"); 142: tack " }\n"; 143: 144: